Coverage Report - org.apache.commons.feedparser.ContentDetector
 
Classes in this File Line Coverage Branch Coverage Complexity
ContentDetector
0%
0/16
0%
0/24
1.111
 
 1  
 /*
 2  
  * Copyright 1999,2004 The Apache Software Foundation.
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  * 
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.feedparser;
 18  
 
 19  
 
 20  
 /**
 21  
  * Given the RAW content of a URL, determine if we're looking at an RSS file or
 22  
  * an HTML file.  We also return the given RSS version or Atom version.
 23  
  * 
 24  
  * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
 25  
  * @version $Id: ContentDetector.java 373614 2006-01-30 22:31:21Z mvdb $
 26  
  */
 27  0
 public class ContentDetector {
 28  
 
 29  
     /**
 30  
      * Return true if the given content seems to be RSS.  This is going to be a
 31  
      * cheat because really we have no way of telling if this is RSS other than if
 32  
      * it is XML and it starts with an RSS 1.0, 2.0, 0.91 or 0.9 decl
 33  
      *
 34  
      * 
 35  
      */
 36  
     public static ContentDetectorResult detect( String content ) throws Exception {
 37  
 
 38  0
         ContentDetectorResult result = new ContentDetectorResult();
 39  
         
 40  0
         result.isHTML = isHTMLContent( content );
 41  0
         result.isRSS = ( isRSS_1_0_Content( content ) ||
 42  
                          isRSS_2_0_Content( content ) ||
 43  
                          isRSS_0_9_0_Content( content ) ||
 44  
                          isRSS_0_9_1_Content( content ) ||
 45  
                          isRSS_0_9_2_Content( content ) );
 46  
 
 47  0
         result.isAtom = isAtomContent( content );
 48  
 
 49  0
         result.isFeed = result.isRSS || result.isAtom;
 50  
 
 51  0
         return result;
 52  
                          
 53  
     }
 54  
 
 55  
     /**
 56  
      * Return true if this is RSS 1.0 content
 57  
      *
 58  
      * 
 59  
      */
 60  
     public static boolean isRSS_1_0_Content( String content ) throws Exception {
 61  
 
 62  
         //do a search for the RSS 1.0 namespace.  This is a bit of a trick right
 63  
         //now.
 64  
 
 65  0
         return content.indexOf( "http://purl.org/rss/1.0/" ) != -1;
 66  
         
 67  
     }
 68  
 
 69  
     /**
 70  
      * Return true if this is RSS 2.0 content
 71  
      *
 72  
      * 
 73  
      */
 74  
     public static boolean isRSS_0_9_1_Content( String content ) throws Exception {
 75  
 
 76  
         //look for the beginning of the RSS element
 77  0
         return content.indexOf( "<rss" ) != -1;
 78  
 
 79  
     }
 80  
 
 81  
     /**
 82  
      * Return true if this is RSS 0.9.2 content
 83  
      *
 84  
      * 
 85  
      */
 86  
     public static boolean isRSS_0_9_2_Content( String content ) throws Exception {
 87  
 
 88  
         //same check for RSS 0.9.1
 89  0
         return isRSS_0_9_1_Content( content );
 90  
         
 91  
     }
 92  
 
 93  
     /**
 94  
      * Return true if this is RSS 2.0 content
 95  
      *
 96  
      * 
 97  
      */
 98  
     public static boolean isRSS_2_0_Content( String content ) throws Exception {
 99  
 
 100  0
         return isRSS_0_9_1_Content( content );
 101  
 
 102  
     }
 103  
 
 104  
     /**
 105  
      * Return true if this is RSS 2.0 content
 106  
      *
 107  
      * 
 108  
      */
 109  
     public static boolean isRSS_0_9_0_Content( String content ) throws Exception {
 110  
 
 111  
         //FIXME: look for the RDF namespace and the RSS DTD namespace
 112  0
         return content.indexOf( "http://my.netscape.com/rdf/simple/0.9/" ) != -1;
 113  
 
 114  
     }
 115  
 
 116  
     public static boolean isAtomContent( String content ) throws Exception {
 117  
 
 118  0
         return content.indexOf( "http://purl.org/atom/ns#" ) != -1;
 119  
 
 120  
     }
 121  
 
 122  
     /**
 123  
      * Return true if this is RSS 2.0 content
 124  
      *
 125  
      * 
 126  
      */
 127  
     public static boolean isHTMLContent( String content ) throws Exception {
 128  
 
 129  
         //look for the beginning of the RSS element
 130  0
         return content.indexOf( "<html" ) != -1;
 131  
 
 132  
     }
 133  
 
 134  
     public static void main( String[] args ) {
 135  
 
 136  
         try { 
 137  
             
 138  
             //System.out.println( RSSContentVerifier.isRSSContent( new URL( args[0] ) ) );
 139  
             
 140  
         } catch ( Throwable t ) {
 141  
             
 142  
             t.printStackTrace();
 143  
             
 144  0
         }
 145  
 
 146  0
     }
 147  
     
 148  
 }