Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
ContentDetector |
|
| 1.1111111111111112;1.111 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser; | |
18 | ||
19 | ||
20 | /** | |
21 | * Given the RAW content of a URL, determine if we're looking at an RSS file or | |
22 | * an HTML file. We also return the given RSS version or Atom version. | |
23 | * | |
24 | * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a> | |
25 | * @version $Id: ContentDetector.java 373614 2006-01-30 22:31:21Z mvdb $ | |
26 | */ | |
27 | 0 | public class ContentDetector { |
28 | ||
29 | /** | |
30 | * Detect whether the given content seems to be HTML, RSS or Atom and return | |
31 | * the findings as a ContentDetectorResult. This is a cheat because we only | |
32 | * search the raw content for known markers instead of really parsing it. | |
33 | * | |
34 | * | |
35 | */ | |
36 | public static ContentDetectorResult detect( String content ) throws Exception { | |
37 | ||
38 | 0 | ContentDetectorResult result = new ContentDetectorResult(); |
39 | ||
40 | 0 | result.isHTML = isHTMLContent( content ); |
41 | 0 | result.isRSS = ( isRSS_1_0_Content( content ) || |
42 | isRSS_2_0_Content( content ) || | |
43 | isRSS_0_9_0_Content( content ) || | |
44 | isRSS_0_9_1_Content( content ) || | |
45 | isRSS_0_9_2_Content( content ) ); | |
46 | ||
47 | 0 | result.isAtom = isAtomContent( content ); |
48 | ||
49 | 0 | result.isFeed = result.isRSS || result.isAtom; |
50 | ||
51 | 0 | return result; |
52 | ||
53 | } | |
54 | ||
55 | /** | |
56 | * Return true if this is RSS 1.0 content | |
57 | * | |
58 | * | |
59 | */ | |
60 | public static boolean isRSS_1_0_Content( String content ) throws Exception { | |
61 | ||
62 | //do a search for the RSS 1.0 namespace. This is a bit of a trick right | |
63 | //now. | |
64 | ||
65 | 0 | return content.indexOf( "http://purl.org/rss/1.0/" ) != -1; |
66 | ||
67 | } | |
68 | ||
69 | /** | |
70 | * Return true if this is RSS 0.9.1 content | |
71 | * | |
72 | * | |
73 | */ | |
74 | public static boolean isRSS_0_9_1_Content( String content ) throws Exception { | |
75 | ||
76 | //look for the beginning of the RSS element | |
77 | 0 | return content.indexOf( "<rss" ) != -1; |
78 | ||
79 | } | |
80 | ||
81 | /** | |
82 | * Return true if this is RSS 0.9.2 content | |
83 | * | |
84 | * | |
85 | */ | |
86 | public static boolean isRSS_0_9_2_Content( String content ) throws Exception { | |
87 | ||
88 | //same check as for RSS 0.9.1 | |
89 | 0 | return isRSS_0_9_1_Content( content ); |
90 | ||
91 | } | |
92 | ||
93 | /** | |
94 | * Return true if this is RSS 2.0 content | |
95 | * | |
96 | * | |
97 | */ | |
98 | public static boolean isRSS_2_0_Content( String content ) throws Exception { | |
99 | ||
100 | 0 | return isRSS_0_9_1_Content( content ); |
101 | ||
102 | } | |
103 | ||
104 | /** | |
105 | * Return true if this is RSS 0.9.0 content | |
106 | * | |
107 | * | |
108 | */ | |
109 | public static boolean isRSS_0_9_0_Content( String content ) throws Exception { | |
110 | ||
111 | //FIXME: look for the RDF namespace and the RSS DTD namespace | |
112 | 0 | return content.indexOf( "http://my.netscape.com/rdf/simple/0.9/" ) != -1; |
113 | ||
114 | } | |
115 | ||
116 | public static boolean isAtomContent( String content ) throws Exception { | |
117 | ||
118 | 0 | return content.indexOf( "http://purl.org/atom/ns#" ) != -1; |
119 | ||
120 | } | |
121 | ||
122 | /** | |
123 | * Return true if this is HTML content | |
124 | * | |
125 | * | |
126 | */ | |
127 | public static boolean isHTMLContent( String content ) throws Exception { | |
128 | ||
129 | //look for the beginning of the HTML element | |
130 | 0 | return content.indexOf( "<html" ) != -1; |
131 | ||
132 | } | |
133 | ||
134 | public static void main( String[] args ) { | |
135 | ||
136 | try { | |
137 | ||
138 | //System.out.println( RSSContentVerifier.isRSSContent( new URL( args[0] ) ) ); | |
139 | ||
140 | } catch ( Throwable t ) { | |
141 | ||
142 | t.printStackTrace(); | |
143 | ||
144 | 0 | } |
145 | ||
146 | 0 | } |
147 | ||
148 | } |