Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
BlogService |
|
| 2.0;2 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser.locate.blogservice; | |
18 | ||
19 | import java.net.*; | |
20 | import java.util.*; | |
21 | import java.util.regex.*; | |
22 | ||
23 | import org.apache.commons.feedparser.*; | |
24 | import org.apache.commons.feedparser.locate.*; | |
25 | ||
26 | /** | |
27 | * Models the different kinds of blog services that are available. This | |
28 | * is needed for two reasons. First, sometimes it is useful to simply | |
29 | * know what provider a given weblog is being hosted by, such as Blogger | |
30 | * or PMachine, in order to use special, non-standard capabilities. Second, | |
31 | * many services have "quirks" that don't follow the standards, such as | |
32 | * supporting autodiscovery or supporting it in an incorrect way, and we | |
33 | * therefore need to know what service we are dealing with so that we | |
34 | * can find its feed. | |
35 | * | |
36 | * The BlogService object encapsulates how to determine if a given | |
37 | * weblog is of that type and how to find its feeds. Concrete subclasses, | |
38 | * such as org.apache.commons.feedparser.locate.blogservice.Blogger, | |
39 | * fill in this class and provide the actual way to determine these | |
40 | * things for each blog service type. | |
41 | * | |
42 | * @author Brad Neuberg, bkn3@columbia.edu | |
43 | */ | |
44 | 0 | public abstract class BlogService { |
45 | 0 | protected static List blogServices = new ArrayList(); |
46 | ||
47 | /** Subclasses should have a static block similar to the following: | |
48 | * <code> | |
49 | * { | |
50 | * BlogService.addBlogService(new MyBlogService()); | |
51 | * } | |
52 | * </code> | |
53 | */ | |
54 | ||
55 | /** Locates all the generator meta tags | |
56 | * (i.e. <meta content="generator" content="someGenerator"/>) | |
57 | */ | |
58 | 0 | protected static Pattern metaTagsPattern = |
59 | Pattern.compile("<[\\s]*meta[\\w\\s=\"']*name=['\" ]generator[\"' ][\\w\\s=\"']*[^>]*", | |
60 | Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); | |
61 | ||
62 | /** | |
63 | * A regex to find any trailing filename and strip it | |
64 | */ | |
65 | 0 | protected static Pattern patternToStrip = Pattern.compile("[^/](/\\w*\\.\\w*$)"); |
66 | ||
67 | /** Returns whether we can trust the results of this blog service's | |
68 | * autodiscovery links. For example, TextAmerica returns invalid | |
69 | * autodiscovery results. | |
70 | */ | |
71 | public abstract boolean hasValidAutoDiscovery(); | |
72 | ||
73 | /** Returns whether we should follow HTTP redirects for this blog service. | |
74 | * Some services don't implement HTTP redirects correctly, while others, | |
75 | * like Xanga, require it. | |
76 | */ | |
77 | public abstract boolean followRedirects(); | |
78 | ||
79 | /** Determines if the weblog at the given resource and with the given | |
80 | * content is this blog service. | |
81 | * @param resource A full URI to this resource, such as | |
82 | * "http://www.codinginparadise.org". | |
83 | * @param content The full HTML content at the resource's URL. | |
84 | * @throws FeedParserException Thrown if an error occurs while | |
85 | * determining the type of this weblog. | |
86 | */ | |
87 | public abstract boolean isThisService(String resource, String content) | |
88 | throws FeedParserException; | |
89 | ||
90 | /** | |
91 | * Returns an array of FeedReferences that contains information on the | |
92 | * usual locations this blog service contains its feed. The feeds should | |
93 | * be ordered by quality, so that higher quality feeds come before lower | |
94 | * quality ones (i.e. you would want to have an Atom FeedReference | |
95 | * object come before an RSS 0.91 FeedReference object in this list). | |
96 | * @param resource A URL to the given weblog that might be used to build | |
97 | * up where feeds are usually located. | |
98 | * @param content The full content of the resource URL, which might | |
99 | * be useful to determine where feeds are usually located. This can be | |
100 | * null. | |
101 | * @throws FeedParserException Thrown if an error occurs while trying | |
102 | * to determine the usual locations of feeds for this service. | |
103 | */ | |
104 | public abstract FeedReference[] getFeedLocations(String resource, | |
105 | String content) | |
106 | throws FeedParserException; | |
107 | ||
108 | /** Determines if the weblog at the given resource is this blog service. | |
109 | * @param resource A full URI to this resource, such as | |
110 | * "http://www.codinginparadise.org". | |
111 | * @throws FeedParserException Thrown if an error occurs while | |
112 | * determining the type of this weblog. | |
113 | */ | |
114 | public boolean isThisService(String resource) throws FeedParserException { | |
115 | 0 | return isThisService(resource, null); |
116 | } | |
117 | ||
118 | /** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php", | |
119 | * and gets the path necessary to build up a feed, such as | |
120 | * "http://www.codinginparadise.org/". Basicly it appends a slash | |
121 | * to the end if there is not one, and removes any file names that | |
122 | * might be at the end, such as "myweblog.php". | |
123 | * | |
124 | * There is a special exception for some Blosxom blogs, | |
125 | * which have things inside of a cgi-script and 'hang' their RSS files | |
126 | * off of this cgi-bin. For example, | |
127 | * http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file | |
128 | * at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so | |
129 | * we must return the blosxom.cgi at the end as well for this method. | |
130 | * | |
131 | * @throws MalformedURLException Thrown if the given resource's URL is | |
132 | * incorrectly formatted. | |
133 | */ | |
134 | public String getBaseFeedPath( String resource ) { | |
135 | // strip off any query string or anchors | |
136 | 0 | int end = resource.lastIndexOf( "#" ); |
137 | ||
138 | 0 | if ( end != -1 ) |
139 | 0 | resource = resource.substring( 0, end ); |
140 | ||
141 | 0 | end = resource.lastIndexOf( "?" ); |
142 | ||
143 | 0 | if ( end != -1 ) |
144 | 0 | resource = resource.substring( 0, end ); |
145 | ||
146 | 0 | Matcher fileMatcher = patternToStrip.matcher(resource); |
147 | 0 | if (fileMatcher.find()) { |
148 | 0 | String stringToStrip = fileMatcher.group(1); |
149 | 0 | int startStrip = resource.indexOf(stringToStrip); |
150 | 0 | resource = resource.substring(0, startStrip); |
151 | } | |
152 | ||
153 | 0 | if ( ! resource.endsWith( "/" ) ) { |
154 | 0 | resource = resource + "/"; |
155 | } | |
156 | ||
157 | 0 | return resource; |
158 | } | |
159 | ||
160 | public String toString() { | |
161 | 0 | return this.getClass().getName(); |
162 | } | |
163 | ||
164 | public boolean equals(Object obj) { | |
165 | 0 | if (obj == null) |
166 | 0 | return false; |
167 | ||
168 | 0 | if (obj instanceof BlogService == false) |
169 | 0 | return false; |
170 | ||
171 | 0 | return (obj.getClass().equals(this.getClass())); |
172 | } | |
173 | ||
174 | public int hashCode() { | |
175 | 0 | return this.getClass().hashCode(); |
176 | } | |
177 | ||
178 | /** Gets an array of all of the available BlogService implementations. */ | |
179 | public static BlogService[] getBlogServices() { | |
180 | 0 | if (blogServices.size() == 0) |
181 | 0 | initializeBlogServices(); |
182 | ||
183 | 0 | BlogService[] results = new BlogService[blogServices.size()]; |
184 | ||
185 | 0 | return (BlogService[])blogServices.toArray(results); |
186 | } | |
187 | ||
188 | // **** util code *********************************************************** | |
189 | // These methods are useful for non-abstract subclasses of this object | |
190 | // to actually implement their functionality. | |
191 | ||
192 | /** Determines if the given resource contains the given domain name | |
193 | * fragment. | |
194 | */ | |
195 | protected boolean containsDomain(String resource, String domain) { | |
196 | 0 | return (resource.indexOf(domain) != -1); |
197 | } | |
198 | ||
199 | /** | |
200 | * Determines if the given content was generated by the given generator. | |
201 | ||
202 | * Example. This document contains a meta tag with name="generator" and | |
203 | * content equal to the generatorType). | |
204 | */ | |
205 | protected boolean hasGenerator(String content, String generatorType) { | |
206 | 0 | if (content == null) { |
207 | 0 | return false; |
208 | } | |
209 | ||
210 | 0 | Matcher metaTagsMatcher = metaTagsPattern.matcher(content); |
211 | 0 | if (metaTagsMatcher.find()) { |
212 | 0 | String metaTag = metaTagsMatcher.group(0).toLowerCase(); |
213 | 0 | generatorType = generatorType.toLowerCase(); |
214 | 0 | return (metaTag.indexOf(generatorType) != -1); |
215 | } | |
216 | else { | |
217 | 0 | return false; |
218 | } | |
219 | } | |
220 | ||
221 | protected static void initializeBlogServices() { | |
222 | 0 | blogServices.add(new AOLJournal()); |
223 | 0 | blogServices.add(new Blogger()); |
224 | 0 | blogServices.add(new Blosxom()); |
225 | 0 | blogServices.add(new DiaryLand()); |
226 | 0 | blogServices.add(new ExpressionEngine()); |
227 | 0 | blogServices.add(new Flickr()); |
228 | 0 | blogServices.add(new GreyMatter()); |
229 | 0 | blogServices.add(new iBlog()); |
230 | 0 | blogServices.add(new LiveJournal()); |
231 | 0 | blogServices.add(new Manila()); |
232 | 0 | blogServices.add(new MovableType()); |
233 | 0 | blogServices.add(new PMachine()); |
234 | 0 | blogServices.add(new RadioUserland()); |
235 | 0 | blogServices.add(new TextAmerica()); |
236 | 0 | blogServices.add(new TextPattern()); |
237 | 0 | blogServices.add(new Typepad()); |
238 | 0 | blogServices.add(new WordPress()); |
239 | 0 | blogServices.add(new Xanga()); |
240 | 0 | blogServices.add(new YahooGroups()); |
241 | 0 | } |
242 | } |