Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
Blosxom |
|
| 1.6;1.6 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser.locate.blogservice; | |
18 | ||
19 | import java.net.MalformedURLException; | |
20 | import java.util.regex.*; | |
21 | ||
22 | import org.apache.commons.feedparser.FeedParserException; | |
23 | import org.apache.commons.feedparser.locate.*; | |
24 | ||
25 | /** | |
26 | * Models the Blosxom blog service, encapsulating whether a given weblog | |
27 | * is this type of service and where it usually keeps its feeds. | |
28 | * | |
29 | * @author Brad Neuberg, bkn3@columbia.edu | |
30 | */ | |
31 | 0 | public class Blosxom extends BlogService { |
32 | ||
33 | /** A pattern used to discover Blosxom blogs. */ | |
34 | 0 | private static Pattern blosxomPattern = |
35 | Pattern.compile("alt=[\"' ]powered by blosxom[\"' ]", | |
36 | Pattern.CASE_INSENSITIVE); | |
37 | ||
38 | /** Returns whether we can trust the results of this blog service's | |
39 | * autodiscovery links. For example, TextAmerica returns invalid | |
40 | * autodiscovery results. | |
41 | */ | |
42 | public boolean hasValidAutoDiscovery() { | |
43 | 0 | return true; |
44 | } | |
45 | ||
46 | /** Returns whether we should follow HTTP redirects for this blog service. | |
47 | * Some services don't implement HTTP redirects correctly, while others, | |
48 | * like Xanga, require it. | |
49 | */ | |
50 | public boolean followRedirects() { | |
51 | 0 | return false; |
52 | } | |
53 | ||
54 | /** Determines if the weblog at the given resource and with the given | |
55 | * content is this blog service. | |
56 | * @param resource A full URI to this resource, such as | |
57 | * "http://www.codinginparadise.org". | |
58 | * @param content The full HTML content at the resource's URL. | |
59 | * @throws FeedParserException Thrown if an error occurs while | |
60 | * determining the type of this weblog. | |
61 | */ | |
62 | public boolean isThisService(String resource, String content) | |
63 | throws FeedParserException { | |
64 | 0 | boolean results = false; |
65 | ||
66 | // This is the only kind of blog that we need to check for a | |
67 | // 'Powered by Blosxom'. We do this with the alt= value on the | |
68 | // Powered By image. | |
69 | // FIXME: This might be fragile, but it is used across all of the | |
70 | // Blosxom blogs I have looked at so far. Brad Neuberg, bkn3@columbia.edu | |
71 | ||
72 | 0 | Matcher blosxomMatcher = blosxomPattern.matcher(content); |
73 | 0 | results = blosxomMatcher.find(); |
74 | ||
75 | 0 | return results; |
76 | } | |
77 | ||
78 | /** | |
79 | * Returns an array of FeedReferences that contains information on the | |
80 | * usual locations this blog service contains its feed. The feeds should | |
81 | * be ordered by quality, so that higher quality feeds come before lower | |
82 | * quality ones (i.e. you would want to have an Atom FeedReference | |
83 | * object come before an RSS 0.91 FeedReference object in this list). | |
84 | * @param resource A URL to the given weblog that might be used to build | |
85 | * up where feeds are usually located. | |
86 | * @param content The full content of the resource URL, which might | |
87 | * be useful to determine where feeds are usually located. This can be | |
88 | * null. | |
89 | * @throws FeedParserException Thrown if an error occurs while trying | |
90 | * to determine the usual locations of feeds for this service. | |
91 | */ | |
92 | public FeedReference[] getFeedLocations(String resource, | |
93 | String content) | |
94 | throws FeedParserException { | |
95 | // there is sometimes an index.rss20 file, but Blosxom has a bug where | |
96 | // it incorrectly responds to HTTP HEAD requests for that file, | |
97 | // saying that it exists when it doesn't. Most sites don't seem | |
98 | // to have this file so we don't include it here. | |
99 | // Brad Neuberg, bkn3@columbia.edu | |
100 | 0 | FeedReference[] blosxomLocations = |
101 | { new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE) }; | |
102 | ||
103 | 0 | return blosxomLocations; |
104 | } | |
105 | ||
106 | /** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php", | |
107 | * and gets the path necessary to build up a feed, such as | |
108 | * "http://www.codinginparadise.org/". Basicly it appends a slash | |
109 | * to the end if there is not one, and removes any file names that | |
110 | * might be at the end, such as "myweblog.php". | |
111 | * | |
112 | * There is a special exception for some Blosxom blogs, | |
113 | * which have things inside of a cgi-script and 'hang' their RSS files | |
114 | * off of this cgi-bin. For example, | |
115 | * http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file | |
116 | * at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so | |
117 | * we must return the blosxom.cgi at the end as well for this method. | |
118 | * | |
119 | * @throws MalformedURLException Thrown if the given resource's URL is | |
120 | * incorrectly formatted. | |
121 | */ | |
122 | public String getBaseFeedPath( String resource ) { | |
123 | ||
124 | // strip off any query string or anchors | |
125 | 0 | int end = resource.lastIndexOf( "#" ); |
126 | ||
127 | 0 | if ( end != -1 ) |
128 | 0 | resource = resource.substring( 0, end ); |
129 | ||
130 | 0 | end = resource.lastIndexOf( "?" ); |
131 | ||
132 | 0 | if ( end != -1 ) |
133 | 0 | resource = resource.substring( 0, end ); |
134 | ||
135 | 0 | if ( ! resource.endsWith( "/" ) ) { |
136 | 0 | resource = resource + "/"; |
137 | } | |
138 | ||
139 | 0 | return resource; |
140 | } | |
141 | } |