Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
ProbeLocator |
|
| 4.666666666666667;4.667 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser.locate; | |
18 | ||
19 | import java.util.ArrayList; | |
20 | import java.util.HashSet; | |
21 | import java.util.Iterator; | |
22 | import java.util.List; | |
23 | import java.util.Set; | |
24 | ||
25 | import org.apache.commons.feedparser.FeedList; | |
26 | import org.apache.commons.feedparser.locate.blogservice.BlogService; | |
27 | import org.apache.commons.feedparser.locate.blogservice.Unknown; | |
28 | import org.apache.commons.feedparser.network.ResourceRequest; | |
29 | import org.apache.commons.feedparser.network.ResourceRequestFactory; | |
30 | import org.apache.log4j.Logger; | |
31 | ||
32 | /** | |
33 | * Locator which uses Link probing. It also attempts to determine the type of | |
34 | * blog service provider it is dealing with, such as BlogSpot, Blogsxom, etc., | |
35 | * in order to find feed URLs that are not specified through autodiscovery. | |
36 | * | |
37 | * If ProbeLocator.AGGRESIVE_PROBING_ENABLED is true (by default it is false), | |
38 | * then we probe for links. | |
39 | * | |
40 | * | |
41 | * | |
42 | * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a> | |
43 | */ | |
44 | 0 | public class ProbeLocator { |
45 | ||
46 | 0 | private static Logger log = Logger.getLogger( ProbeLocator.class ); |
47 | ||
48 | /** If true, then we aggresively probe a site if it doesn't have | |
49 | * autodiscovery. This includes trying to determine what the blog provider | |
50 | * is, trying individual locations based on a blog provider, and probing | |
51 | * in several locations if the blog provider is unknown. | |
52 | * | |
53 | * The default value for this should be false. This should only be | |
54 | * used on server-side aggregators that generate few requests, and | |
55 | * _never_ on client-side aggregators. The level of traffic for | |
56 | * client-side aggregators would be too great. | |
57 | */ | |
58 | 0 | public static boolean AGGRESIVE_PROBING_ENABLED = false; |
59 | ||
60 | /** If true, then after discovering what a site's blog provider is we | |
61 | * probe in select locations for feeds based on the provider. This | |
62 | * is useful if autodiscovery is not enabled on this blog and we don't | |
63 | * want to do the full aggresive probing. | |
64 | * | |
65 | * The default value for this should be false. This should only | |
66 | * be used on server-side aggregators that generate few requests, | |
67 | * and _never_ on client-side aggregators. The level of traffic | |
68 | * for client-side aggregators would be too great. | |
69 | */ | |
70 | 0 | public static boolean BLOG_SERVICE_PROBING_ENABLED = false; |
71 | ||
72 | ||
73 | /** | |
74 | * | |
75 | * | |
76 | */ | |
77 | public static final List locate( String resource, String content, FeedList list ) | |
78 | throws Exception { | |
79 | 0 | log.debug("ProbeLocator, resource="+resource+", list="+list); |
80 | ||
81 | // determine what blog service we are dealing with | |
82 | 0 | BlogService blogService = BlogServiceDiscovery.discover( resource, content ); |
83 | 0 | log.debug("blogService="+blogService); |
84 | 0 | log.debug("blogService.hasValidAutoDiscovery="+blogService.hasValidAutoDiscovery()); |
85 | // fail-fast if we already have some results and if we determine that | |
86 | // we can trust the results (TextAmerica has invalid autodiscovery, | |
87 | // for example) | |
88 | 0 | if ( list.size() > 0 && blogService.hasValidAutoDiscovery() ) |
89 | 0 | return list; |
90 | 0 | else if ( blogService.hasValidAutoDiscovery() == false ) { |
91 | // clear out the list so far since we can't trust the results | |
92 | 0 | list.clear(); |
93 | } | |
94 | ||
95 | 0 | if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) { |
96 | 0 | log.debug("PROBING!!"); |
97 | 0 | List servicesToTry = new ArrayList(); |
98 | 0 | servicesToTry.add(blogService); |
99 | // only try the Unknown service if we want aggresive probing | |
100 | 0 | if (AGGRESIVE_PROBING_ENABLED) |
101 | 0 | servicesToTry.add(new Unknown()); |
102 | 0 | Iterator iter = servicesToTry.iterator(); |
103 | 0 | Set previousAttempts = new HashSet(); |
104 | ||
105 | 0 | while (iter.hasNext() && list.size() == 0) { |
106 | 0 | BlogService currentService = (BlogService)iter.next(); |
107 | 0 | FeedReference[] mapping = currentService.getFeedLocations(resource, content); |
108 | 0 | log.debug( "mapping = " + mapping ); |
109 | ||
110 | // try out each mapping | |
111 | 0 | for (int i = 0; i < mapping.length; i++) { |
112 | 0 | String baseFeedPath = currentService.getBaseFeedPath(resource); |
113 | String pathToTest ; | |
114 | // build up our path to test differently if we are a | |
115 | // relative or an exact path; needed because some | |
116 | // blog services rewrite the domain name, such as | |
117 | // Yahoo Groups | |
118 | 0 | if (mapping[i].isRelative()) |
119 | 0 | pathToTest = baseFeedPath + mapping[i].resource; |
120 | else | |
121 | 0 | pathToTest = mapping[i].resource; |
122 | ||
123 | 0 | log.debug( "pathToTest = " + pathToTest ); |
124 | ||
125 | 0 | if ( !previousAttempts.contains( pathToTest ) |
126 | && feedExists( pathToTest, currentService ) ) { | |
127 | 0 | log.debug("Feed exists"); |
128 | 0 | FeedReference feedReference = new FeedReference( pathToTest, |
129 | mapping[i].type ); | |
130 | 0 | feedReference.method = FeedReference.METHOD_PROBE_DISCOVERY; |
131 | 0 | previousAttempts.add( pathToTest ); |
132 | 0 | onFeedReference( feedReference, list ); |
133 | } | |
134 | ||
135 | // record this attempt so we don't repeat it again if | |
136 | // we are doing aggresive probing | |
137 | 0 | previousAttempts.add( pathToTest ); |
138 | } | |
139 | 0 | } |
140 | ||
141 | 0 | log.info( "Using aggresive probing, found the following:" ); |
142 | 0 | log.info( "Blog service: " + blogService ); |
143 | } | |
144 | ||
145 | 0 | log.info( "List: " + list ); |
146 | 0 | log.info( "RSS feed: " + list.getAdRSSFeed() ); |
147 | 0 | log.info( "Atom feed: " + list.getAdAtomFeed() ); |
148 | 0 | return list; |
149 | ||
150 | } | |
151 | ||
152 | /** | |
153 | * Called each time we find a feed so that we can set the Ad method. | |
154 | * | |
155 | * FIXME: This doesn't seem like the right place for this. Can you | |
156 | * document this more? It's cryptic. Brad Neuberg, bkn3@columbia.edu. | |
157 | * | |
158 | */ | |
159 | private static void onFeedReference( FeedReference ref, FeedList list ) { | |
160 | ||
161 | 0 | if ( list.getAdAtomFeed() == null && |
162 | FeedReference.ATOM_MEDIA_TYPE.equals( ref.type ) ) { | |
163 | ||
164 | 0 | list.setAdAtomFeed( ref ); |
165 | ||
166 | 0 | } else if ( list.getAdRSSFeed() == null && |
167 | FeedReference.RSS_MEDIA_TYPE.equals( ref.type ) ) { | |
168 | ||
169 | 0 | list.setAdRSSFeed( ref ); |
170 | ||
171 | } | |
172 | ||
173 | 0 | list.add( ref ); |
174 | ||
175 | 0 | } |
176 | ||
177 | /** Does an HTTP HEAD to see if the given resource exists. | |
178 | * | |
179 | * @param resource The full URI to the resource to check for. | |
180 | * | |
181 | * | |
182 | */ | |
183 | protected static boolean feedExists( String resource, | |
184 | BlogService blogService) | |
185 | throws Exception { | |
186 | ||
187 | 0 | log.debug("feedExists, resource="+resource); |
188 | 0 | ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource ); |
189 | ||
190 | 0 | request.setRequestMethod( "HEAD" ); |
191 | ||
192 | // Some services need to follow redirects; others block if you do. | |
193 | // Ask the blog service itself what to do. | |
194 | 0 | request.setFollowRedirects( blogService.followRedirects() ); |
195 | ||
196 | // the call below actually causes the connection to be made | |
197 | 0 | request.getContentLength(); |
198 | ||
199 | 0 | long response = request.getResponseCode(); |
200 | 0 | log.debug("response="+response); |
201 | ||
202 | 0 | return response == 200; |
203 | } | |
204 | ||
205 | ||
206 | ||
207 | } |