Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
FeedLocator |
|
| 1.6666666666666667;1.667 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser.locate; | |
18 | ||
19 | import java.util.Iterator; | |
20 | ||
21 | import org.apache.commons.feedparser.FeedList; | |
22 | import org.apache.commons.feedparser.network.ResourceRequest; | |
23 | import org.apache.commons.feedparser.network.ResourceRequestFactory; | |
24 | import org.apache.log4j.Logger; | |
25 | ||
26 | /** | |
27 | * Method to determine feed URLs from a given resource URI. For example, | |
28 | * you would pass in the URI: | |
29 | * | |
30 | * http://www.codinginparadise.org | |
31 | * | |
32 | * and this class would pass back a List with one address of the feed URL, | |
33 | * which is | |
34 | * | |
35 | * http://www.codinginparadise.org/weblog/atom.xml" | |
36 | * | |
37 | * <code> | |
38 | * String resource = "http://www.codinginparadise.org"; | |
39 | * FeedList l = FeedLocator.locate( resource ); | |
40 | * </code> | |
41 | * | |
42 | * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a> | |
43 | */ | |
44 | 0 | public class FeedLocator { |
45 | ||
46 | 0 | private static Logger log = Logger.getLogger( FeedLocator.class ); |
47 | ||
48 | /** | |
49 | * Locate all feeds within the given resource. The resource should be a link | |
50 | * to an (X)HTML document, usually a weblog or a website. | |
51 | * | |
52 | * Example: http://peerfear.org | |
53 | * | |
54 | * @param resource The weblog we need to discover | |
55 | * | |
56 | */ | |
57 | public static final FeedList locate( String resource ) throws Exception { | |
58 | // \: Use my network library when it's migrated into Apache. | |
59 | ||
60 | //fetch content | |
61 | 0 | ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource ); |
62 | ||
63 | 0 | String content = request.getInputStreamAsString(); |
64 | ||
65 | //return resources | |
66 | 0 | return locate( resource, content ); |
67 | ||
68 | } | |
69 | ||
70 | /** | |
71 | * Locate the feed with the given content. | |
72 | * | |
73 | * | |
74 | */ | |
75 | public static final FeedList locate( String resource, String content ) throws Exception { | |
76 | ||
77 | 0 | log.info( "Locating " + resource + "..." ); |
78 | ||
79 | 0 | FeedList list = new FeedList(); |
80 | ||
81 | //FIXME: if we were GIVEN an RSS/Atom/OPML/etc file then we should just | |
82 | //attempt to use this and return a FeedList with just one entry. Parse | |
83 | //it first I think to make sure its valid XML and then move forward. | |
84 | //The downside here is that it would be wasted CPU if its HTML content. | |
85 | ||
86 | 0 | log.debug( "Using DiscoveryLocator..." ); |
87 | 0 | DiscoveryLocator.locate( resource, content, list ); |
88 | 0 | log.debug("after discoverylocator, list="+list); |
89 | ||
90 | 0 | log.debug( "Using LinkLocator..." ); |
91 | //this failed... try looking for links | |
92 | 0 | LinkLocator.locate( resource, content, list ); |
93 | 0 | log.debug("after linklocator, list="+list); |
94 | ||
95 | //this failed... try probe location. This is more reliable than | |
96 | //LinkLocation but requires a few more HTTP gets. | |
97 | 0 | log.debug( "Using ProbeLocator..." ); |
98 | 0 | ProbeLocator.locate( resource, content, list ); |
99 | 0 | log.debug("after probelocator, list="+list); |
100 | ||
101 | 0 | log.info( "After locating, list="+list ); |
102 | ||
103 | 0 | return list; |
104 | ||
105 | } | |
106 | ||
107 | public static void main( String[] args ) throws Exception { | |
108 | ||
109 | //This should find http://www.electoral-vote.com/index.rss | |
110 | //String resource = "http://brendonwilson.com/"; | |
111 | ||
112 | //String resource = "file:///projects/feedparser/tests/locate4.html"; | |
113 | //String resource = "file:///projects/feedparser/tests/locate5.html"; | |
114 | //String resource = "file:///projects/feedparser/tests/locate6.html"; | |
115 | ||
116 | //FIXME: add UNIT TESTS for Yahoo Groups and Flickr | |
117 | ||
118 | 0 | String resource = "http://craigslist.org/w4m/"; |
119 | ||
120 | //String resource = "http://groups.yahoo.com/group/aggregators/"; | |
121 | ||
122 | //String resource = "http://flickr.com/photos/tags/cats"; | |
123 | ||
124 | //String resource = "file:///projects/feedparser/tests/locate8.html"; | |
125 | ||
126 | //String resource = "http://blogs.sun.com/roller/page/gonzo"; | |
127 | ||
128 | //String resource = "http://gonze.com/weblog/"; | |
129 | ||
130 | //String resource = "http://codinginparadise.org/"; | |
131 | ||
132 | // String resource = "http://bucsfishingreport.com/pMachine/weblog.php"; | |
133 | ||
134 | //String resource = "http://www.livejournal.com/community/indiexiankids/"; | |
135 | //String resource= "http://www.thealarmclock.com/mt/"; | |
136 | ||
137 | //String resource = "http://guinness.joeuser.com"; | |
138 | ||
139 | //String resource = "http://georgewbush.com/blog"; | |
140 | ||
141 | //String resource = "http://carolinascl.blogspot.com/"; | |
142 | ||
143 | //String resource = "http://www.corante.com/strange/"; | |
144 | //String resource = "http://peerfear.org"; | |
145 | ||
146 | 0 | ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true; |
147 | 0 | ProbeLocator.AGGRESIVE_PROBING_ENABLED = true; |
148 | ||
149 | 0 | FeedList l = locate( resource ); |
150 | ||
151 | 0 | Iterator it = l.iterator(); |
152 | ||
153 | 0 | if ( it.hasNext() == false ) { |
154 | 0 | System.out.println( "NO LINKS FOUND" ); |
155 | } | |
156 | ||
157 | 0 | System.out.println( "AD RSS: " + l.getAdRSSFeed() ); |
158 | 0 | System.out.println( "AD Atom: " + l.getAdAtomFeed() ); |
159 | ||
160 | 0 | while ( it.hasNext() ) { |
161 | ||
162 | 0 | FeedReference ref = (FeedReference)it.next(); |
163 | ||
164 | 0 | System.out.println( ref.resource ); |
165 | ||
166 | 0 | } |
167 | ||
168 | 0 | } |
169 | ||
170 | } |