Coverage Report - org.apache.commons.feedparser.locate.FeedLocator
 
Classes in this File Line Coverage Branch Coverage Complexity
FeedLocator
0%
0/32
0%
0/4
1.667
 
 1  
 /*
 2  
  * Copyright 1999,2004 The Apache Software Foundation.
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  * 
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.feedparser.locate;
 18  
 
 19  
 import java.util.Iterator;
 20  
 
 21  
 import org.apache.commons.feedparser.FeedList;
 22  
 import org.apache.commons.feedparser.network.ResourceRequest;
 23  
 import org.apache.commons.feedparser.network.ResourceRequestFactory;
 24  
 import org.apache.log4j.Logger;
 25  
 
 26  
 /**
 27  
  * Method to determine feed URLs from a given resource URI.  For example,
 28  
  * you would pass in the URI:
 29  
  * 
 30  
  * http://www.codinginparadise.org
 31  
  * 
 32  
  * and this class would pass back a FeedList with one address of the feed URL,
 33  
  * which is
 34  
  * 
 35  
  * http://www.codinginparadise.org/weblog/atom.xml
 36  
  *
 37  
  * <code>
 38  
  * String resource = "http://www.codinginparadise.org";
 39  
  * FeedList l = FeedLocator.locate( resource );
 40  
  * </code>
 41  
  * 
 42  
  * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
 43  
  */
 44  0
 public class FeedLocator {
 45  
 
 46  0
     private static Logger log = Logger.getLogger( FeedLocator.class );
 47  
     
 48  
     /**
 49  
      * Locate all feeds within the given resource.  The resource should be a link
 50  
      * to an (X)HTML document, usually a weblog or a website.
 51  
      * 
 52  
      * Example: http://peerfear.org
 53  
      *
 54  
      * @param resource Link to the weblog or website whose feeds we need to discover
 55  
      * 
 56  
      */
 57  
     public static final FeedList locate( String resource ) throws Exception {
 58  
         // TODO: Use my network library when it's migrated into Apache.
 59  
         
 60  
         //fetch content
 61  0
         ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
 62  
 
 63  0
         String content = request.getInputStreamAsString();
 64  
 
 65  
         //return resources
 66  0
         return locate( resource, content );
 67  
         
 68  
     }
 69  
 
 70  
     /**
 71  
      * Locate the feeds for the given resource using its already-fetched content.
 72  
      *
 73  
      * 
 74  
      */
 75  
     public static final FeedList locate( String resource, String content ) throws Exception {
 76  
 
 77  0
         log.info( "Locating " + resource + "..." );
 78  
         
 79  0
         FeedList list = new FeedList();
 80  
 
 81  
         //FIXME: if we were GIVEN an RSS/Atom/OPML/etc file then we should just
 82  
         //attempt to use this and return a FeedList with just one entry.  Parse
 83  
         //it first I think to make sure it's valid XML and then move forward.
 84  
         //The downside here is that it would be wasted CPU if it's HTML content.
 85  
         
 86  0
         log.debug( "Using DiscoveryLocator..." );
 87  0
         DiscoveryLocator.locate( resource, content, list );
 88  0
         log.debug("after discoverylocator, list="+list);
 89  
 
 90  0
         log.debug( "Using LinkLocator..." );
 91  
         //next, try looking for links (this call is made whether or not discovery found anything)
 92  0
         LinkLocator.locate( resource, content, list );
 93  0
         log.debug("after linklocator, list="+list);
 94  
 
 95  
         //this failed... try probe location.  This is more reliable than
 96  
         //LinkLocator but requires a few more HTTP GETs.
 97  0
         log.debug( "Using ProbeLocator..." );
 98  0
         ProbeLocator.locate( resource, content, list );
 99  0
         log.debug("after probelocator, list="+list);
 100  
         
 101  0
         log.info( "After locating, list="+list );
 102  
             
 103  0
         return list;
 104  
         
 105  
     }
 106  
 
 107  
     public static void main( String[] args ) throws Exception {
 108  
 
 109  
         //This should find http://www.electoral-vote.com/index.rss
 110  
         //String resource = "http://brendonwilson.com/";
 111  
 
 112  
         //String resource = "file:///projects/feedparser/tests/locate4.html";
 113  
         //String resource = "file:///projects/feedparser/tests/locate5.html";
 114  
         //String resource = "file:///projects/feedparser/tests/locate6.html";
 115  
 
 116  
         //FIXME: add UNIT TESTS for Yahoo Groups and Flickr
 117  
 
 118  0
         String resource = "http://craigslist.org/w4m/";
 119  
         
 120  
         //String resource = "http://groups.yahoo.com/group/aggregators/";
 121  
 
 122  
         //String resource = "http://flickr.com/photos/tags/cats";
 123  
 
 124  
         //String resource = "file:///projects/feedparser/tests/locate8.html";
 125  
 
 126  
         //String resource = "http://blogs.sun.com/roller/page/gonzo";
 127  
 
 128  
         //String resource = "http://gonze.com/weblog/";
 129  
 
 130  
         //String resource = "http://codinginparadise.org/";
 131  
 
 132  
         //        String resource = "http://bucsfishingreport.com/pMachine/weblog.php";
 133  
         
 134  
         //String resource = "http://www.livejournal.com/community/indiexiankids/";
 135  
 //String resource= "http://www.thealarmclock.com/mt/";
 136  
         
 137  
         //String resource = "http://guinness.joeuser.com";
 138  
         
 139  
         //String resource = "http://georgewbush.com/blog";
 140  
 
 141  
         //String resource = "http://carolinascl.blogspot.com/";
 142  
         
 143  
         //String resource = "http://www.corante.com/strange/";
 144  
         //String resource = "http://peerfear.org";
 145  
 
 146  0
         ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true;
 147  0
         ProbeLocator.AGGRESIVE_PROBING_ENABLED = true;
 148  
 
 149  0
         FeedList l = locate( resource );
 150  
 
 151  0
         Iterator it = l.iterator();
 152  
 
 153  0
         if ( it.hasNext() == false ) {
 154  0
             System.out.println( "NO LINKS FOUND" );
 155  
         } 
 156  
 
 157  0
         System.out.println( "AD RSS: " + l.getAdRSSFeed() );
 158  0
         System.out.println( "AD Atom: " + l.getAdAtomFeed() );
 159  
         
 160  0
         while ( it.hasNext() ) {
 161  
 
 162  0
             FeedReference ref = (FeedReference)it.next();
 163  
 
 164  0
             System.out.println( ref.resource );
 165  
             
 166  0
         }
 167  
 
 168  0
     }
 169  
 
 170  
 }