Coverage Report - org.apache.commons.feedparser.locate.ProbeLocator
 
Classes in this File Line Coverage Branch Coverage Complexity
ProbeLocator
0%
0/58
0%
0/34
4.667
 
 1  
 /*
 2  
  * Copyright 1999,2004 The Apache Software Foundation.
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  * 
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.feedparser.locate;
 18  
 
 19  
 import java.util.ArrayList;
 20  
 import java.util.HashSet;
 21  
 import java.util.Iterator;
 22  
 import java.util.List;
 23  
 import java.util.Set;
 24  
 
 25  
 import org.apache.commons.feedparser.FeedList;
 26  
 import org.apache.commons.feedparser.locate.blogservice.BlogService;
 27  
 import org.apache.commons.feedparser.locate.blogservice.Unknown;
 28  
 import org.apache.commons.feedparser.network.ResourceRequest;
 29  
 import org.apache.commons.feedparser.network.ResourceRequestFactory;
 30  
 import org.apache.log4j.Logger;
 31  
 
 32  
 /**
 33  
  * Locator which uses Link probing.  It also attempts to determine the type of
 34  
  * blog service provider it is dealing with, such as BlogSpot, Blogsxom, etc.,
 35  
  * in order to find feed URLs that are not specified through autodiscovery.
 36  
  * 
 37  
  * If ProbeLocator.AGGRESIVE_PROBING_ENABLED is true (by default it is false),
 38  
  * then we probe for links.
 39  
  * 
 40  
  * 
 41  
  * 
 42  
  * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
 43  
  */
 44  0
 public class ProbeLocator {
 45  
 
 46  0
     private static Logger log = Logger.getLogger( ProbeLocator.class );
 47  
 
 48  
     /** If true, then we aggresively probe a site if it doesn't have
 49  
      *  autodiscovery.  This includes trying to determine what the blog provider
 50  
      *  is, trying individual locations based on a blog provider, and probing
 51  
      *  in several locations if the blog provider is unknown.
 52  
      * 
 53  
      *  The default value for this should be false.  This should only be 
 54  
      *  used on server-side aggregators that generate few requests, and 
 55  
      *  _never_ on client-side aggregators.  The level of traffic for 
 56  
      *  client-side aggregators would be too great.
 57  
      */
 58  0
     public static boolean AGGRESIVE_PROBING_ENABLED = false;
 59  
     
 60  
     /** If true, then after discovering what a site's blog provider is we
 61  
      *  probe in select locations for feeds based on the provider.  This
 62  
      *  is useful if autodiscovery is not enabled on this blog and we don't
 63  
      *  want to do the full aggresive probing.
 64  
      * 
 65  
      *  The default value for this should be false.  This should only 
 66  
      *  be used on server-side aggregators that generate few requests, 
 67  
      *  and _never_ on client-side aggregators.  The level of traffic 
 68  
      *  for client-side aggregators would be too great.
 69  
      */
 70  0
     public static boolean BLOG_SERVICE_PROBING_ENABLED = false;
 71  
 
 72  
     
 73  
     /**
 74  
      *
 75  
      * 
 76  
      */
 77  
     public static final List locate( String resource, String content, FeedList list )
 78  
         throws Exception {
 79  0
         log.debug("ProbeLocator, resource="+resource+", list="+list);
 80  
 
 81  
         // determine what blog service we are dealing with
 82  0
         BlogService blogService = BlogServiceDiscovery.discover( resource, content );  
 83  0
         log.debug("blogService="+blogService);
 84  0
         log.debug("blogService.hasValidAutoDiscovery="+blogService.hasValidAutoDiscovery());
 85  
         // fail-fast if we already have some results and if we determine that
 86  
         // we can trust the results (TextAmerica has invalid autodiscovery,
 87  
         // for example)
 88  0
         if ( list.size() > 0 && blogService.hasValidAutoDiscovery() )
 89  0
             return list;
 90  0
         else if ( blogService.hasValidAutoDiscovery() == false ) {
 91  
             // clear out the list so far since we can't trust the results
 92  0
             list.clear();
 93  
         }
 94  
 
 95  0
         if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) {
 96  0
             log.debug("PROBING!!");
 97  0
             List servicesToTry = new ArrayList();
 98  0
             servicesToTry.add(blogService);
 99  
             // only try the Unknown service if we want aggresive probing
 100  0
             if (AGGRESIVE_PROBING_ENABLED)
 101  0
                 servicesToTry.add(new Unknown());
 102  0
             Iterator iter = servicesToTry.iterator();
 103  0
             Set previousAttempts = new HashSet();
 104  
             
 105  0
             while (iter.hasNext() && list.size() == 0) {
 106  0
                 BlogService currentService = (BlogService)iter.next();
 107  0
                 FeedReference[] mapping = currentService.getFeedLocations(resource, content);
 108  0
                 log.debug( "mapping = " + mapping );
 109  
             
 110  
                 // try out each mapping
 111  0
                 for (int i = 0; i < mapping.length; i++) {
 112  0
                     String baseFeedPath = currentService.getBaseFeedPath(resource);
 113  
                     String pathToTest ;
 114  
                     // build up our path to test differently if we are a
 115  
                     // relative or an exact path; needed because some
 116  
                     // blog services rewrite the domain name, such as
 117  
                     // Yahoo Groups
 118  0
                     if (mapping[i].isRelative())
 119  0
                         pathToTest = baseFeedPath + mapping[i].resource;
 120  
                     else
 121  0
                         pathToTest = mapping[i].resource;
 122  
                     
 123  0
                     log.debug( "pathToTest = " + pathToTest );
 124  
 
 125  0
                     if ( !previousAttempts.contains( pathToTest ) 
 126  
                          && feedExists( pathToTest, currentService ) ) {
 127  0
                         log.debug("Feed exists");
 128  0
                         FeedReference feedReference = new FeedReference( pathToTest,
 129  
                                                                          mapping[i].type );
 130  0
                         feedReference.method = FeedReference.METHOD_PROBE_DISCOVERY;       
 131  0
                         previousAttempts.add( pathToTest );
 132  0
                         onFeedReference( feedReference, list );
 133  
                     }
 134  
                 
 135  
                     // record this attempt so we don't repeat it again if
 136  
                     // we are doing aggresive probing
 137  0
                     previousAttempts.add( pathToTest );
 138  
                 }
 139  0
             }
 140  
 
 141  0
             log.info( "Using aggresive probing, found the following:" );
 142  0
             log.info( "Blog service: " + blogService );
 143  
         }
 144  
 
 145  0
         log.info( "List: " + list );
 146  0
         log.info( "RSS feed: " + list.getAdRSSFeed() );
 147  0
         log.info( "Atom feed: " + list.getAdAtomFeed() );
 148  0
         return list;
 149  
 
 150  
     }
 151  
 
 152  
     /**
 153  
      * Called each time we find a feed so that we can set the Ad method.
 154  
      * 
 155  
      * FIXME: This doesn't seem like the right place for this.  Can you
 156  
      * document this more? It's cryptic.  Brad Neuberg, bkn3@columbia.edu.
 157  
      * 
 158  
      */
 159  
     private static void onFeedReference( FeedReference ref, FeedList list ) {
 160  
 
 161  0
         if ( list.getAdAtomFeed() == null &&
 162  
              FeedReference.ATOM_MEDIA_TYPE.equals( ref.type ) ) {
 163  
 
 164  0
             list.setAdAtomFeed( ref );
 165  
 
 166  0
         } else if ( list.getAdRSSFeed() == null &&
 167  
                     FeedReference.RSS_MEDIA_TYPE.equals( ref.type ) ) {
 168  
 
 169  0
             list.setAdRSSFeed( ref );
 170  
 
 171  
         }
 172  
 
 173  0
         list.add( ref );
 174  
         
 175  0
     }
 176  
 
 177  
     /** Does an HTTP HEAD to see if the given resource exists.
 178  
      * 
 179  
      *  @param resource The full URI to the resource to check for.
 180  
      * 
 181  
      * 
 182  
      */
 183  
     protected static boolean feedExists( String resource,
 184  
                                          BlogService blogService) 
 185  
         throws Exception {
 186  
         
 187  0
         log.debug("feedExists, resource="+resource);
 188  0
         ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
 189  
 
 190  0
         request.setRequestMethod( "HEAD" );
 191  
         
 192  
         // Some services need to follow redirects; others block if you do.
 193  
         // Ask the blog service itself what to do.
 194  0
         request.setFollowRedirects( blogService.followRedirects() );
 195  
         
 196  
         // the call below actually causes the connection to be made
 197  0
         request.getContentLength();
 198  
         
 199  0
         long response = request.getResponseCode();
 200  0
         log.debug("response="+response);
 201  
 
 202  0
         return response == 200;
 203  
     }
 204  
     
 205  
     
 206  
 
 207  
 }