Coverage Report - org.apache.commons.feedparser.locate.blogservice.BlogService
 
Classes in this File Line Coverage Branch Coverage Complexity
BlogService
0%
0/59
0%
0/22
2
 
 1  
 /*
 2  
  * Copyright 1999,2004 The Apache Software Foundation.
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  * 
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.feedparser.locate.blogservice;
 18  
 
 19  
 import java.net.*;
 20  
 import java.util.*;
 21  
 import java.util.regex.*;
 22  
 
 23  
 import org.apache.commons.feedparser.*;
 24  
 import org.apache.commons.feedparser.locate.*;
 25  
 
 26  
 /**
 27  
  * Models the different kinds of blog services that are available.  This
 28  
  * is needed for two reasons.  First, sometimes it is useful to simply
 29  
  * know what provider a given weblog is being hosted by, such as Blogger
 30  
  * or PMachine, in order to use special, non-standard capabilities.  Second,
 31  
  * many services have "quirks" that don't follow the standards, such as
 32  
  * not supporting autodiscovery or supporting it in an incorrect way, and we
 33  
  * therefore need to know what service we are dealing with so that we
 34  
  * can find its feed.
 35  
  * 
 36  
  * The BlogService object encapsulates how to determine if a given
 37  
  * weblog is of that type and how to find its feeds.  Concrete subclasses,
 38  
  * such as org.apache.commons.feedparser.locate.blogservice.Blogger,
 39  
  * fill in this class and provide the actual way to determine these
 40  
  * things for each blog service type.
 41  
  * 
 42  
  * @author Brad Neuberg, bkn3@columbia.edu
 43  
  */
 44  0
 public abstract class BlogService {
 45  0
     protected static List blogServices = new ArrayList();
 46  
     
 47  
     /** Subclasses should have a static block similar to the following:
 48  
      *  <code>
 49  
      *      {
 50  
      *          BlogService.addBlogService(new MyBlogService());
 51  
      *      }
 52  
      *  </code>
 53  
      */
 54  
     
 55  
     /** Locates all the generator meta tags
 56  
      *  (i.e. <meta content="generator" content="someGenerator"/>)
 57  
      */
 58  0
     protected static Pattern metaTagsPattern = 
 59  
                 Pattern.compile("<[\\s]*meta[\\w\\s=\"']*name=['\" ]generator[\"' ][\\w\\s=\"']*[^>]*",
 60  
                                 Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
 61  
     
 62  
     /**
 63  
      * A regex to find any trailing filename and strip it
 64  
      */
 65  0
     protected static Pattern patternToStrip = Pattern.compile("[^/](/\\w*\\.\\w*$)"); 
 66  
         
 67  
     /** Returns whether we can trust the results of this blog service's 
 68  
      *  autodiscovery links.  For example, TextAmerica returns invalid 
 69  
      *  autodiscovery results.
 70  
      */
 71  
     public abstract boolean hasValidAutoDiscovery();
 72  
     
 73  
     /** Returns whether we should follow HTTP redirects for this blog service.
 74  
      *  Some services don't implement HTTP redirects correctly, while others,
 75  
      *  like Xanga, require it.
 76  
      */
 77  
     public abstract boolean followRedirects();
 78  
     
 79  
     /** Determines if the weblog at the given resource and with the given
 80  
      *  content is this blog service.
 81  
      * @param resource A full URI to this resource, such as 
 82  
      * "http://www.codinginparadise.org".
 83  
      * @param content The full HTML content at the resource's URL.
 84  
      * @throws FeedParserException Thrown if an error occurs while 
 85  
      * determining the type of this weblog.
 86  
      */
 87  
     public abstract boolean isThisService(String resource, String content)
 88  
                                                 throws FeedParserException;
 89  
 
 90  
     /**
 91  
      * Returns an array of FeedReferences that contains information on the
 92  
      * usual locations this blog service contains its feed.  The feeds should
 93  
      * be ordered by quality, so that higher quality feeds come before lower
 94  
      * quality ones (i.e. you would want to have an Atom FeedReference
 95  
      * object come before an RSS 0.91 FeedReference object in this list).
 96  
      * @param resource A URL to the given weblog that might be used to build
 97  
      * up where feeds are usually located.
 98  
      * @param content The full content of the resource URL, which might
 99  
      * be useful to determine where feeds are usually located.  This can be
 100  
      * null.
 101  
      * @throws FeedParserException Thrown if an error occurs while trying
 102  
      * to determine the usual locations of feeds for this service.
 103  
      */
 104  
     public abstract FeedReference[] getFeedLocations(String resource,
 105  
                                                      String content)
 106  
                                                 throws FeedParserException;
 107  
     
 108  
     /** Determines if the weblog at the given resource is this blog service.
 109  
      *  @param resource A full URI to this resource, such as 
 110  
      *  "http://www.codinginparadise.org".
 111  
      *  @throws FeedParserException Thrown if an error occurs while 
 112  
      *  determining the type of this weblog.
 113  
      */
 114  
     public boolean isThisService(String resource) throws FeedParserException {
 115  0
         return isThisService(resource, null);
 116  
     }
 117  
     
 118  
     /** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php",
 119  
      *  and gets the path necessary to build up a feed, such as 
 120  
      *  "http://www.codinginparadise.org/".  Basicly it appends a slash 
 121  
      *  to the end if there is not one, and removes any file names that 
 122  
      *  might be at the end, such as "myweblog.php".
 123  
      *
 124  
      *  There is a special exception for some Blosxom blogs,
 125  
      *  which have things inside of a cgi-script and 'hang' their RSS files
 126  
      *  off of this cgi-bin.  For example, 
 127  
      *  http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
 128  
      *  at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
 129  
      *  we must return the blosxom.cgi at the end as well for this method.
 130  
      * 
 131  
      *  @throws MalformedURLException Thrown if the given resource's URL is 
 132  
      *  incorrectly formatted.
 133  
      */
 134  
     public String getBaseFeedPath( String resource ) {
 135  
         // strip off any query string or anchors
 136  0
         int end = resource.lastIndexOf( "#" );
 137  
         
 138  0
         if ( end != -1 )
 139  0
             resource = resource.substring( 0, end );
 140  
 
 141  0
         end = resource.lastIndexOf( "?" );
 142  
 
 143  0
         if ( end != -1 )
 144  0
             resource = resource.substring( 0, end );
 145  
 
 146  0
         Matcher fileMatcher = patternToStrip.matcher(resource);
 147  0
         if (fileMatcher.find()) {
 148  0
             String stringToStrip = fileMatcher.group(1);
 149  0
             int startStrip = resource.indexOf(stringToStrip);
 150  0
             resource = resource.substring(0, startStrip);
 151  
         }
 152  
         
 153  0
         if ( ! resource.endsWith( "/" ) ) {
 154  0
             resource = resource + "/";
 155  
         }
 156  
         
 157  0
         return resource;
 158  
     }
 159  
 
 160  
     public String toString() {
 161  0
         return this.getClass().getName();
 162  
     }
 163  
     
 164  
     public boolean equals(Object obj) {
 165  0
         if (obj == null)
 166  0
             return false;
 167  
         
 168  0
         if (obj instanceof BlogService == false)
 169  0
             return false;
 170  
         
 171  0
         return (obj.getClass().equals(this.getClass()));
 172  
     }
 173  
     
 174  
     public int hashCode() {
 175  0
         return this.getClass().hashCode();
 176  
     }
 177  
     
 178  
     /** Gets an array of all of the available BlogService implementations. */
 179  
     public static BlogService[] getBlogServices() {
 180  0
         if (blogServices.size() == 0)
 181  0
             initializeBlogServices();
 182  
         
 183  0
         BlogService[] results = new BlogService[blogServices.size()];
 184  
         
 185  0
         return (BlogService[])blogServices.toArray(results);
 186  
     }
 187  
 
 188  
     // **** util code ***********************************************************
 189  
     // These methods are useful for non-abstract subclasses of this object
 190  
     // to actually implement their functionality.
 191  
     
 192  
     /** Determines if the given resource contains the given domain name
 193  
      *  fragment.
 194  
      */
 195  
     protected boolean containsDomain(String resource, String domain) {
 196  0
         return (resource.indexOf(domain) != -1);
 197  
     }
 198  
     
 199  
     /**
 200  
      * Determines if the given content was generated by the given generator.
 201  
 
 202  
      * Example. This document contains a meta tag with name="generator" and
 203  
      * content equal to the generatorType).
 204  
      */
 205  
     protected boolean hasGenerator(String content, String generatorType) {
 206  0
         if (content == null) {
 207  0
             return false;
 208  
         }
 209  
         
 210  0
         Matcher metaTagsMatcher = metaTagsPattern.matcher(content);
 211  0
         if (metaTagsMatcher.find()) {
 212  0
             String metaTag = metaTagsMatcher.group(0).toLowerCase();
 213  0
             generatorType = generatorType.toLowerCase();
 214  0
             return (metaTag.indexOf(generatorType) != -1);
 215  
         }
 216  
         else {
 217  0
             return false;
 218  
         }
 219  
     }
 220  
     
 221  
     protected static void initializeBlogServices() {
 222  0
         blogServices.add(new AOLJournal());
 223  0
         blogServices.add(new Blogger());
 224  0
         blogServices.add(new Blosxom());
 225  0
         blogServices.add(new DiaryLand());
 226  0
         blogServices.add(new ExpressionEngine());
 227  0
         blogServices.add(new Flickr());
 228  0
         blogServices.add(new GreyMatter());
 229  0
         blogServices.add(new iBlog());
 230  0
         blogServices.add(new LiveJournal());
 231  0
         blogServices.add(new Manila());
 232  0
         blogServices.add(new MovableType());
 233  0
         blogServices.add(new PMachine());
 234  0
         blogServices.add(new RadioUserland());
 235  0
         blogServices.add(new TextAmerica());
 236  0
         blogServices.add(new TextPattern());
 237  0
         blogServices.add(new Typepad());
 238  0
         blogServices.add(new WordPress());
 239  0
         blogServices.add(new Xanga());
 240  0
         blogServices.add(new YahooGroups());
 241  0
     }
 242  
 }