Coverage Report

Coverage Report - org.apache.commons.feedparser.locate.blogservice.BlogService

Classes in this File

0/59

0/22

 /*
  * Copyright 1999,2004 The Apache Software Foundation.
  * 
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  * 
  *      http://www.apache.org/licenses/LICENSE-2.0
  * 
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.commons.feedparser.locate.blogservice;
 
 import java.net.*;
 import java.util.*;
 import java.util.regex.*;
 
 import org.apache.commons.feedparser.*;
 import org.apache.commons.feedparser.locate.*;
 
 /**
  * Models the different kinds of blog services that are available.  This
  * is needed for two reasons.  First, sometimes it is useful to simply
  * know what provider a given weblog is being hosted by, such as Blogger
  * or PMachine, in order to use special, non-standard capabilities.  Second,
  * many services have "quirks" that don't follow the standards, such as
  * supporting autodiscovery or supporting it in an incorrect way, and we
  * therefore need to know what service we are dealing with so that we
  * can find its feed.
  * 
  * The BlogService object encapsulates how to determine if a given
  * weblog is of that type and how to find its feeds.  Concrete subclasses,
  * such as org.apache.commons.feedparser.locate.blogservice.Blogger,
  * fill in this class and provide the actual way to determine these
  * things for each blog service type.
  * 
  * @author Brad Neuberg, bkn3@columbia.edu
  */
 public abstract class BlogService {
     protected static List blogServices = new ArrayList();
     
     /** Subclasses should have a static block similar to the following:
      *  <code>
      *      {
      *          BlogService.addBlogService(new MyBlogService());
      *      }
      *  </code>
      */
     
     /** Locates all the generator meta tags
      *  (i.e. <meta content="generator" content="someGenerator"/>)
      */
     protected static Pattern metaTagsPattern = 
                 Pattern.compile("<[\\s]*meta[\\w\\s=\"']*name=['\" ]generator[\"' ][\\w\\s=\"']*[^>]*",
                                 Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
     
     /**
      * A regex to find any trailing filename and strip it
      */
     protected static Pattern patternToStrip = Pattern.compile("[^/](/\\w*\\.\\w*$)"); 
         
     /** Returns whether we can trust the results of this blog service's 
      *  autodiscovery links.  For example, TextAmerica returns invalid 
      *  autodiscovery results.
      */
     public abstract boolean hasValidAutoDiscovery();
     
     /** Returns whether we should follow HTTP redirects for this blog service.
      *  Some services don't implement HTTP redirects correctly, while others,
      *  like Xanga, require it.
      */
     public abstract boolean followRedirects();
     
     /** Determines if the weblog at the given resource and with the given
      *  content is this blog service.
      * @param resource A full URI to this resource, such as 
      * "http://www.codinginparadise.org".
      * @param content The full HTML content at the resource's URL.
      * @throws FeedParserException Thrown if an error occurs while 
      * determining the type of this weblog.
      */
     public abstract boolean isThisService(String resource, String content)
                                                 throws FeedParserException;
 
     /**
      * Returns an array of FeedReferences that contains information on the
      * usual locations this blog service contains its feed.  The feeds should
      * be ordered by quality, so that higher quality feeds come before lower
      * quality ones (i.e. you would want to have an Atom FeedReference
      * object come before an RSS 0.91 FeedReference object in this list).
      * @param resource A URL to the given weblog that might be used to build
      * up where feeds are usually located.
      * @param content The full content of the resource URL, which might
      * be useful to determine where feeds are usually located.  This can be
      * null.
      * @throws FeedParserException Thrown if an error occurs while trying
      * to determine the usual locations of feeds for this service.
      */
     public abstract FeedReference[] getFeedLocations(String resource,
                                                      String content)
                                                 throws FeedParserException;
     
     /** Determines if the weblog at the given resource is this blog service.
      *  @param resource A full URI to this resource, such as 
      *  "http://www.codinginparadise.org".
      *  @throws FeedParserException Thrown if an error occurs while 
      *  determining the type of this weblog.
      */
     public boolean isThisService(String resource) throws FeedParserException {
         return isThisService(resource, null);
     }
     
     /** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php",
      *  and gets the path necessary to build up a feed, such as 
      *  "http://www.codinginparadise.org/".  Basicly it appends a slash 
      *  to the end if there is not one, and removes any file names that 
      *  might be at the end, such as "myweblog.php".
      *
      *  There is a special exception for some Blosxom blogs,
      *  which have things inside of a cgi-script and 'hang' their RSS files
      *  off of this cgi-bin.  For example, 
      *  http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
      *  at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
      *  we must return the blosxom.cgi at the end as well for this method.
      * 
      *  @throws MalformedURLException Thrown if the given resource's URL is 
      *  incorrectly formatted.
      */
     public String getBaseFeedPath( String resource ) {
         // strip off any query string or anchors
         int end = resource.lastIndexOf( "#" );
         
         if ( end != -1 )
             resource = resource.substring( 0, end );
 
         end = resource.lastIndexOf( "?" );
 
         if ( end != -1 )
             resource = resource.substring( 0, end );
 
         Matcher fileMatcher = patternToStrip.matcher(resource);
         if (fileMatcher.find()) {
             String stringToStrip = fileMatcher.group(1);
             int startStrip = resource.indexOf(stringToStrip);
             resource = resource.substring(0, startStrip);
         }
         
         if ( ! resource.endsWith( "/" ) ) {
             resource = resource + "/";
         }
         
         return resource;
     }
 
     public String toString() {
         return this.getClass().getName();
     }
     
     public boolean equals(Object obj) {
         if (obj == null)
             return false;
         
         if (obj instanceof BlogService == false)
             return false;
         
         return (obj.getClass().equals(this.getClass()));
     }
     
     public int hashCode() {
         return this.getClass().hashCode();
     }
     
     /** Gets an array of all of the available BlogService implementations. */
     public static BlogService[] getBlogServices() {
         if (blogServices.size() == 0)
             initializeBlogServices();
         
         BlogService[] results = new BlogService[blogServices.size()];
         
         return (BlogService[])blogServices.toArray(results);
     }
 
     // **** util code ***********************************************************
     // These methods are useful for non-abstract subclasses of this object
     // to actually implement their functionality.
     
     /** Determines if the given resource contains the given domain name
      *  fragment.
      */
     protected boolean containsDomain(String resource, String domain) {
         return (resource.indexOf(domain) != -1);
     }
     
     /**
      * Determines if the given content was generated by the given generator.
 
      * Example. This document contains a meta tag with name="generator" and
      * content equal to the generatorType).
      */
     protected boolean hasGenerator(String content, String generatorType) {
         if (content == null) {
             return false;
         }
         
         Matcher metaTagsMatcher = metaTagsPattern.matcher(content);
         if (metaTagsMatcher.find()) {
             String metaTag = metaTagsMatcher.group(0).toLowerCase();
             generatorType = generatorType.toLowerCase();
             return (metaTag.indexOf(generatorType) != -1);
         }
         else {
             return false;
         }
     }
     
     protected static void initializeBlogServices() {
         blogServices.add(new AOLJournal());
         blogServices.add(new Blogger());
         blogServices.add(new Blosxom());
         blogServices.add(new DiaryLand());
         blogServices.add(new ExpressionEngine());
         blogServices.add(new Flickr());
         blogServices.add(new GreyMatter());
         blogServices.add(new iBlog());
         blogServices.add(new LiveJournal());
         blogServices.add(new Manila());
         blogServices.add(new MovableType());
         blogServices.add(new PMachine());
         blogServices.add(new RadioUserland());
         blogServices.add(new TextAmerica());
         blogServices.add(new TextPattern());
         blogServices.add(new Typepad());
         blogServices.add(new WordPress());
         blogServices.add(new Xanga());
         blogServices.add(new YahooGroups());
     }
 }

1		/*
2		* Copyright 1999,2004 The Apache Software Foundation.
3		*
4		* Licensed under the Apache License, Version 2.0 (the "License");
5		* you may not use this file except in compliance with the License.
6		* You may obtain a copy of the License at
7		*
8		* http://www.apache.org/licenses/LICENSE-2.0
9		*
10		* Unless required by applicable law or agreed to in writing, software
11		* distributed under the License is distributed on an "AS IS" BASIS,
12		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		* See the License for the specific language governing permissions and
14		* limitations under the License.
15		*/
16
17		package org.apache.commons.feedparser.locate.blogservice;
18
19		import java.net.*;
20		import java.util.*;
21		import java.util.regex.*;
22
23		import org.apache.commons.feedparser.*;
24		import org.apache.commons.feedparser.locate.*;
25
26		/**
27		* Models the different kinds of blog services that are available. This
28		* is needed for two reasons. First, sometimes it is useful to simply
29		* know what provider a given weblog is being hosted by, such as Blogger
30		* or PMachine, in order to use special, non-standard capabilities. Second,
31		* many services have "quirks" that don't follow the standards, such as
32		* supporting autodiscovery or supporting it in an incorrect way, and we
33		* therefore need to know what service we are dealing with so that we
34		* can find its feed.
35		*
36		* The BlogService object encapsulates how to determine if a given
37		* weblog is of that type and how to find its feeds. Concrete subclasses,
38		* such as org.apache.commons.feedparser.locate.blogservice.Blogger,
39		* fill in this class and provide the actual way to determine these
40		* things for each blog service type.
41		*
42		* @author Brad Neuberg, bkn3@columbia.edu
43		*/
44	0	public abstract class BlogService {
45	0	protected static List blogServices = new ArrayList();
46
47		/** Subclasses should have a static block similar to the following:
48		* <code>
49		* {
50		* BlogService.addBlogService(new MyBlogService());
51		* }
52		* </code>
53		*/
54
55		/** Locates all the generator meta tags
56		* (i.e. <meta content="generator" content="someGenerator"/>)
57		*/
58	0	protected static Pattern metaTagsPattern =
59		Pattern.compile("<[\\s]*meta[\\w\\s=\"']*name=['\" ]generator[\"' ][\\w\\s=\"']*[^>]*",
60		Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
61
62		/**
63		* A regex to find any trailing filename and strip it
64		*/
65	0	protected static Pattern patternToStrip = Pattern.compile("[^/](/\\w*\\.\\w*$)");
66
67		/** Returns whether we can trust the results of this blog service's
68		* autodiscovery links. For example, TextAmerica returns invalid
69		* autodiscovery results.
70		*/
71		public abstract boolean hasValidAutoDiscovery();
72
73		/** Returns whether we should follow HTTP redirects for this blog service.
74		* Some services don't implement HTTP redirects correctly, while others,
75		* like Xanga, require it.
76		*/
77		public abstract boolean followRedirects();
78
79		/** Determines if the weblog at the given resource and with the given
80		* content is this blog service.
81		* @param resource A full URI to this resource, such as
82		* "http://www.codinginparadise.org".
83		* @param content The full HTML content at the resource's URL.
84		* @throws FeedParserException Thrown if an error occurs while
85		* determining the type of this weblog.
86		*/
87		public abstract boolean isThisService(String resource, String content)
88		throws FeedParserException;
89
90		/**
91		* Returns an array of FeedReferences that contains information on the
92		* usual locations this blog service contains its feed. The feeds should
93		* be ordered by quality, so that higher quality feeds come before lower
94		* quality ones (i.e. you would want to have an Atom FeedReference
95		* object come before an RSS 0.91 FeedReference object in this list).
96		* @param resource A URL to the given weblog that might be used to build
97		* up where feeds are usually located.
98		* @param content The full content of the resource URL, which might
99		* be useful to determine where feeds are usually located. This can be
100		* null.
101		* @throws FeedParserException Thrown if an error occurs while trying
102		* to determine the usual locations of feeds for this service.
103		*/
104		public abstract FeedReference[] getFeedLocations(String resource,
105		String content)
106		throws FeedParserException;
107
108		/** Determines if the weblog at the given resource is this blog service.
109		* @param resource A full URI to this resource, such as
110		* "http://www.codinginparadise.org".
111		* @throws FeedParserException Thrown if an error occurs while
112		* determining the type of this weblog.
113		*/
114		public boolean isThisService(String resource) throws FeedParserException {
115	0	return isThisService(resource, null);
116		}
117
118		/** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php",
119		* and gets the path necessary to build up a feed, such as
120		* "http://www.codinginparadise.org/". Basicly it appends a slash
121		* to the end if there is not one, and removes any file names that
122		* might be at the end, such as "myweblog.php".
123		*
124		* There is a special exception for some Blosxom blogs,
125		* which have things inside of a cgi-script and 'hang' their RSS files
126		* off of this cgi-bin. For example,
127		* http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
128		* at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
129		* we must return the blosxom.cgi at the end as well for this method.
130		*
131		* @throws MalformedURLException Thrown if the given resource's URL is
132		* incorrectly formatted.
133		*/
134		public String getBaseFeedPath( String resource ) {
135		// strip off any query string or anchors
136	0	int end = resource.lastIndexOf( "#" );
137
138	0	if ( end != -1 )
139	0	resource = resource.substring( 0, end );
140
141	0	end = resource.lastIndexOf( "?" );
142
143	0	if ( end != -1 )
144	0	resource = resource.substring( 0, end );
145
146	0	Matcher fileMatcher = patternToStrip.matcher(resource);
147	0	if (fileMatcher.find()) {
148	0	String stringToStrip = fileMatcher.group(1);
149	0	int startStrip = resource.indexOf(stringToStrip);
150	0	resource = resource.substring(0, startStrip);
151		}
152
153	0	if ( ! resource.endsWith( "/" ) ) {
154	0	resource = resource + "/";
155		}
156
157	0	return resource;
158		}
159
160		public String toString() {
161	0	return this.getClass().getName();
162		}
163
164		public boolean equals(Object obj) {
165	0	if (obj == null)
166	0	return false;
167
168	0	if (obj instanceof BlogService == false)
169	0	return false;
170
171	0	return (obj.getClass().equals(this.getClass()));
172		}
173
174		public int hashCode() {
175	0	return this.getClass().hashCode();
176		}
177
178		/** Gets an array of all of the available BlogService implementations. */
179		public static BlogService[] getBlogServices() {
180	0	if (blogServices.size() == 0)
181	0	initializeBlogServices();
182
183	0	BlogService[] results = new BlogService[blogServices.size()];
184
185	0	return (BlogService[])blogServices.toArray(results);
186		}
187
188		// **** util code ***********************************************************
189		// These methods are useful for non-abstract subclasses of this object
190		// to actually implement their functionality.
191
192		/** Determines if the given resource contains the given domain name
193		* fragment.
194		*/
195		protected boolean containsDomain(String resource, String domain) {
196	0	return (resource.indexOf(domain) != -1);
197		}
198
199		/**
200		* Determines if the given content was generated by the given generator.
201
202		* Example. This document contains a meta tag with name="generator" and
203		* content equal to the generatorType).
204		*/
205		protected boolean hasGenerator(String content, String generatorType) {
206	0	if (content == null) {
207	0	return false;
208		}
209
210	0	Matcher metaTagsMatcher = metaTagsPattern.matcher(content);
211	0	if (metaTagsMatcher.find()) {
212	0	String metaTag = metaTagsMatcher.group(0).toLowerCase();
213	0	generatorType = generatorType.toLowerCase();
214	0	return (metaTag.indexOf(generatorType) != -1);
215		}
216		else {
217	0	return false;
218		}
219		}
220
221		protected static void initializeBlogServices() {
222	0	blogServices.add(new AOLJournal());
223	0	blogServices.add(new Blogger());
224	0	blogServices.add(new Blosxom());
225	0	blogServices.add(new DiaryLand());
226	0	blogServices.add(new ExpressionEngine());
227	0	blogServices.add(new Flickr());
228	0	blogServices.add(new GreyMatter());
229	0	blogServices.add(new iBlog());
230	0	blogServices.add(new LiveJournal());
231	0	blogServices.add(new Manila());
232	0	blogServices.add(new MovableType());
233	0	blogServices.add(new PMachine());
234	0	blogServices.add(new RadioUserland());
235	0	blogServices.add(new TextAmerica());
236	0	blogServices.add(new TextPattern());
237	0	blogServices.add(new Typepad());
238	0	blogServices.add(new WordPress());
239	0	blogServices.add(new Xanga());
240	0	blogServices.add(new YahooGroups());
241	0	}
242		}