Coverage Report

Coverage Report - org.apache.commons.feedparser.locate.ResourceExpander

Classes in this File

Line Coverage

Branch Coverage

Complexity

ResourceExpander

0/80

0/50

5.125

 /*
  * Copyright 1999,2004 The Apache Software Foundation.
  * 
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  * 
  *      http://www.apache.org/licenses/LICENSE-2.0
  * 
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.commons.feedparser.locate;
 
 import java.util.regex.Pattern;
 
 import org.apache.log4j.Logger;
 
 /**
  *
  * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
  * @version $Id: ResourceExpander.java 373622 2006-01-30 22:53:00Z mvdb $
  */
 public class ResourceExpander {
 
     private static Logger log = Logger.getLogger( ResourceExpander.class );
 
     /** A regexp to determine if a URL has a scheme, such as "http://foo.com".
      */
     protected static Pattern schemePattern = Pattern.compile("^\\w*://.*");
     
     /**
      * Expand a link relavant to the current site.  This takes care of links
      * such as
      *
      * /foo.html -> http://site.com/base/foo.html
      *
      * foo.html -> http://site.com/base/foo.html
      *
      * Links should *always* be expanded before they are used.
      *
      * This is because if we use the URL http://site.com/base then we don't know
      * if it's a directory or a file.  http://site.com/base/ would be a directory.
      * 
      * Note that all resource URLs will have correct trailing slashes.  If the URL
      * does not end with / then it is a file URL and not a directory.
      * 
      * @param resource The absolute base URL that will be used to expand the
      * link, such as "http://www.codinginparadise.org".
      * @param link The link to possibly expand, such as "/index.rdf" or
      * "http://www.somehost.com/somepage.html".
      *
      * 
      */
     public static String expand( String resource, String link ) {
 
         if ( link == null )
             return null;
 
         //make sure we can use this.
         if ( !isValidScheme( link ) )
             return link;
 
         //nothing if ALREADY relativized
         if ( isExpanded( link ) )
             return link;
 
         //    From: http://www.w3.org/Addressing/rfc1808.txt
         //
         //    If the parse string begins with a double-slash "//", then the
         //    substring of characters after the double-slash and up to, but not
         //    including, the next slash "/" character is the network
         //    location/login (<net_loc>) of the URL.  If no trailing slash "/"
         //    is present, the entire remaining parse string is assigned to
         //    <net_loc>.  The double- slash and <net_loc> are removed from the
         //    parse string before
         //FIXME: What happens if resource is a "file://" scheme?
         if ( link.startsWith( "//" ) ) {
 
             return "http:" + link;
 
         }
 
         //keep going
         if ( link.startsWith( "/" ) ) {
 
             link = getSite( resource ) + link;
 
             return link;
 
         } else if ( link.startsWith( "#" ) ) {
 
             link = resource + link;
 
             return link;
 
         } else if ( link.startsWith( ".." ) ) {
 
             //ok.  We need to get rid of these .. directories.
 
             String base = getBase( resource ) + "/";
 
             while ( link.startsWith( ".." ) ) {
 
                 //get rid of the first previous dir in the link
                 int begin = 2;
                 if ( link.length() > 2 && link.charAt( 2 ) == '/' )
                     begin = 3;
 
                 link = link.substring( begin, link.length() );
 
                 //get rid of the last directory in the resource
 
                 int end = base.length();
 
                 if ( base.endsWith( "/" ) )
                      --end;
 
                 base = base.substring( 0, base.lastIndexOf( "/", end - 1 ) );
 
             }
 
             link = base + "/" + link;
 
             return link;
 
         }
 
         // If the resource ends with a common file ending, then chop
         // off the file ending before adding the link
         // Is this rfc1808 compliant? Brad Neuberg, bkn3@columbia.edu
         resource = getBase(resource);
         if ( link.startsWith( "http://" ) == false ) {
 
             link = resource + "/" + link;
             log.debug("link="+link);
 
         }
 
         return link;
 
     }
 
     /**
      * Return true if the given link is ALREADY relativized..
      *
      * 
      */
     public static boolean isExpanded( String resource ) {
         return (resource.startsWith( "http://" ) ||
                 resource.startsWith( "file://" ));
     }
     
     /**
      * Return true if this is an valid scheme and should be expanded.
      *
      * 
      */
     public static boolean isValidScheme( String resource ) {
         if (hasScheme(resource) == false)
             return true;
         
         //only on file: and http:
 
         if ( resource.startsWith( "http:" ) )
             return true;
 
         if ( resource.startsWith( "file:" ) )
             return true;
 
         return false;
         
     }
     
     /**
      * Determines if the given resource has a scheme. (i.e. does it start with
      * "http://foo.com" or does it just have "foo.com").
      */
     public static boolean hasScheme( String resource ) {
         return schemePattern.matcher( resource ).matches();
         
     }
 
     /**
      * Get the site for this resource.  For example:
      *
      * http://www.foo.com/directory/index.html
      *
      * we will return
      *
      * http://www.foo.com
      *
      * for file: URLs we return file://
      *
      * 
      */
     public static String getSite( String resource ) {
 
         if ( resource.startsWith( "file:" ) ) {
             return "file://";
         } 
 
         //start at 8 which is the width of http://
         int end = resource.indexOf( "/", 8 );
 
         if ( end == -1 ) {
 
             end = resource.length();
 
         } 
 
         return resource.substring( 0, end );
 
     }
 
     /**
      * Given a URL get the domain name.  
      *
      * 
      */
     public static String getDomain( String resource ) {
 
         String site = getSite( resource );
 
         int firstIndex = -1;
         int indexCount = 0;
 
         int index = site.length();
 
         while ( (index = site.lastIndexOf( ".", index-1 )) != -1 ) {
 
             ++indexCount;
 
             if ( indexCount == 2 )
                 break;
 
         }
 
         int begin = 7; // http:// length
         if ( indexCount >= 2 )
             begin = index + 1;
 
         return site.substring( begin, site.length() );
         
     }
     
     /**
      * Get the base of this URL.  For example if we are given:
      *
      * http://www.foo.com/directory/index.html
      *
      * we will return
      *
      * http://www.foo.com/directory
      *
      *
      * 
      */
     public static String getBase( String resource ) {
 
         //FIXME: Brad says this method is totally broken.
         if ( resource == null )
             return null;
         
         int begin = "http://".length() + 1;
         
         int end = resource.lastIndexOf( "/" );
         
         if ( end == -1 || end <= begin ) {
             //probaby a URL like http://www.cnn.com
             
             end = resource.length();
             
         } 
         return resource.substring( 0, end );
         
     } 
 
     public static void main( String[] args ) throws Exception {
 
         System.out.println( expand( "http://peerfear.org/foo/bar/", "../../blog" ) );
 
         System.out.println( expand( "http://peerfear.org/foo/bar/", "../../index.html" ) );
 
         System.out.println( expand( "http://peerfear.org/blog/", ".." ) );
 
         System.out.println( expand( "http://peerfear.org", "/blog" ) );
         System.out.println( expand( "http://peerfear.org", "http://peerfear.org" ) );
 
         System.out.println( expand( "http://peerfear.org", "blog" ) );
         System.out.println( expand( "http://peerfear.org/blog", "foo/bar" ) );
 
         System.out.println( expand( "file://projects/newsmonster/", "blog" ) );
 
         System.out.println( expand( "file:/projects/ksa/src/java/ksa/test/TestFeedTask_WithRelativePath.rss"
                                       , "/blog" ) );        
     }
 
 }
 

1		/*
2		* Copyright 1999,2004 The Apache Software Foundation.
3		*
4		* Licensed under the Apache License, Version 2.0 (the "License");
5		* you may not use this file except in compliance with the License.
6		* You may obtain a copy of the License at
7		*
8		* http://www.apache.org/licenses/LICENSE-2.0
9		*
10		* Unless required by applicable law or agreed to in writing, software
11		* distributed under the License is distributed on an "AS IS" BASIS,
12		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		* See the License for the specific language governing permissions and
14		* limitations under the License.
15		*/
16
17		package org.apache.commons.feedparser.locate;
18
19		import java.util.regex.Pattern;
20
21		import org.apache.log4j.Logger;
22
23		/**
24		*
25		* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
26		* @version $Id: ResourceExpander.java 373622 2006-01-30 22:53:00Z mvdb $
27		*/
28	0	public class ResourceExpander {
29
30	0	private static Logger log = Logger.getLogger( ResourceExpander.class );
31
32		/** A regexp to determine if a URL has a scheme, such as "http://foo.com".
33		*/
34	0	protected static Pattern schemePattern = Pattern.compile("^\\w*://.*");
35
36		/**
37		* Expand a link relavant to the current site. This takes care of links
38		* such as
39		*
40		* /foo.html -> http://site.com/base/foo.html
41		*
42		* foo.html -> http://site.com/base/foo.html
43		*
44		* Links should always be expanded before they are used.
45		*
46		* This is because if we use the URL http://site.com/base then we don't know
47		* if it's a directory or a file. http://site.com/base/ would be a directory.
48		*
49		* Note that all resource URLs will have correct trailing slashes. If the URL
50		* does not end with / then it is a file URL and not a directory.
51		*
52		* @param resource The absolute base URL that will be used to expand the
53		* link, such as "http://www.codinginparadise.org".
54		* @param link The link to possibly expand, such as "/index.rdf" or
55		* "http://www.somehost.com/somepage.html".
56		*
57		*
58		*/
59		public static String expand( String resource, String link ) {
60
61	0	if ( link == null )
62	0	return null;
63
64		//make sure we can use this.
65	0	if ( !isValidScheme( link ) )
66	0	return link;
67
68		//nothing if ALREADY relativized
69	0	if ( isExpanded( link ) )
70	0	return link;
71
72		// From: http://www.w3.org/Addressing/rfc1808.txt
73		//
74		// If the parse string begins with a double-slash "//", then the
75		// substring of characters after the double-slash and up to, but not
76		// including, the next slash "/" character is the network
77		// location/login (<net_loc>) of the URL. If no trailing slash "/"
78		// is present, the entire remaining parse string is assigned to
79		// <net_loc>. The double- slash and <net_loc> are removed from the
80		// parse string before
81		//FIXME: What happens if resource is a "file://" scheme?
82	0	if ( link.startsWith( "//" ) ) {
83
84	0	return "http:" + link;
85
86		}
87
88		//keep going
89	0	if ( link.startsWith( "/" ) ) {
90
91	0	link = getSite( resource ) + link;
92
93	0	return link;
94
95	0	} else if ( link.startsWith( "#" ) ) {
96
97	0	link = resource + link;
98
99	0	return link;
100
101	0	} else if ( link.startsWith( ".." ) ) {
102
103		//ok. We need to get rid of these .. directories.
104
105	0	String base = getBase( resource ) + "/";
106
107	0	while ( link.startsWith( ".." ) ) {
108
109		//get rid of the first previous dir in the link
110	0	int begin = 2;
111	0	if ( link.length() > 2 && link.charAt( 2 ) == '/' )
112	0	begin = 3;
113
114	0	link = link.substring( begin, link.length() );
115
116		//get rid of the last directory in the resource
117
118	0	int end = base.length();
119
120	0	if ( base.endsWith( "/" ) )
121	0	--end;
122
123	0	base = base.substring( 0, base.lastIndexOf( "/", end - 1 ) );
124
125	0	}
126
127	0	link = base + "/" + link;
128
129	0	return link;
130
131		}
132
133		// If the resource ends with a common file ending, then chop
134		// off the file ending before adding the link
135		// Is this rfc1808 compliant? Brad Neuberg, bkn3@columbia.edu
136	0	resource = getBase(resource);
137	0	if ( link.startsWith( "http://" ) == false ) {
138
139	0	link = resource + "/" + link;
140	0	log.debug("link="+link);
141
142		}
143
144	0	return link;
145
146		}
147
148		/**
149		* Return true if the given link is ALREADY relativized..
150		*
151		*
152		*/
153		public static boolean isExpanded( String resource ) {
154	0	return (resource.startsWith( "http://" ) ||
155		resource.startsWith( "file://" ));
156		}
157
158		/**
159		* Return true if this is an valid scheme and should be expanded.
160		*
161		*
162		*/
163		public static boolean isValidScheme( String resource ) {
164	0	if (hasScheme(resource) == false)
165	0	return true;
166
167		//only on file: and http:
168
169	0	if ( resource.startsWith( "http:" ) )
170	0	return true;
171
172	0	if ( resource.startsWith( "file:" ) )
173	0	return true;
174
175	0	return false;
176
177		}
178
179		/**
180		* Determines if the given resource has a scheme. (i.e. does it start with
181		* "http://foo.com" or does it just have "foo.com").
182		*/
183		public static boolean hasScheme( String resource ) {
184	0	return schemePattern.matcher( resource ).matches();
185
186		}
187
188		/**
189		* Get the site for this resource. For example:
190		*
191		* http://www.foo.com/directory/index.html
192		*
193		* we will return
194		*
195		* http://www.foo.com
196		*
197		* for file: URLs we return file://
198		*
199		*
200		*/
201		public static String getSite( String resource ) {
202
203	0	if ( resource.startsWith( "file:" ) ) {
204	0	return "file://";
205		}
206
207		//start at 8 which is the width of http://
208	0	int end = resource.indexOf( "/", 8 );
209
210	0	if ( end == -1 ) {
211
212	0	end = resource.length();
213
214		}
215
216	0	return resource.substring( 0, end );
217
218		}
219
220		/**
221		* Given a URL get the domain name.
222		*
223		*
224		*/
225		public static String getDomain( String resource ) {
226
227	0	String site = getSite( resource );
228
229	0	int firstIndex = -1;
230	0	int indexCount = 0;
231
232	0	int index = site.length();
233
234	0	while ( (index = site.lastIndexOf( ".", index-1 )) != -1 ) {
235
236	0	++indexCount;
237
238	0	if ( indexCount == 2 )
239	0	break;
240
241		}
242
243	0	int begin = 7; // http:// length
244	0	if ( indexCount >= 2 )
245	0	begin = index + 1;
246
247	0	return site.substring( begin, site.length() );
248
249		}
250
251		/**
252		* Get the base of this URL. For example if we are given:
253		*
254		* http://www.foo.com/directory/index.html
255		*
256		* we will return
257		*
258		* http://www.foo.com/directory
259		*
260		*
261		*
262		*/
263		public static String getBase( String resource ) {
264
265		//FIXME: Brad says this method is totally broken.
266	0	if ( resource == null )
267	0	return null;
268
269	0	int begin = "http://".length() + 1;
270
271	0	int end = resource.lastIndexOf( "/" );
272
273	0	if ( end == -1 || end <= begin ) {
274		//probaby a URL like http://www.cnn.com
275
276	0	end = resource.length();
277
278		}
279	0	return resource.substring( 0, end );
280
281		}
282
283		public static void main( String[] args ) throws Exception {
284
285	0	System.out.println( expand( "http://peerfear.org/foo/bar/", "../../blog" ) );
286
287	0	System.out.println( expand( "http://peerfear.org/foo/bar/", "../../index.html" ) );
288
289	0	System.out.println( expand( "http://peerfear.org/blog/", ".." ) );
290
291	0	System.out.println( expand( "http://peerfear.org", "/blog" ) );
292	0	System.out.println( expand( "http://peerfear.org", "http://peerfear.org" ) );
293
294	0	System.out.println( expand( "http://peerfear.org", "blog" ) );
295	0	System.out.println( expand( "http://peerfear.org/blog", "foo/bar" ) );
296
297	0	System.out.println( expand( "file://projects/newsmonster/", "blog" ) );
298
299	0	System.out.println( expand( "file:/projects/ksa/src/java/ksa/test/TestFeedTask_WithRelativePath.rss"
300		, "/blog" ) );
301	0	}
302
303		}
304