Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
ResourceExpander |
|
| 5.125;5.125 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser.locate; | |
18 | ||
19 | import java.util.regex.Pattern; | |
20 | ||
21 | import org.apache.log4j.Logger; | |
22 | ||
23 | /** | |
24 | * | |
25 | * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a> | |
26 | * @version $Id: ResourceExpander.java 373622 2006-01-30 22:53:00Z mvdb $ | |
27 | */ | |
28 | 0 | public class ResourceExpander { |
29 | ||
30 | 0 | private static Logger log = Logger.getLogger( ResourceExpander.class ); |
31 | ||
32 | /** A regexp to determine if a URL has a scheme, such as "http://foo.com". | |
33 | */ | |
34 | 0 | protected static Pattern schemePattern = Pattern.compile("^\\w*://.*"); |
35 | ||
36 | /** | |
37 | * Expand a link relavant to the current site. This takes care of links | |
38 | * such as | |
39 | * | |
40 | * /foo.html -> http://site.com/base/foo.html | |
41 | * | |
42 | * foo.html -> http://site.com/base/foo.html | |
43 | * | |
44 | * Links should *always* be expanded before they are used. | |
45 | * | |
46 | * This is because if we use the URL http://site.com/base then we don't know | |
47 | * if it's a directory or a file. http://site.com/base/ would be a directory. | |
48 | * | |
49 | * Note that all resource URLs will have correct trailing slashes. If the URL | |
50 | * does not end with / then it is a file URL and not a directory. | |
51 | * | |
52 | * @param resource The absolute base URL that will be used to expand the | |
53 | * link, such as "http://www.codinginparadise.org". | |
54 | * @param link The link to possibly expand, such as "/index.rdf" or | |
55 | * "http://www.somehost.com/somepage.html". | |
56 | * | |
57 | * | |
58 | */ | |
59 | public static String expand( String resource, String link ) { | |
60 | ||
61 | 0 | if ( link == null ) |
62 | 0 | return null; |
63 | ||
64 | //make sure we can use this. | |
65 | 0 | if ( !isValidScheme( link ) ) |
66 | 0 | return link; |
67 | ||
68 | //nothing if ALREADY relativized | |
69 | 0 | if ( isExpanded( link ) ) |
70 | 0 | return link; |
71 | ||
72 | // From: http://www.w3.org/Addressing/rfc1808.txt | |
73 | // | |
74 | // If the parse string begins with a double-slash "//", then the | |
75 | // substring of characters after the double-slash and up to, but not | |
76 | // including, the next slash "/" character is the network | |
77 | // location/login (<net_loc>) of the URL. If no trailing slash "/" | |
78 | // is present, the entire remaining parse string is assigned to | |
79 | // <net_loc>. The double- slash and <net_loc> are removed from the | |
80 | // parse string before | |
81 | //FIXME: What happens if resource is a "file://" scheme? | |
82 | 0 | if ( link.startsWith( "//" ) ) { |
83 | ||
84 | 0 | return "http:" + link; |
85 | ||
86 | } | |
87 | ||
88 | //keep going | |
89 | 0 | if ( link.startsWith( "/" ) ) { |
90 | ||
91 | 0 | link = getSite( resource ) + link; |
92 | ||
93 | 0 | return link; |
94 | ||
95 | 0 | } else if ( link.startsWith( "#" ) ) { |
96 | ||
97 | 0 | link = resource + link; |
98 | ||
99 | 0 | return link; |
100 | ||
101 | 0 | } else if ( link.startsWith( ".." ) ) { |
102 | ||
103 | //ok. We need to get rid of these .. directories. | |
104 | ||
105 | 0 | String base = getBase( resource ) + "/"; |
106 | ||
107 | 0 | while ( link.startsWith( ".." ) ) { |
108 | ||
109 | //get rid of the first previous dir in the link | |
110 | 0 | int begin = 2; |
111 | 0 | if ( link.length() > 2 && link.charAt( 2 ) == '/' ) |
112 | 0 | begin = 3; |
113 | ||
114 | 0 | link = link.substring( begin, link.length() ); |
115 | ||
116 | //get rid of the last directory in the resource | |
117 | ||
118 | 0 | int end = base.length(); |
119 | ||
120 | 0 | if ( base.endsWith( "/" ) ) |
121 | 0 | --end; |
122 | ||
123 | 0 | base = base.substring( 0, base.lastIndexOf( "/", end - 1 ) ); |
124 | ||
125 | 0 | } |
126 | ||
127 | 0 | link = base + "/" + link; |
128 | ||
129 | 0 | return link; |
130 | ||
131 | } | |
132 | ||
133 | // If the resource ends with a common file ending, then chop | |
134 | // off the file ending before adding the link | |
135 | // Is this rfc1808 compliant? Brad Neuberg, bkn3@columbia.edu | |
136 | 0 | resource = getBase(resource); |
137 | 0 | if ( link.startsWith( "http://" ) == false ) { |
138 | ||
139 | 0 | link = resource + "/" + link; |
140 | 0 | log.debug("link="+link); |
141 | ||
142 | } | |
143 | ||
144 | 0 | return link; |
145 | ||
146 | } | |
147 | ||
148 | /** | |
149 | * Return true if the given link is ALREADY relativized.. | |
150 | * | |
151 | * | |
152 | */ | |
153 | public static boolean isExpanded( String resource ) { | |
154 | 0 | return (resource.startsWith( "http://" ) || |
155 | resource.startsWith( "file://" )); | |
156 | } | |
157 | ||
158 | /** | |
159 | * Return true if this is an valid scheme and should be expanded. | |
160 | * | |
161 | * | |
162 | */ | |
163 | public static boolean isValidScheme( String resource ) { | |
164 | 0 | if (hasScheme(resource) == false) |
165 | 0 | return true; |
166 | ||
167 | //only on file: and http: | |
168 | ||
169 | 0 | if ( resource.startsWith( "http:" ) ) |
170 | 0 | return true; |
171 | ||
172 | 0 | if ( resource.startsWith( "file:" ) ) |
173 | 0 | return true; |
174 | ||
175 | 0 | return false; |
176 | ||
177 | } | |
178 | ||
179 | /** | |
180 | * Determines if the given resource has a scheme. (i.e. does it start with | |
181 | * "http://foo.com" or does it just have "foo.com"). | |
182 | */ | |
183 | public static boolean hasScheme( String resource ) { | |
184 | 0 | return schemePattern.matcher( resource ).matches(); |
185 | ||
186 | } | |
187 | ||
188 | /** | |
189 | * Get the site for this resource. For example: | |
190 | * | |
191 | * http://www.foo.com/directory/index.html | |
192 | * | |
193 | * we will return | |
194 | * | |
195 | * http://www.foo.com | |
196 | * | |
197 | * for file: URLs we return file:// | |
198 | * | |
199 | * | |
200 | */ | |
201 | public static String getSite( String resource ) { | |
202 | ||
203 | 0 | if ( resource.startsWith( "file:" ) ) { |
204 | 0 | return "file://"; |
205 | } | |
206 | ||
207 | //start at 8 which is the width of http:// | |
208 | 0 | int end = resource.indexOf( "/", 8 ); |
209 | ||
210 | 0 | if ( end == -1 ) { |
211 | ||
212 | 0 | end = resource.length(); |
213 | ||
214 | } | |
215 | ||
216 | 0 | return resource.substring( 0, end ); |
217 | ||
218 | } | |
219 | ||
220 | /** | |
221 | * Given a URL get the domain name. | |
222 | * | |
223 | * | |
224 | */ | |
225 | public static String getDomain( String resource ) { | |
226 | ||
227 | 0 | String site = getSite( resource ); |
228 | ||
229 | 0 | int firstIndex = -1; |
230 | 0 | int indexCount = 0; |
231 | ||
232 | 0 | int index = site.length(); |
233 | ||
234 | 0 | while ( (index = site.lastIndexOf( ".", index-1 )) != -1 ) { |
235 | ||
236 | 0 | ++indexCount; |
237 | ||
238 | 0 | if ( indexCount == 2 ) |
239 | 0 | break; |
240 | ||
241 | } | |
242 | ||
243 | 0 | int begin = 7; // http:// length |
244 | 0 | if ( indexCount >= 2 ) |
245 | 0 | begin = index + 1; |
246 | ||
247 | 0 | return site.substring( begin, site.length() ); |
248 | ||
249 | } | |
250 | ||
251 | /** | |
252 | * Get the base of this URL. For example if we are given: | |
253 | * | |
254 | * http://www.foo.com/directory/index.html | |
255 | * | |
256 | * we will return | |
257 | * | |
258 | * http://www.foo.com/directory | |
259 | * | |
260 | * | |
261 | * | |
262 | */ | |
263 | public static String getBase( String resource ) { | |
264 | ||
265 | //FIXME: Brad says this method is totally broken. | |
266 | 0 | if ( resource == null ) |
267 | 0 | return null; |
268 | ||
269 | 0 | int begin = "http://".length() + 1; |
270 | ||
271 | 0 | int end = resource.lastIndexOf( "/" ); |
272 | ||
273 | 0 | if ( end == -1 || end <= begin ) { |
274 | //probaby a URL like http://www.cnn.com | |
275 | ||
276 | 0 | end = resource.length(); |
277 | ||
278 | } | |
279 | 0 | return resource.substring( 0, end ); |
280 | ||
281 | } | |
282 | ||
283 | public static void main( String[] args ) throws Exception { | |
284 | ||
285 | 0 | System.out.println( expand( "http://peerfear.org/foo/bar/", "../../blog" ) ); |
286 | ||
287 | 0 | System.out.println( expand( "http://peerfear.org/foo/bar/", "../../index.html" ) ); |
288 | ||
289 | 0 | System.out.println( expand( "http://peerfear.org/blog/", ".." ) ); |
290 | ||
291 | 0 | System.out.println( expand( "http://peerfear.org", "/blog" ) ); |
292 | 0 | System.out.println( expand( "http://peerfear.org", "http://peerfear.org" ) ); |
293 | ||
294 | 0 | System.out.println( expand( "http://peerfear.org", "blog" ) ); |
295 | 0 | System.out.println( expand( "http://peerfear.org/blog", "foo/bar" ) ); |
296 | ||
297 | 0 | System.out.println( expand( "file://projects/newsmonster/", "blog" ) ); |
298 | ||
299 | 0 | System.out.println( expand( "file:/projects/ksa/src/java/ksa/test/TestFeedTask_WithRelativePath.rss" |
300 | , "/blog" ) ); | |
301 | 0 | } |
302 | ||
303 | } | |
304 |