1 | |
package org.apache.maven.doxia.linkcheck.validation; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.IOException; |
23 | |
|
24 | |
import java.net.URL; |
25 | |
import java.util.Iterator; |
26 | |
import java.util.Map; |
27 | |
|
28 | |
import org.apache.commons.httpclient.Credentials; |
29 | |
import org.apache.commons.httpclient.Header; |
30 | |
import org.apache.commons.httpclient.HostConfiguration; |
31 | |
import org.apache.commons.httpclient.HttpClient; |
32 | |
import org.apache.commons.httpclient.HttpException; |
33 | |
import org.apache.commons.httpclient.HttpMethod; |
34 | |
import org.apache.commons.httpclient.HttpState; |
35 | |
import org.apache.commons.httpclient.HttpStatus; |
36 | |
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; |
37 | |
import org.apache.commons.httpclient.NTCredentials; |
38 | |
import org.apache.commons.httpclient.StatusLine; |
39 | |
import org.apache.commons.httpclient.UsernamePasswordCredentials; |
40 | |
import org.apache.commons.httpclient.auth.AuthScope; |
41 | |
import org.apache.commons.httpclient.methods.GetMethod; |
42 | |
import org.apache.commons.httpclient.methods.HeadMethod; |
43 | |
import org.apache.commons.httpclient.params.HttpClientParams; |
44 | |
import org.apache.commons.httpclient.params.HttpMethodParams; |
45 | |
|
46 | |
import org.apache.commons.logging.Log; |
47 | |
import org.apache.commons.logging.LogFactory; |
48 | |
import org.apache.maven.doxia.linkcheck.HttpBean; |
49 | |
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult; |
50 | |
import org.codehaus.plexus.util.StringUtils; |
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
public final class OnlineHTTPLinkValidator |
61 | |
extends HTTPLinkValidator |
62 | |
{ |
63 | |
|
64 | 4 | private static final Log LOG = LogFactory.getLog( OnlineHTTPLinkValidator.class ); |
65 | |
|
66 | |
|
67 | |
private static final int MAX_NB_REDIRECT = 10; |
68 | |
|
69 | |
|
70 | |
private static final String GET_METHOD = "get"; |
71 | |
|
72 | |
|
73 | |
private static final String HEAD_METHOD = "head"; |
74 | |
|
75 | |
|
76 | |
private HttpBean http; |
77 | |
|
78 | |
|
79 | |
private String baseURL; |
80 | |
|
81 | |
|
82 | |
private transient HttpClient cl; |
83 | |
|
84 | |
|
85 | |
|
86 | |
|
87 | |
public OnlineHTTPLinkValidator() |
88 | |
{ |
89 | 2 | this( new HttpBean() ); |
90 | 2 | } |
91 | |
|
92 | |
|
93 | |
|
94 | |
|
95 | |
|
96 | |
|
97 | |
public OnlineHTTPLinkValidator( HttpBean bean ) |
98 | 6 | { |
99 | 6 | if ( bean == null ) |
100 | |
{ |
101 | 4 | bean = new HttpBean(); |
102 | |
} |
103 | |
|
104 | 6 | if ( LOG.isDebugEnabled() ) |
105 | |
{ |
106 | 0 | LOG.debug( "Will use method : [" + bean.getMethod() + "]" ); |
107 | |
} |
108 | |
|
109 | 6 | this.http = bean; |
110 | |
|
111 | 6 | initHttpClient(); |
112 | 6 | } |
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
public String getBaseURL() |
120 | |
{ |
121 | 0 | return this.baseURL; |
122 | |
} |
123 | |
|
124 | |
|
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
public void setBaseURL( String url ) |
130 | |
{ |
131 | 0 | this.baseURL = url; |
132 | 0 | } |
133 | |
|
134 | |
|
135 | |
public LinkValidationResult validateLink( LinkValidationItem lvi ) |
136 | |
{ |
137 | 4 | if ( this.cl == null ) |
138 | |
{ |
139 | 0 | initHttpClient(); |
140 | |
} |
141 | |
|
142 | 4 | if ( this.http.getHttpClientParameters() != null ) |
143 | |
{ |
144 | 0 | for ( Iterator it = this.http.getHttpClientParameters().entrySet().iterator(); it.hasNext(); ) |
145 | |
{ |
146 | 0 | Map.Entry entry = (Map.Entry) it.next(); |
147 | |
|
148 | 0 | if ( entry.getValue() != null ) |
149 | |
{ |
150 | 0 | System.setProperty( entry.getKey().toString(), entry.getValue().toString() ); |
151 | |
} |
152 | 0 | } |
153 | |
} |
154 | |
|
155 | |
|
156 | 4 | System.setProperty( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" ); |
157 | 4 | this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" ); |
158 | |
|
159 | 4 | String link = lvi.getLink(); |
160 | 4 | String anchor = ""; |
161 | 4 | int idx = link.indexOf( '#' ); |
162 | 4 | if ( idx != -1 ) |
163 | |
{ |
164 | 0 | anchor = link.substring( idx + 1 ); |
165 | 0 | link = link.substring( 0, idx ); |
166 | |
} |
167 | |
|
168 | |
try |
169 | |
{ |
170 | 4 | if ( link.startsWith( "/" ) ) |
171 | |
{ |
172 | 0 | if ( getBaseURL() == null ) |
173 | |
{ |
174 | 0 | if ( LOG.isWarnEnabled() ) |
175 | |
{ |
176 | 0 | LOG.warn( "Cannot check link [" + link + "] in page [" + lvi.getSource() |
177 | |
+ "], as no base URL has been set!" ); |
178 | |
} |
179 | |
|
180 | 0 | return new LinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, false, |
181 | |
"No base URL specified" ); |
182 | |
} |
183 | |
|
184 | 0 | link = getBaseURL() + link; |
185 | |
} |
186 | |
|
187 | 4 | HttpMethod hm = null; |
188 | |
try |
189 | |
{ |
190 | 4 | hm = checkLink( link, 0 ); |
191 | |
} |
192 | 2 | catch ( Throwable t ) |
193 | |
{ |
194 | 2 | if ( LOG.isDebugEnabled() ) |
195 | |
{ |
196 | 0 | LOG.debug( "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]", t ); |
197 | |
} |
198 | |
|
199 | 2 | return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getClass().getName() |
200 | |
+ " : " + t.getMessage() ); |
201 | 2 | } |
202 | |
|
203 | 2 | if ( hm == null ) |
204 | |
{ |
205 | 0 | return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, |
206 | |
"Cannot retreive HTTP Status" ); |
207 | |
} |
208 | |
|
209 | 2 | if ( hm.getStatusCode() == HttpStatus.SC_OK ) |
210 | |
{ |
211 | |
|
212 | 2 | if ( anchor.length() > 0 ) |
213 | |
{ |
214 | 0 | String content = hm.getResponseBodyAsString(); |
215 | |
|
216 | 0 | if ( !Anchors.matchesAnchor( content, anchor ) ) |
217 | |
{ |
218 | 0 | return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false, |
219 | |
"Missing anchor '" + anchor + "'" ); |
220 | |
} |
221 | |
} |
222 | 2 | return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(), |
223 | |
hm.getStatusText() ); |
224 | |
} |
225 | |
|
226 | 0 | String msg = |
227 | |
"Received: [" + hm.getStatusCode() + "] for [" + link + "] in page [" + lvi.getSource() + "]"; |
228 | |
|
229 | 0 | if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY |
230 | |
|| hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY |
231 | |
|| hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT ) |
232 | |
{ |
233 | 0 | LOG.warn( msg ); |
234 | |
|
235 | 0 | return new HTTPLinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, true, hm.getStatusCode(), |
236 | |
hm.getStatusText() ); |
237 | |
} |
238 | |
|
239 | 0 | LOG.debug( msg ); |
240 | |
|
241 | 0 | return new HTTPLinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, hm.getStatusCode(), |
242 | |
hm.getStatusText() ); |
243 | |
} |
244 | 0 | catch ( Throwable t ) |
245 | |
{ |
246 | 0 | String msg = "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]"; |
247 | 0 | if ( LOG.isDebugEnabled() ) |
248 | |
{ |
249 | 0 | LOG.debug( msg, t ); |
250 | |
} |
251 | |
else |
252 | |
{ |
253 | 0 | LOG.error( msg ); |
254 | |
} |
255 | |
|
256 | 0 | return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getMessage() ); |
257 | |
} |
258 | |
finally |
259 | |
{ |
260 | 4 | System.getProperties().remove( HttpMethodParams.USER_AGENT ); |
261 | |
|
262 | 4 | if ( this.http.getHttpClientParameters() != null ) |
263 | |
{ |
264 | 0 | for ( Iterator it = this.http.getHttpClientParameters().entrySet().iterator(); it.hasNext(); ) |
265 | |
{ |
266 | 0 | Map.Entry entry = (Map.Entry) it.next(); |
267 | |
|
268 | 0 | if ( entry.getValue() != null ) |
269 | |
{ |
270 | 0 | System.getProperties().remove( entry.getKey().toString() ); |
271 | |
} |
272 | 0 | } |
273 | |
} |
274 | |
} |
275 | |
} |
276 | |
|
277 | |
|
278 | |
private void initHttpClient() |
279 | |
{ |
280 | 6 | LOG.debug( "A new HttpClient instance is needed ..." ); |
281 | |
|
282 | 6 | this.cl = new HttpClient( new MultiThreadedHttpConnectionManager() ); |
283 | |
|
284 | |
|
285 | 6 | if ( this.http.getTimeout() != 0 ) |
286 | |
{ |
287 | 6 | this.cl.getHttpConnectionManager().getParams().setConnectionTimeout( this.http.getTimeout() ); |
288 | 6 | this.cl.getHttpConnectionManager().getParams().setSoTimeout( this.http.getTimeout() ); |
289 | |
} |
290 | 6 | this.cl.getParams().setBooleanParameter( HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true ); |
291 | |
|
292 | 6 | HostConfiguration hc = new HostConfiguration(); |
293 | |
|
294 | 6 | HttpState state = new HttpState(); |
295 | 6 | if ( StringUtils.isNotEmpty( this.http.getProxyHost() ) ) |
296 | |
{ |
297 | 0 | hc.setProxy( this.http.getProxyHost(), this.http.getProxyPort() ); |
298 | |
|
299 | 0 | if ( LOG.isDebugEnabled() ) |
300 | |
{ |
301 | 0 | LOG.debug( "Proxy Host:" + this.http.getProxyHost() ); |
302 | 0 | LOG.debug( "Proxy Port:" + this.http.getProxyPort() ); |
303 | |
} |
304 | |
|
305 | 0 | if ( StringUtils.isNotEmpty( this.http.getProxyUser() ) && this.http.getProxyPassword() != null ) |
306 | |
{ |
307 | 0 | if ( LOG.isDebugEnabled() ) |
308 | |
{ |
309 | 0 | LOG.debug( "Proxy User:" + this.http.getProxyUser() ); |
310 | |
} |
311 | |
|
312 | |
Credentials credentials; |
313 | 0 | if ( StringUtils.isNotEmpty( this.http.getProxyNtlmHost() ) ) |
314 | |
{ |
315 | 0 | credentials = |
316 | |
new NTCredentials( this.http.getProxyUser(), this.http.getProxyPassword(), |
317 | |
this.http.getProxyNtlmHost(), this.http.getProxyNtlmDomain() ); |
318 | |
} |
319 | |
else |
320 | |
{ |
321 | 0 | credentials = |
322 | |
new UsernamePasswordCredentials( this.http.getProxyUser(), this.http.getProxyPassword() ); |
323 | |
} |
324 | |
|
325 | 0 | state.setProxyCredentials( AuthScope.ANY, credentials ); |
326 | 0 | } |
327 | |
} |
328 | |
else |
329 | |
{ |
330 | 6 | LOG.debug( "Not using a proxy" ); |
331 | |
} |
332 | |
|
333 | 6 | this.cl.setHostConfiguration( hc ); |
334 | 6 | this.cl.setState( state ); |
335 | |
|
336 | 6 | LOG.debug( "New HttpClient instance created." ); |
337 | 6 | } |
338 | |
|
339 | |
|
340 | |
|
341 | |
|
342 | |
|
343 | |
|
344 | |
|
345 | |
|
346 | |
|
347 | |
private HttpMethod checkLink( String link, int nbRedirect ) |
348 | |
throws IOException |
349 | |
{ |
350 | 4 | int max = MAX_NB_REDIRECT; |
351 | 4 | if ( this.http.getHttpClientParameters() != null |
352 | |
&& this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS ) != null ) |
353 | |
{ |
354 | |
try |
355 | |
{ |
356 | 0 | max = |
357 | |
Integer.valueOf( |
358 | |
this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS ) |
359 | |
.toString() ).intValue(); |
360 | |
} |
361 | 0 | catch ( NumberFormatException e ) |
362 | |
{ |
363 | 0 | if ( LOG.isWarnEnabled() ) |
364 | |
{ |
365 | 0 | LOG.warn( "HttpClient parameter '" + HttpClientParams.MAX_REDIRECTS |
366 | |
+ "' is not a number. Ignoring!" ); |
367 | |
} |
368 | 0 | } |
369 | |
} |
370 | 4 | if ( nbRedirect > max ) |
371 | |
{ |
372 | 0 | throw new HttpException( "Maximum number of redirections (" + max + ") exceeded" ); |
373 | |
} |
374 | |
|
375 | |
HttpMethod hm; |
376 | 4 | if ( HEAD_METHOD.equalsIgnoreCase( this.http.getMethod() ) ) |
377 | |
{ |
378 | 4 | hm = new HeadMethod( link ); |
379 | |
} |
380 | 0 | else if ( GET_METHOD.equalsIgnoreCase( this.http.getMethod() ) ) |
381 | |
{ |
382 | 0 | hm = new GetMethod( link ); |
383 | |
} |
384 | |
else |
385 | |
{ |
386 | 0 | if ( LOG.isErrorEnabled() ) |
387 | |
{ |
388 | 0 | LOG.error( "Unsupported method: " + this.http.getMethod() + ", using 'get'." ); |
389 | |
} |
390 | 0 | hm = new GetMethod( link ); |
391 | |
} |
392 | |
|
393 | |
|
394 | 2 | hm.setFollowRedirects( this.http.isFollowRedirects() ); |
395 | |
|
396 | |
try |
397 | |
{ |
398 | 2 | URL url = new URL( link ); |
399 | |
|
400 | 2 | cl.getHostConfiguration().setHost( url.getHost(), url.getPort(), url.getProtocol() ); |
401 | |
|
402 | 2 | cl.executeMethod( hm ); |
403 | |
|
404 | 2 | StatusLine sl = hm.getStatusLine(); |
405 | 2 | if ( sl == null ) |
406 | |
{ |
407 | 0 | if ( LOG.isErrorEnabled() ) |
408 | |
{ |
409 | 0 | LOG.error( "Unknown error validating link : " + link ); |
410 | |
} |
411 | |
|
412 | 0 | return null; |
413 | |
} |
414 | |
|
415 | 2 | if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY |
416 | |
|| hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY |
417 | |
|| hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT ) |
418 | |
{ |
419 | 0 | Header locationHeader = hm.getResponseHeader( "location" ); |
420 | |
|
421 | 0 | if ( locationHeader == null ) |
422 | |
{ |
423 | 0 | LOG.error( "Site sent redirect, but did not set Location header" ); |
424 | |
|
425 | 0 | return hm; |
426 | |
} |
427 | |
|
428 | 0 | String newLink = locationHeader.getValue(); |
429 | |
|
430 | |
|
431 | 0 | if ( !newLink.startsWith( "http://" ) && !newLink.startsWith( "https://" ) ) |
432 | |
{ |
433 | 0 | if ( newLink.startsWith( "/" ) ) |
434 | |
{ |
435 | 0 | URL oldUrl = new URL( link ); |
436 | |
|
437 | 0 | newLink = |
438 | |
oldUrl.getProtocol() + "://" + oldUrl.getHost() |
439 | |
+ ( oldUrl.getPort() > 0 ? ":" + oldUrl.getPort() : "" ) + newLink; |
440 | 0 | } |
441 | |
else |
442 | |
{ |
443 | 0 | newLink = link + newLink; |
444 | |
} |
445 | |
} |
446 | |
|
447 | 0 | HttpMethod oldHm = hm; |
448 | |
|
449 | 0 | if ( LOG.isDebugEnabled() ) |
450 | |
{ |
451 | 0 | LOG.debug( "[" + link + "] is redirected to [" + newLink + "]" ); |
452 | |
} |
453 | |
|
454 | 0 | oldHm.releaseConnection(); |
455 | |
|
456 | 0 | hm = checkLink( newLink, nbRedirect + 1 ); |
457 | |
|
458 | |
|
459 | |
|
460 | 0 | if ( hm.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0 ) |
461 | |
{ |
462 | 0 | return oldHm; |
463 | |
} |
464 | |
} |
465 | |
|
466 | |
} |
467 | |
finally |
468 | |
{ |
469 | 0 | hm.releaseConnection(); |
470 | 2 | } |
471 | |
|
472 | 2 | return hm; |
473 | |
} |
474 | |
} |