1 package org.apache.maven.doxia.linkcheck.validation;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import java.io.IOException;

import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NTCredentials;
import org.apache.commons.httpclient.StatusLine;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpMethodParams;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.doxia.linkcheck.HttpBean;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.codehaus.plexus.util.StringUtils;
51
52
53
54
55
56
57
58
59
60 public final class OnlineHTTPLinkValidator
61 extends HTTPLinkValidator
62 {
63
64 private static final Log LOG = LogFactory.getLog( OnlineHTTPLinkValidator.class );
65
66
67 private static final int MAX_NB_REDIRECT = 10;
68
69
70 private static final String GET_METHOD = "get";
71
72
73 private static final String HEAD_METHOD = "head";
74
75
76 private HttpBean http;
77
78
79 private String baseURL;
80
81
82 private transient HttpClient cl;
83
84
85
86
87 public OnlineHTTPLinkValidator()
88 {
89 this( new HttpBean() );
90 }
91
92
93
94
95
96
97 public OnlineHTTPLinkValidator( HttpBean bean )
98 {
99 if ( bean == null )
100 {
101 bean = new HttpBean();
102 }
103
104 if ( LOG.isDebugEnabled() )
105 {
106 LOG.debug( "Will use method : [" + bean.getMethod() + "]" );
107 }
108
109 this.http = bean;
110
111 initHttpClient();
112 }
113
114
115
116
117
118
119 public String getBaseURL()
120 {
121 return this.baseURL;
122 }
123
124
125
126
127
128
129 public void setBaseURL( String url )
130 {
131 this.baseURL = url;
132 }
133
134
135 public LinkValidationResult validateLink( LinkValidationItem lvi )
136 {
137 if ( this.cl == null )
138 {
139 initHttpClient();
140 }
141
142 if ( this.http.getHttpClientParameters() != null )
143 {
144 for ( Iterator it = this.http.getHttpClientParameters().entrySet().iterator(); it.hasNext(); )
145 {
146 Map.Entry entry = (Map.Entry) it.next();
147
148 if ( entry.getValue() != null )
149 {
150 System.setProperty( entry.getKey().toString(), entry.getValue().toString() );
151 }
152 }
153 }
154
155
156 System.setProperty( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
157 this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
158
159 String link = lvi.getLink();
160 String anchor = "";
161 int idx = link.indexOf( '#' );
162 if ( idx != -1 )
163 {
164 anchor = link.substring( idx + 1 );
165 link = link.substring( 0, idx );
166 }
167
168 try
169 {
170 if ( link.startsWith( "/" ) )
171 {
172 if ( getBaseURL() == null )
173 {
174 if ( LOG.isWarnEnabled() )
175 {
176 LOG.warn( "Cannot check link [" + link + "] in page [" + lvi.getSource()
177 + "], as no base URL has been set!" );
178 }
179
180 return new LinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, false,
181 "No base URL specified" );
182 }
183
184 link = getBaseURL() + link;
185 }
186
187 HttpMethod hm = null;
188 try
189 {
190 hm = checkLink( link, 0 );
191 }
192 catch ( Throwable t )
193 {
194 if ( LOG.isDebugEnabled() )
195 {
196 LOG.debug( "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]", t );
197 }
198
199 return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getClass().getName()
200 + " : " + t.getMessage() );
201 }
202
203 if ( hm == null )
204 {
205 return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false,
206 "Cannot retreive HTTP Status" );
207 }
208
209 if ( hm.getStatusCode() == HttpStatus.SC_OK )
210 {
211
212 if ( anchor.length() > 0 )
213 {
214 String content = hm.getResponseBodyAsString();
215
216 if ( !Anchors.matchesAnchor( content, anchor ) )
217 {
218 return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false,
219 "Missing anchor '" + anchor + "'" );
220 }
221 }
222 return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(),
223 hm.getStatusText() );
224 }
225
226 String msg =
227 "Received: [" + hm.getStatusCode() + "] for [" + link + "] in page [" + lvi.getSource() + "]";
228
229 if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
230 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
231 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
232 {
233 LOG.warn( msg );
234
235 return new HTTPLinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, true, hm.getStatusCode(),
236 hm.getStatusText() );
237 }
238
239 LOG.debug( msg );
240
241 return new HTTPLinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, hm.getStatusCode(),
242 hm.getStatusText() );
243 }
244 catch ( Throwable t )
245 {
246 String msg = "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]";
247 if ( LOG.isDebugEnabled() )
248 {
249 LOG.debug( msg, t );
250 }
251 else
252 {
253 LOG.error( msg );
254 }
255
256 return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getMessage() );
257 }
258 finally
259 {
260 System.getProperties().remove( HttpMethodParams.USER_AGENT );
261
262 if ( this.http.getHttpClientParameters() != null )
263 {
264 for ( Iterator it = this.http.getHttpClientParameters().entrySet().iterator(); it.hasNext(); )
265 {
266 Map.Entry entry = (Map.Entry) it.next();
267
268 if ( entry.getValue() != null )
269 {
270 System.getProperties().remove( entry.getKey().toString() );
271 }
272 }
273 }
274 }
275 }
276
277
278 private void initHttpClient()
279 {
280 LOG.debug( "A new HttpClient instance is needed ..." );
281
282 this.cl = new HttpClient( new MultiThreadedHttpConnectionManager() );
283
284
285 if ( this.http.getTimeout() != 0 )
286 {
287 this.cl.getHttpConnectionManager().getParams().setConnectionTimeout( this.http.getTimeout() );
288 this.cl.getHttpConnectionManager().getParams().setSoTimeout( this.http.getTimeout() );
289 }
290 this.cl.getParams().setBooleanParameter( HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true );
291
292 HostConfiguration hc = new HostConfiguration();
293
294 HttpState state = new HttpState();
295 if ( StringUtils.isNotEmpty( this.http.getProxyHost() ) )
296 {
297 hc.setProxy( this.http.getProxyHost(), this.http.getProxyPort() );
298
299 if ( LOG.isDebugEnabled() )
300 {
301 LOG.debug( "Proxy Host:" + this.http.getProxyHost() );
302 LOG.debug( "Proxy Port:" + this.http.getProxyPort() );
303 }
304
305 if ( StringUtils.isNotEmpty( this.http.getProxyUser() ) && this.http.getProxyPassword() != null )
306 {
307 if ( LOG.isDebugEnabled() )
308 {
309 LOG.debug( "Proxy User:" + this.http.getProxyUser() );
310 }
311
312 Credentials credentials;
313 if ( StringUtils.isNotEmpty( this.http.getProxyNtlmHost() ) )
314 {
315 credentials =
316 new NTCredentials( this.http.getProxyUser(), this.http.getProxyPassword(),
317 this.http.getProxyNtlmHost(), this.http.getProxyNtlmDomain() );
318 }
319 else
320 {
321 credentials =
322 new UsernamePasswordCredentials( this.http.getProxyUser(), this.http.getProxyPassword() );
323 }
324
325 state.setProxyCredentials( AuthScope.ANY, credentials );
326 }
327 }
328 else
329 {
330 LOG.debug( "Not using a proxy" );
331 }
332
333 this.cl.setHostConfiguration( hc );
334 this.cl.setState( state );
335
336 LOG.debug( "New HttpClient instance created." );
337 }
338
339
340
341
342
343
344
345
346
347 private HttpMethod checkLink( String link, int nbRedirect )
348 throws IOException
349 {
350 int max = MAX_NB_REDIRECT;
351 if ( this.http.getHttpClientParameters() != null
352 && this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS ) != null )
353 {
354 try
355 {
356 max =
357 Integer.valueOf(
358 this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS )
359 .toString() ).intValue();
360 }
361 catch ( NumberFormatException e )
362 {
363 if ( LOG.isWarnEnabled() )
364 {
365 LOG.warn( "HttpClient parameter '" + HttpClientParams.MAX_REDIRECTS
366 + "' is not a number. Ignoring!" );
367 }
368 }
369 }
370 if ( nbRedirect > max )
371 {
372 throw new HttpException( "Maximum number of redirections (" + max + ") exceeded" );
373 }
374
375 HttpMethod hm;
376 if ( HEAD_METHOD.equalsIgnoreCase( this.http.getMethod() ) )
377 {
378 hm = new HeadMethod( link );
379 }
380 else if ( GET_METHOD.equalsIgnoreCase( this.http.getMethod() ) )
381 {
382 hm = new GetMethod( link );
383 }
384 else
385 {
386 if ( LOG.isErrorEnabled() )
387 {
388 LOG.error( "Unsupported method: " + this.http.getMethod() + ", using 'get'." );
389 }
390 hm = new GetMethod( link );
391 }
392
393
394 hm.setFollowRedirects( this.http.isFollowRedirects() );
395
396 try
397 {
398 URL url = new URL( link );
399
400 cl.getHostConfiguration().setHost( url.getHost(), url.getPort(), url.getProtocol() );
401
402 cl.executeMethod( hm );
403
404 StatusLine sl = hm.getStatusLine();
405 if ( sl == null )
406 {
407 if ( LOG.isErrorEnabled() )
408 {
409 LOG.error( "Unknown error validating link : " + link );
410 }
411
412 return null;
413 }
414
415 if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
416 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
417 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
418 {
419 Header locationHeader = hm.getResponseHeader( "location" );
420
421 if ( locationHeader == null )
422 {
423 LOG.error( "Site sent redirect, but did not set Location header" );
424
425 return hm;
426 }
427
428 String newLink = locationHeader.getValue();
429
430
431 if ( !newLink.startsWith( "http://" ) && !newLink.startsWith( "https://" ) )
432 {
433 if ( newLink.startsWith( "/" ) )
434 {
435 URL oldUrl = new URL( link );
436
437 newLink =
438 oldUrl.getProtocol() + "://" + oldUrl.getHost()
439 + ( oldUrl.getPort() > 0 ? ":" + oldUrl.getPort() : "" ) + newLink;
440 }
441 else
442 {
443 newLink = link + newLink;
444 }
445 }
446
447 HttpMethod oldHm = hm;
448
449 if ( LOG.isDebugEnabled() )
450 {
451 LOG.debug( "[" + link + "] is redirected to [" + newLink + "]" );
452 }
453
454 oldHm.releaseConnection();
455
456 hm = checkLink( newLink, nbRedirect + 1 );
457
458
459
460 if ( hm.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0 )
461 {
462 return oldHm;
463 }
464 }
465
466 }
467 finally
468 {
469 hm.releaseConnection();
470 }
471
472 return hm;
473 }
474 }