1 | |
package org.apache.maven.doxia.linkcheck; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.File; |
23 | |
import java.io.IOException; |
24 | |
import java.io.Writer; |
25 | |
import java.nio.charset.Charset; |
26 | |
import java.nio.charset.UnsupportedCharsetException; |
27 | |
import java.util.Arrays; |
28 | |
import java.util.Iterator; |
29 | |
import java.util.LinkedList; |
30 | |
import java.util.Set; |
31 | |
|
32 | |
import org.apache.commons.logging.Log; |
33 | |
import org.apache.commons.logging.LogFactory; |
34 | |
|
35 | |
import org.apache.maven.doxia.linkcheck.model.LinkcheckFile; |
36 | |
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult; |
37 | |
import org.apache.maven.doxia.linkcheck.model.LinkcheckModel; |
38 | |
import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer; |
39 | |
import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator; |
40 | |
import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult; |
41 | |
import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem; |
42 | |
import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult; |
43 | |
import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager; |
44 | |
import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator; |
45 | |
import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator; |
46 | |
import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator; |
47 | |
|
48 | |
import org.codehaus.plexus.util.FileUtils; |
49 | |
import org.codehaus.plexus.util.IOUtil; |
50 | |
import org.codehaus.plexus.util.ReaderFactory; |
51 | |
import org.codehaus.plexus.util.StringUtils; |
52 | |
import org.codehaus.plexus.util.WriterFactory; |
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | 4 | public final class DefaultLinkCheck |
66 | |
implements LinkCheck |
67 | |
{ |
68 | |
|
69 | 4 | private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class ); |
70 | |
|
71 | |
|
72 | |
private static final long MEG = 1024 * 1024; |
73 | |
|
74 | |
|
75 | |
private File basedir; |
76 | |
|
77 | |
|
78 | |
private File linkCheckCache; |
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
|
84 | 4 | private String[] excludedLinks = null; |
85 | |
|
86 | |
|
87 | 4 | private String[] excludedPages = null; |
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | |
|
93 | |
|
94 | 4 | private int[] excludedHttpStatusErrors = null; |
95 | |
|
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | 4 | private int[] excludedHttpStatusWarnings = null; |
102 | |
|
103 | |
|
104 | |
private boolean online; |
105 | |
|
106 | |
|
107 | |
private HttpBean http; |
108 | |
|
109 | |
|
110 | 4 | private LinkValidatorManager lvm = null; |
111 | |
|
112 | |
|
113 | |
private File reportOutput; |
114 | |
|
115 | |
|
116 | 4 | private String reportOutputEncoding = "UTF-8"; |
117 | |
|
118 | |
|
119 | |
private String baseURL; |
120 | |
|
121 | |
|
122 | 4 | private String encoding = ReaderFactory.UTF_8; |
123 | |
|
124 | |
|
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
public void setBasedir( File base ) |
130 | |
{ |
131 | 4 | this.basedir = base; |
132 | 4 | } |
133 | |
|
134 | |
|
135 | |
public void setBaseURL( String url ) |
136 | |
{ |
137 | 0 | this.baseURL = url; |
138 | 0 | } |
139 | |
|
140 | |
|
141 | |
public void setExcludedHttpStatusErrors( int[] excl ) |
142 | |
{ |
143 | 0 | this.excludedHttpStatusErrors = excl; |
144 | 0 | } |
145 | |
|
146 | |
|
147 | |
public void setExcludedHttpStatusWarnings( int[] excl ) |
148 | |
{ |
149 | 0 | this.excludedHttpStatusWarnings = excl; |
150 | 0 | } |
151 | |
|
152 | |
|
153 | |
public void setExcludedLinks( String[] excl ) |
154 | |
{ |
155 | 4 | this.excludedLinks = excl; |
156 | 4 | } |
157 | |
|
158 | |
|
159 | |
public void setExcludedPages( String[] excl ) |
160 | |
{ |
161 | 0 | this.excludedPages = excl; |
162 | 0 | } |
163 | |
|
164 | |
|
165 | |
public void setHttp( HttpBean http ) |
166 | |
{ |
167 | 0 | this.http = http; |
168 | 0 | } |
169 | |
|
170 | |
|
171 | |
public void setLinkCheckCache( File cacheFile ) |
172 | |
{ |
173 | 4 | this.linkCheckCache = cacheFile; |
174 | 4 | } |
175 | |
|
176 | |
|
177 | |
public void setOnline( boolean onLine ) |
178 | |
{ |
179 | 4 | this.online = onLine; |
180 | 4 | } |
181 | |
|
182 | |
|
183 | |
public void setReportOutput( File file ) |
184 | |
{ |
185 | 4 | this.reportOutput = file; |
186 | 4 | } |
187 | |
|
188 | |
|
189 | |
public void setReportOutputEncoding( String encoding ) |
190 | |
{ |
191 | 4 | this.reportOutputEncoding = encoding; |
192 | 4 | } |
193 | |
|
194 | |
|
195 | |
public LinkcheckModel execute() |
196 | |
throws LinkCheckException |
197 | |
{ |
198 | 4 | if ( this.basedir == null ) |
199 | |
{ |
200 | 0 | LOG.error( "No base directory specified!" ); |
201 | |
|
202 | 0 | throw new NullPointerException( "The basedir can't be null!" ); |
203 | |
} |
204 | |
|
205 | 4 | if ( this.reportOutput == null ) |
206 | |
{ |
207 | 0 | LOG.warn( "No output file specified! Results will not be written!" ); |
208 | |
} |
209 | |
|
210 | 4 | LinkcheckModel model = new LinkcheckModel(); |
211 | 4 | model.setModelEncoding( reportOutputEncoding ); |
212 | 4 | model.setFiles( new LinkedList() ); |
213 | |
|
214 | 4 | displayMemoryConsumption(); |
215 | |
|
216 | 4 | LinkValidatorManager validator = getLinkValidatorManager(); |
217 | |
try |
218 | |
{ |
219 | 4 | validator.loadCache( this.linkCheckCache ); |
220 | |
} |
221 | 0 | catch ( IOException e ) |
222 | |
{ |
223 | 0 | throw new LinkCheckException( "Could not load cache: " + e.getMessage(), e ); |
224 | 4 | } |
225 | |
|
226 | 4 | displayMemoryConsumption(); |
227 | |
|
228 | 4 | LOG.info( "Begin to check links in files..." ); |
229 | |
|
230 | |
try |
231 | |
{ |
232 | 4 | findAndCheckFiles( this.basedir, model ); |
233 | |
} |
234 | 0 | catch ( IOException e ) |
235 | |
{ |
236 | 0 | throw new LinkCheckException( "Could not scan base directory: " + basedir.getAbsolutePath(), e ); |
237 | 4 | } |
238 | |
|
239 | 4 | LOG.info( "Links checked." ); |
240 | |
|
241 | 4 | displayMemoryConsumption(); |
242 | |
|
243 | |
try |
244 | |
{ |
245 | 4 | createDocument( model ); |
246 | |
} |
247 | 0 | catch ( IOException e ) |
248 | |
{ |
249 | 0 | throw new LinkCheckException( "Could not write the linkcheck document: " + e.getMessage(), e ); |
250 | 4 | } |
251 | |
|
252 | |
try |
253 | |
{ |
254 | 4 | validator.saveCache( this.linkCheckCache ); |
255 | |
} |
256 | 0 | catch ( IOException e ) |
257 | |
{ |
258 | 0 | throw new LinkCheckException( "Could not save cache: " + e.getMessage(), e ); |
259 | 4 | } |
260 | |
|
261 | 4 | displayMemoryConsumption(); |
262 | |
|
263 | 4 | return model; |
264 | |
} |
265 | |
|
266 | |
|
267 | |
public void setEncoding( String encoding ) |
268 | |
{ |
269 | 0 | if ( StringUtils.isEmpty( encoding ) ) |
270 | |
{ |
271 | 0 | throw new IllegalArgumentException( "encoding is required" ); |
272 | |
} |
273 | |
try |
274 | |
{ |
275 | 0 | Charset.forName( encoding ); |
276 | |
} |
277 | 0 | catch ( UnsupportedCharsetException e ) |
278 | |
{ |
279 | 0 | throw new IllegalArgumentException( "encoding '" + encoding + "' is unsupported" ); |
280 | 0 | } |
281 | |
|
282 | 0 | this.encoding = encoding; |
283 | 0 | } |
284 | |
|
285 | |
|
286 | |
|
287 | |
|
288 | |
|
289 | |
|
290 | |
|
291 | |
|
292 | |
|
293 | |
|
294 | |
private boolean isOnline() |
295 | |
{ |
296 | 4 | return this.online; |
297 | |
} |
298 | |
|
299 | |
|
300 | |
|
301 | |
|
302 | |
|
303 | |
|
304 | |
|
305 | |
|
306 | |
private String[] getExcludedLinks() |
307 | |
{ |
308 | 8 | return this.excludedLinks; |
309 | |
} |
310 | |
|
311 | |
|
312 | |
|
313 | |
|
314 | |
|
315 | |
|
316 | |
private String getExcludedPages() |
317 | |
{ |
318 | 4 | LinkedList patternList = new LinkedList( FileUtils.getDefaultExcludesAsList() ); |
319 | |
|
320 | 4 | if ( excludedPages != null ) |
321 | |
{ |
322 | 0 | patternList.addAll( Arrays.asList( excludedPages ) ); |
323 | |
} |
324 | |
|
325 | 4 | return StringUtils.join( patternList.iterator(), "," ); |
326 | |
} |
327 | |
|
328 | |
|
329 | |
|
330 | |
|
331 | |
|
332 | |
|
333 | |
private String getIncludedPages() |
334 | |
{ |
335 | 4 | return "**/*.html,**/*.htm"; |
336 | |
} |
337 | |
|
338 | |
|
339 | |
|
340 | |
|
341 | |
|
342 | |
|
343 | |
|
344 | |
private int[] getExcludedHttpStatusErrors() |
345 | |
{ |
346 | 0 | return this.excludedHttpStatusErrors; |
347 | |
} |
348 | |
|
349 | |
|
350 | |
|
351 | |
|
352 | |
|
353 | |
|
354 | |
|
355 | |
private int[] getExcludedHttpStatusWarnings() |
356 | |
{ |
357 | 0 | return this.excludedHttpStatusWarnings; |
358 | |
} |
359 | |
|
360 | |
|
361 | |
|
362 | |
|
363 | |
|
364 | |
|
365 | |
|
366 | |
|
367 | |
private LinkValidatorManager getLinkValidatorManager() |
368 | |
{ |
369 | 4 | if ( this.lvm == null ) |
370 | |
{ |
371 | 4 | initDefaultLinkValidatorManager(); |
372 | |
} |
373 | |
|
374 | 4 | return this.lvm; |
375 | |
} |
376 | |
|
377 | |
|
378 | |
|
379 | |
|
380 | |
private void initDefaultLinkValidatorManager() |
381 | |
{ |
382 | 4 | this.lvm = new LinkValidatorManager(); |
383 | |
|
384 | 4 | if ( getExcludedLinks() != null ) |
385 | |
{ |
386 | 4 | this.lvm.setExcludedLinks( getExcludedLinks() ); |
387 | |
} |
388 | |
|
389 | 4 | this.lvm.addLinkValidator( new FileLinkValidator( encoding ) ); |
390 | |
|
391 | 4 | if ( isOnline() ) |
392 | |
{ |
393 | 4 | OnlineHTTPLinkValidator olv = new OnlineHTTPLinkValidator( http ); |
394 | |
|
395 | 4 | if ( this.baseURL != null ) |
396 | |
{ |
397 | 0 | olv.setBaseURL( baseURL ); |
398 | |
} |
399 | |
|
400 | 4 | this.lvm.addLinkValidator( olv ); |
401 | 4 | } |
402 | |
else |
403 | |
{ |
404 | 0 | this.lvm.addLinkValidator( new OfflineHTTPLinkValidator() ); |
405 | |
} |
406 | |
|
407 | 4 | this.lvm.addLinkValidator( new MailtoLinkValidator() ); |
408 | 4 | } |
409 | |
|
410 | |
|
411 | |
|
412 | |
|
413 | |
|
414 | |
|
415 | |
|
416 | |
private void findAndCheckFiles( File base, LinkcheckModel model ) |
417 | |
throws IOException |
418 | |
{ |
419 | 4 | Iterator files = FileUtils.getFiles( base, getIncludedPages(), getExcludedPages() ).iterator(); |
420 | |
|
421 | 26 | while( files.hasNext() ) |
422 | |
{ |
423 | 22 | checkFile( (File) files.next(), model ); |
424 | |
} |
425 | 4 | } |
426 | |
|
427 | |
private void checkFile( File file, LinkcheckModel model ) |
428 | |
{ |
429 | 22 | if ( LOG.isDebugEnabled() ) |
430 | |
{ |
431 | 0 | LOG.debug( " File - " + file ); |
432 | |
} |
433 | |
|
434 | 22 | String fileRelativePath = file.getAbsolutePath(); |
435 | |
|
436 | 22 | if ( fileRelativePath.startsWith( this.basedir.getAbsolutePath() ) ) |
437 | |
{ |
438 | 22 | fileRelativePath = fileRelativePath.substring( this.basedir.getAbsolutePath().length() + 1 ); |
439 | |
} |
440 | |
|
441 | 22 | fileRelativePath = fileRelativePath.replace( '\\', '/' ); |
442 | |
|
443 | 22 | LinkcheckFile linkcheckFile = new LinkcheckFile(); |
444 | 22 | linkcheckFile.setAbsolutePath( file.getAbsolutePath() ); |
445 | 22 | linkcheckFile.setRelativePath( fileRelativePath ); |
446 | |
|
447 | 22 | check( linkcheckFile ); |
448 | |
|
449 | 22 | model.addFile( linkcheckFile ); |
450 | |
|
451 | 22 | if ( ( model.getFiles().size() % 100 == 0 ) && LOG.isInfoEnabled() ) |
452 | |
{ |
453 | 0 | LOG.info( "Found " + model.getFiles().size() + " files so far." ); |
454 | |
} |
455 | 22 | } |
456 | |
|
457 | |
|
458 | |
|
459 | |
|
460 | |
|
461 | |
|
462 | |
private void check( LinkcheckFile linkcheckFile ) |
463 | |
{ |
464 | 22 | linkcheckFile.setSuccessful( 0 ); |
465 | |
|
466 | 22 | linkcheckFile.setUnsuccessful( 0 ); |
467 | |
|
468 | 22 | if ( LOG.isDebugEnabled() ) |
469 | |
{ |
470 | 0 | LOG.debug( "Validating " + linkcheckFile.getRelativePath() ); |
471 | |
} |
472 | |
|
473 | |
final Set hrefs; |
474 | |
|
475 | |
try |
476 | |
{ |
477 | 22 | hrefs = LinkMatcher.match( new File( linkcheckFile.getAbsolutePath() ), encoding ); |
478 | |
} |
479 | 0 | catch ( Throwable t ) |
480 | |
{ |
481 | |
|
482 | |
|
483 | |
|
484 | 0 | LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" ); |
485 | 0 | LOG.debug( t.getMessage(), t ); |
486 | |
|
487 | 0 | LinkcheckFileResult lcr = new LinkcheckFileResult(); |
488 | |
|
489 | 0 | lcr.setStatus( "PARSE FAILURE" ); |
490 | |
|
491 | 0 | lcr.setTarget( "N/A" ); |
492 | |
|
493 | 0 | linkcheckFile.addResult( lcr ); |
494 | |
|
495 | 0 | return; |
496 | 22 | } |
497 | |
|
498 | |
String href; |
499 | |
LinkcheckFileResult lcr; |
500 | |
LinkValidationItem lvi; |
501 | |
LinkValidationResult result; |
502 | |
|
503 | 22 | for ( Iterator iter = hrefs.iterator(); iter.hasNext(); ) |
504 | |
{ |
505 | 1632 | href = (String) iter.next(); |
506 | |
|
507 | 1632 | lcr = new LinkcheckFileResult(); |
508 | 1632 | lvi = new LinkValidationItem( new File( linkcheckFile.getAbsolutePath() ), href ); |
509 | 1632 | result = lvm.validateLink( lvi ); |
510 | 1632 | lcr.setTarget( href ); |
511 | 1632 | lcr.setErrorMessage( result.getErrorMessage() ); |
512 | |
|
513 | 1632 | switch ( result.getStatus() ) |
514 | |
{ |
515 | |
case LinkcheckFileResult.VALID_LEVEL: |
516 | 60 | linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 ); |
517 | |
|
518 | 60 | lcr.setStatus( LinkcheckFileResult.VALID ); |
519 | |
|
520 | |
|
521 | 60 | linkcheckFile.addResult( lcr ); |
522 | |
|
523 | 60 | break; |
524 | |
case LinkcheckFileResult.ERROR_LEVEL: |
525 | 1572 | boolean ignoredError = false; |
526 | 1572 | if ( result instanceof HTTPLinkValidationResult ) |
527 | |
{ |
528 | 0 | HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result; |
529 | |
|
530 | 0 | if ( httpResult.getHttpStatusCode() > 0 |
531 | |
&& getExcludedHttpStatusErrors() != null |
532 | |
&& StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ), |
533 | |
toStringArray( getExcludedHttpStatusErrors() ) ) >= 0 ) |
534 | |
{ |
535 | 0 | ignoredError = true; |
536 | |
} |
537 | |
} |
538 | |
|
539 | 1572 | if ( ignoredError ) |
540 | |
{ |
541 | 0 | linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 ); |
542 | |
} |
543 | |
else |
544 | |
{ |
545 | 1572 | linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 ); |
546 | |
} |
547 | |
|
548 | 1572 | lcr.setStatus( ignoredError ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR ); |
549 | |
|
550 | 1572 | linkcheckFile.addResult( lcr ); |
551 | |
|
552 | 1572 | break; |
553 | |
case LinkcheckFileResult.WARNING_LEVEL: |
554 | 0 | boolean ignoredWarning = false; |
555 | 0 | if ( result instanceof HTTPLinkValidationResult ) |
556 | |
{ |
557 | 0 | HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result; |
558 | |
|
559 | 0 | if ( httpResult.getHttpStatusCode() > 0 |
560 | |
&& getExcludedHttpStatusWarnings() != null |
561 | |
&& StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ), |
562 | |
toStringArray( getExcludedHttpStatusWarnings() ) ) >= 0 ) |
563 | |
{ |
564 | 0 | ignoredWarning = true; |
565 | |
} |
566 | |
} |
567 | |
|
568 | 0 | if ( ignoredWarning ) |
569 | |
{ |
570 | 0 | linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 ); |
571 | |
} |
572 | |
else |
573 | |
{ |
574 | 0 | linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 ); |
575 | |
} |
576 | |
|
577 | 0 | lcr.setStatus( ignoredWarning ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING ); |
578 | |
|
579 | 0 | linkcheckFile.addResult( lcr ); |
580 | |
|
581 | 0 | break; |
582 | |
case LinkcheckFileResult.UNKNOWN_LEVEL: |
583 | |
default: |
584 | 0 | linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 ); |
585 | |
|
586 | 0 | lcr.setStatus( LinkcheckFileResult.UNKNOWN ); |
587 | |
|
588 | 0 | linkcheckFile.addResult( lcr ); |
589 | |
|
590 | 1632 | break; |
591 | |
} |
592 | |
} |
593 | |
|
594 | 22 | href = null; |
595 | 22 | lcr = null; |
596 | 22 | lvi = null; |
597 | 22 | result = null; |
598 | 22 | } |
599 | |
|
600 | |
|
601 | |
|
602 | |
|
603 | |
private void displayMemoryConsumption() |
604 | |
{ |
605 | 16 | if ( LOG.isDebugEnabled() ) |
606 | |
{ |
607 | 0 | Runtime r = Runtime.getRuntime(); |
608 | 0 | LOG.debug( "Memory: " + ( r.totalMemory() - r.freeMemory() ) / MEG + "M/" + r.totalMemory() / MEG |
609 | |
+ "M" ); |
610 | |
} |
611 | 16 | } |
612 | |
|
613 | |
|
614 | |
|
615 | |
|
616 | |
|
617 | |
|
618 | |
private void createDocument( LinkcheckModel model ) |
619 | |
throws IOException |
620 | |
{ |
621 | 4 | if ( this.reportOutput == null ) |
622 | |
{ |
623 | 0 | return; |
624 | |
} |
625 | |
|
626 | 4 | File dir = this.reportOutput.getParentFile(); |
627 | 4 | if ( dir != null ) |
628 | |
{ |
629 | 4 | dir.mkdirs(); |
630 | |
} |
631 | |
|
632 | 4 | Writer writer = null; |
633 | 4 | LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer(); |
634 | |
try |
635 | |
{ |
636 | 4 | writer = WriterFactory.newXmlWriter( this.reportOutput ); |
637 | 4 | xpp3Writer.write( writer, model ); |
638 | |
} |
639 | 0 | catch ( IllegalStateException e ) |
640 | |
{ |
641 | 0 | IOException ioe = |
642 | |
new IOException( e.getMessage() + " Maybe try to specify an other encoding instead of '" |
643 | |
+ encoding + "'." ); |
644 | 0 | ioe.initCause( e ); |
645 | 0 | throw ioe; |
646 | |
} |
647 | |
finally |
648 | |
{ |
649 | 4 | IOUtil.close( writer ); |
650 | 4 | } |
651 | |
|
652 | 4 | dir = null; |
653 | 4 | } |
654 | |
|
655 | |
private static String[] toStringArray( int[] array ) |
656 | |
{ |
657 | 0 | if ( array == null ) |
658 | |
{ |
659 | 0 | throw new IllegalArgumentException( "array could not be null" ); |
660 | |
} |
661 | |
|
662 | 0 | String[] result = new String[array.length]; |
663 | 0 | for ( int i = 0; i < array.length; i++ ) |
664 | |
{ |
665 | 0 | result[i] = String.valueOf( array[i] ); |
666 | |
} |
667 | 0 | return result; |
668 | |
} |
669 | |
} |