1 | |
package org.apache.maven.doxia.linkcheck; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.File; |
23 | |
import java.io.IOException; |
24 | |
import java.io.Reader; |
25 | |
import java.util.Locale; |
26 | |
import java.util.Set; |
27 | |
import java.util.TreeSet; |
28 | |
import java.util.regex.Matcher; |
29 | |
import java.util.regex.Pattern; |
30 | |
|
31 | |
import org.codehaus.plexus.util.IOUtil; |
32 | |
import org.codehaus.plexus.util.ReaderFactory; |
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
class LinkMatcher |
47 | |
{ |
48 | |
|
49 | 2 | private static final Pattern MATCH_PATTERN = |
50 | |
Pattern.compile( "<(?>link|a|img|script)[^>]*?(?>href|src)\\s*?=\\s*?[\\\"'](.*?)[\\\"'][^>]*?", |
51 | |
Pattern.CASE_INSENSITIVE ); |
52 | |
|
53 | |
/** No need to create a new object each time a file is processed. Just clear it. */ |
54 | 2 | private static final Set LINK_LIST = new TreeSet(); |
55 | |
|
56 | |
private LinkMatcher() |
57 | 0 | { |
58 | |
// nop |
59 | 0 | } |
60 | |
|
61 | |
/** |
62 | |
* Reads a file and returns its contents without any XML comments. |
63 | |
* |
64 | |
* @param file the file we are reading |
65 | |
* @param encoding the encoding file used |
66 | |
* @return a StringBuffer with file's contents. |
67 | |
* @throws IOException if something goes wrong. |
68 | |
* @see ReaderFactory#newReader(File, String) |
69 | |
* @see IOUtil#toString(Reader) |
70 | |
*/ |
71 | |
private static String toString( File file, String encoding ) |
72 | |
throws IOException |
73 | |
{ |
74 | |
String content; |
75 | 22 | Reader reader = null; |
76 | |
try |
77 | |
{ |
78 | 22 | reader = ReaderFactory.newReader( file, encoding ); |
79 | |
|
80 | 22 | content = IOUtil.toString( reader ); |
81 | |
} |
82 | |
finally |
83 | |
{ |
84 | 22 | IOUtil.close( reader ); |
85 | 22 | } |
86 | |
|
87 | |
// some link could be in comments, remove them |
88 | 22 | return content.replaceAll( "(?s)<!--.*?-->", "" ); |
89 | |
} |
90 | |
|
91 | |
/** |
92 | |
* Performs the actual matching. |
93 | |
* |
94 | |
* @param file the file to check |
95 | |
* @param encoding the encoding file used |
96 | |
* @return a set with all links to check |
97 | |
* @throws IOException if something goes wrong |
98 | |
*/ |
99 | |
static Set match( File file, String encoding ) |
100 | |
throws IOException |
101 | |
{ |
102 | 22 | LINK_LIST.clear(); |
103 | |
|
104 | 22 | final Matcher m = MATCH_PATTERN.matcher( toString( file, encoding ) ); |
105 | |
|
106 | |
String link; |
107 | |
|
108 | 3240 | while ( m.find() ) |
109 | |
{ |
110 | 3218 | link = m.group( 1 ).trim(); |
111 | |
|
112 | 3218 | if ( link.length() < 1 ) |
113 | |
{ |
114 | 0 | continue; |
115 | |
} |
116 | 3218 | else if ( link.toLowerCase( Locale.ENGLISH ).indexOf( "javascript" ) != -1 ) |
117 | |
{ |
118 | 0 | continue; |
119 | |
} |
120 | |
// TODO: Review dead code and delete if not needed |
121 | |
// else if (link.toLowerCase( Locale.ENGLISH ).indexOf("mailto:") != -1) { |
122 | |
// continue; |
123 | |
// } |
124 | |
|
125 | 3218 | LINK_LIST.add( link ); |
126 | |
} |
127 | |
|
128 | 22 | return LINK_LIST; |
129 | |
} |
130 | |
} |