1 package org.apache.maven.linkcheck;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.BufferedInputStream;
21 import java.io.ByteArrayOutputStream;
22 import java.io.File;
23 import java.io.FileInputStream;
24 import java.io.FileNotFoundException;
25 import java.io.InputStream;
26 import java.io.OutputStream;
27 import java.io.PrintWriter;
28 import java.util.Iterator;
29 import java.util.LinkedList;
30 import java.util.List;
31 import java.util.Set;
32 import java.util.TreeSet;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.maven.linkcheck.validation.LinkValidationItem;
37 import org.apache.maven.linkcheck.validation.LinkValidationResult;
38 import org.apache.maven.linkcheck.validation.LinkValidatorManager;
39 import org.dom4j.Document;
40 import org.dom4j.Node;
41 import org.dom4j.io.DOMReader;
42 import org.w3c.tidy.Tidy;
43
44 /***
45 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
46 * @version $Id: FileToCheck.java 170200 2005-05-15 06:24:19Z brett $
47 */
48 public class FileToCheck
49 {
50 /***
51 * Log for debug output
52 */
53 private static Log LOG = LogFactory.getLog(FileToCheck.class);
54
55 private String base;
56 private File fileToCheck;
57 private String status = STATUS_OK;
58 private String message = "";
59 private int successful;
60 private int unsuccessful;
61 private List links = new LinkedList();
62
63 public static final String STATUS_UNKNOWN = null;
64 public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
65 public static final String STATUS_OK = "OK";
66
67 public FileToCheck(File baseFile, File fileToCheck)
68 {
69 this.base = baseFile.getAbsolutePath();
70 this.fileToCheck = fileToCheck;
71
72 }
73
74 private void addResult(LinkCheckResult lcr)
75 {
76 this.links.add(lcr);
77 }
78
79 public void check(LinkValidatorManager lvm) throws Exception
80 {
81 successful = 0;
82 unsuccessful = 0;
83 status = STATUS_OK;
84 message = "";
85
86 try
87 {
88 final Set hrefs;
89 try
90 {
91 hrefs = getLinks();
92 }
93 catch (Throwable e)
94 {
95
96
97 LOG.info("Caught " + e.toString() + " processing " + getName());
98 LOG.info("Exception Message: " + e.getLocalizedMessage());
99 LinkCheckResult lcr = new LinkCheckResult();
100 lcr.setStatus("PARSE FAILURE");
101 lcr.setTarget("N/A");
102 addResult(lcr);
103 return;
104 }
105
106 for (Iterator iter = hrefs.iterator(); iter.hasNext(); )
107 {
108 String href = (String) iter.next();
109
110 LOG.debug("Link Found: " + href);
111
112 LinkCheckResult lcr = new LinkCheckResult();
113
114 LinkValidationItem lvi = new LinkValidationItem(fileToCheck, href);
115 LinkValidationResult result = lvm.validateLink(lvi);
116 lcr.setTarget(href);
117
118 switch (result.getStatus())
119 {
120 case LinkValidationResult.VALID :
121 successful++;
122 lcr.setStatus("OK");
123 addResult(lcr);
124 break;
125 case LinkValidationResult.UNKNOWN :
126 unsuccessful++;
127 lcr.setStatus("UNKNOWN REF");
128 addResult(lcr);
129 break;
130 case LinkValidationResult.INVALID :
131 unsuccessful++;
132 lcr.setStatus("NOT FOUND");
133 addResult(lcr);
134 break;
135 }
136
137 }
138 }
139 catch (Exception e)
140 {
141 System.err.println(message);
142 throw (e);
143 }
144 }
145
146 private Set getLinks() throws FileNotFoundException
147 {
148 ByteArrayOutputStream baos = new ByteArrayOutputStream();
149 PrintWriter errOut = new PrintWriter(baos);
150 BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileToCheck));
151 try
152 {
153 Tidy tidy = getTidy();
154 tidy.setErrout(errOut);
155 LOG.debug("Processing:" + fileToCheck);
156 org.w3c.dom.Document domDocument = tidy.parseDOM(bin, null);
157
158
159
160 final DOMReader domReader = new DOMReader();
161 final Document doc = domReader.read(domDocument);
162
163 LOG.debug(baos.toString());
164
165 return findUniqueLinks(doc);
166 }
167 finally
168 {
169 close(bin);
170 close(baos);
171 }
172 }
173
174 private void close(InputStream is)
175 {
176 try
177 {
178 is.close();
179 }
180 catch (Exception e)
181 {
182
183 }
184 }
185
186 private void close(OutputStream os)
187 {
188 try
189 {
190 os.close();
191 }
192 catch (Exception e)
193 {
194
195 }
196 }
197
198 private Set findUniqueLinks(Document doc)
199 {
200 List xpathResults = new LinkedList();
201
202 xpathResults.addAll(doc.selectNodes("//a/@href"));
203 xpathResults.addAll(doc.selectNodes("//img/@src"));
204
205
206 xpathResults.addAll(doc.selectNodes("//link/@href"));
207
208
209 xpathResults.addAll(doc.selectNodes("//script/@src"));
210
211 Set results = new TreeSet();
212 Iterator linkIter = xpathResults.iterator();
213 while (linkIter.hasNext())
214 {
215 Node node = (Node) linkIter.next();
216 String href = node.getText();
217 results.add(href);
218 }
219
220 return results;
221 }
222
223 private Tidy getTidy()
224 {
225 Tidy tidy = new Tidy();
226 tidy.setMakeClean(true);
227 tidy.setXmlTags(true);
228 tidy.setXmlOut(true);
229 tidy.setXHTML(true);
230 tidy.setQuiet(true);
231 tidy.setShowWarnings(false);
232 return tidy;
233 }
234
235 /***
236 * Returns the message.
237 * @return String
238 */
239 public String getMessage()
240 {
241 return message;
242 }
243
244 /***
245 * Returns the status.
246 * @return int
247 */
248 public String getStatus()
249 {
250 return status;
251 }
252
253 /***
254 * Sets the message.
255 * @param message The message to set
256 */
257 public void setMessage(String message)
258 {
259 this.message = message;
260 }
261
262 public List getResults()
263 {
264 return links;
265 }
266
267 /***
268 * Returns the successful.
269 * @return int
270 */
271 public int getSuccessful()
272 {
273 return successful;
274 }
275
276 /***
277 * Returns the unsuccessful.
278 * @return int
279 */
280 public int getUnsuccessful()
281 {
282 return unsuccessful;
283 }
284
285 public String getName()
286 {
287 String fileName = fileToCheck.getAbsolutePath();
288 if (fileName.startsWith(base)) {
289 fileName = fileName.substring(base.length() + 1);
290 }
291
292 fileName = fileName.replace('//', '/');
293 return fileName;
294 }
295
296 public String toXML()
297 {
298 StringBuffer buf = new StringBuffer();
299
300 buf.append(" <file>\n");
301 buf.append(" <name><![CDATA[" + getName() + "]]></name>\n");
302 buf.append(" <successful>" + getSuccessful() + "</successful>\n");
303 buf.append(" <unsuccessful>" + getUnsuccessful() + "</unsuccessful>\n");
304
305 Iterator iter = getResults().iterator();
306 while (iter.hasNext())
307 {
308 LinkCheckResult result = (LinkCheckResult) iter.next();
309 buf.append(result.toXML());
310 }
311
312 buf.append(" </file>\n");
313
314 return buf.toString();
315 }
316
317 }