View Javadoc

1   package org.apache.maven.linkcheck;
2   
3   /* ====================================================================
4    *   Copyright 2001-2004 The Apache Software Foundation.
5    *
6    *   Licensed under the Apache License, Version 2.0 (the "License");
7    *   you may not use this file except in compliance with the License.
8    *   You may obtain a copy of the License at
9    *
10   *       http://www.apache.org/licenses/LICENSE-2.0
11   *
12   *   Unless required by applicable law or agreed to in writing, software
13   *   distributed under the License is distributed on an "AS IS" BASIS,
14   *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   *   See the License for the specific language governing permissions and
16   *   limitations under the License.
17   * ====================================================================
18   */
19  
20  import java.io.BufferedInputStream;
21  import java.io.ByteArrayOutputStream;
22  import java.io.File;
23  import java.io.FileInputStream;
24  import java.io.FileNotFoundException;
25  import java.io.InputStream;
26  import java.io.OutputStream;
27  import java.io.PrintWriter;
28  import java.util.Iterator;
29  import java.util.LinkedList;
30  import java.util.List;
31  import java.util.Set;
32  import java.util.TreeSet;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.maven.linkcheck.validation.LinkValidationItem;
37  import org.apache.maven.linkcheck.validation.LinkValidationResult;
38  import org.apache.maven.linkcheck.validation.LinkValidatorManager;
39  import org.dom4j.Document;
40  import org.dom4j.Node;
41  import org.dom4j.io.DOMReader;
42  import org.w3c.tidy.Tidy;
43  
44  /***
45   * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
46   * @version $Id: FileToCheck.java 170200 2005-05-15 06:24:19Z brett $
47   */
48  public class FileToCheck
49  {
50      /***
51       * Log for debug output
52       */
53      private static Log LOG = LogFactory.getLog(FileToCheck.class);
54  
55      private String base;
56      private File fileToCheck;
57      private String status = STATUS_OK;
58      private String message = "";
59      private int successful;
60      private int unsuccessful;
61      private List links = new LinkedList();
62  
63      public static final String STATUS_UNKNOWN = null;
64      public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
65      public static final String STATUS_OK = "OK";
66  
67      public FileToCheck(File baseFile, File fileToCheck)
68      {
69          this.base = baseFile.getAbsolutePath();
70          this.fileToCheck = fileToCheck;
71  
72      }
73  
74      private void addResult(LinkCheckResult lcr)
75      {
76          this.links.add(lcr);
77      }
78  
79      public void check(LinkValidatorManager lvm) throws Exception
80      {
81          successful = 0;
82          unsuccessful = 0;
83          status = STATUS_OK;
84          message = "";
85  
86          try
87          {
88              final Set hrefs;
89              try
90              {
91                  hrefs = getLinks();
92              }
93              catch (Throwable e)
94              {
95                  //We catch Throwable, because there is a chance that the domReader will throw
96                  //a stack overflow exception for some files
97                  LOG.info("Caught " + e.toString() + " processing " + getName());
98                  LOG.info("Exception Message: " + e.getLocalizedMessage());
99                  LinkCheckResult lcr = new LinkCheckResult();
100                 lcr.setStatus("PARSE FAILURE");
101                 lcr.setTarget("N/A");
102                 addResult(lcr);
103                 return;
104             }
105 
106             for (Iterator iter = hrefs.iterator(); iter.hasNext(); )
107             {
108                 String href = (String) iter.next();
109 
110                 LOG.debug("Link Found: " + href);
111 
112                 LinkCheckResult lcr = new LinkCheckResult();
113 
114                 LinkValidationItem lvi = new LinkValidationItem(fileToCheck, href);
115                 LinkValidationResult result = lvm.validateLink(lvi);
116                 lcr.setTarget(href);
117 
118                 switch (result.getStatus())
119                 {
120                     case LinkValidationResult.VALID :
121                         successful++;
122                         lcr.setStatus("OK");
123                         addResult(lcr); //At some point we won't want to store valid links.  The tests require that we do at present
124                         break;
125                     case LinkValidationResult.UNKNOWN :
126                         unsuccessful++;
127                         lcr.setStatus("UNKNOWN REF");
128                         addResult(lcr);
129                         break;
130                     case LinkValidationResult.INVALID :
131                         unsuccessful++;
132                         lcr.setStatus("NOT FOUND");
133                         addResult(lcr);
134                         break;
135                 }
136 
137             }
138         }
139         catch (Exception e)
140         {
141             System.err.println(message);
142             throw (e);
143         }
144     }
145 
146     private Set getLinks() throws FileNotFoundException
147     {
148         ByteArrayOutputStream baos = new ByteArrayOutputStream();
149         PrintWriter errOut = new PrintWriter(baos);
150         BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileToCheck));
151         try
152         {
153             Tidy tidy = getTidy();
154             tidy.setErrout(errOut);
155             LOG.debug("Processing:" + fileToCheck);
156             org.w3c.dom.Document domDocument = tidy.parseDOM(bin, null);
157 
158             // now read a dom4j document from
159             // JTidy's W3C DOM object
160             final DOMReader domReader = new DOMReader();
161             final Document doc = domReader.read(domDocument);
162 
163             LOG.debug(baos.toString());
164 
165             return findUniqueLinks(doc);
166         }
167         finally
168         {
169             close(bin);
170             close(baos);
171         }
172     }
173 
174     private void close(InputStream is)
175     {
176         try
177         {
178             is.close();
179         }
180         catch (Exception e)
181         {
182             //Don't really care.
183         }
184     }
185 
186     private void close(OutputStream os)
187     {
188         try
189         {
190             os.close();
191         }
192         catch (Exception e)
193         {
194             //Don't really care.
195         }
196     }
197 
198     private Set findUniqueLinks(Document doc)
199     {
200         List xpathResults = new LinkedList();
201 
202         xpathResults.addAll(doc.selectNodes("//a/@href"));
203         xpathResults.addAll(doc.selectNodes("//img/@src"));
204 
205         //<link rel="stylesheet" href="...">
206         xpathResults.addAll(doc.selectNodes("//link/@href"));
207 
208         //<script src="http://ar.atwola.com/file/adsWrapper.js">
209         xpathResults.addAll(doc.selectNodes("//script/@src"));
210 
211         Set results = new TreeSet();
212         Iterator linkIter = xpathResults.iterator();
213         while (linkIter.hasNext())
214         {
215             Node node = (Node) linkIter.next();
216             String href = node.getText();
217             results.add(href);
218         }
219 
220         return results;
221     }
222 
223     private Tidy getTidy()
224     {
225         Tidy tidy = new Tidy();
226         tidy.setMakeClean(true);
227         tidy.setXmlTags(true);
228         tidy.setXmlOut(true);
229         tidy.setXHTML(true);
230         tidy.setQuiet(true);
231         tidy.setShowWarnings(false);
232         return tidy;
233     }
234 
235     /***
236      * Returns the message.
237      * @return String
238      */
239     public String getMessage()
240     {
241         return message;
242     }
243 
244     /***
245      * Returns the status.
246      * @return int
247      */
248     public String getStatus()
249     {
250         return status;
251     }
252 
253     /***
254      * Sets the message.
255      * @param message The message to set
256      */
257     public void setMessage(String message)
258     {
259         this.message = message;
260     }
261 
262     public List getResults()
263     {
264         return links;
265     }
266 
267     /***
268      * Returns the successful.
269      * @return int
270      */
271     public int getSuccessful()
272     {
273         return successful;
274     }
275 
276     /***
277      * Returns the unsuccessful.
278      * @return int
279      */
280     public int getUnsuccessful()
281     {
282         return unsuccessful;
283     }
284 
285     public String getName()
286     {
287         String fileName = fileToCheck.getAbsolutePath();
288         if (fileName.startsWith(base)) {
289             fileName = fileName.substring(base.length() + 1);
290         }
291 
292         fileName = fileName.replace('//', '/');
293         return fileName;
294     }
295 
296     public String toXML()
297     {
298         StringBuffer buf = new StringBuffer();
299 
300         buf.append("  <file>\n");
301         buf.append("    <name><![CDATA[" + getName() + "]]></name>\n");
302         buf.append("    <successful>" + getSuccessful() + "</successful>\n");
303         buf.append("    <unsuccessful>" + getUnsuccessful() + "</unsuccessful>\n");
304 
305         Iterator iter = getResults().iterator();
306         while (iter.hasNext())
307         {
308             LinkCheckResult result = (LinkCheckResult) iter.next();
309             buf.append(result.toXML());
310         }
311 
312         buf.append("  </file>\n");
313 
314         return buf.toString();
315     }
316 
317 }