Coverage Report - org.apache.maven.doxia.linkcheck.LinkMatcher
 
Classes in this File Line Coverage Branch Coverage Complexity
LinkMatcher
80%
16/20
67%
4/6
2
 
 1  
 package org.apache.maven.doxia.linkcheck;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.File;
 23  
 import java.io.IOException;
 24  
 import java.io.Reader;
 25  
 import java.util.Locale;
 26  
 import java.util.Set;
 27  
 import java.util.TreeSet;
 28  
 import java.util.regex.Matcher;
 29  
 import java.util.regex.Pattern;
 30  
 
 31  
 import org.codehaus.plexus.util.IOUtil;
 32  
 import org.codehaus.plexus.util.ReaderFactory;
 33  
 
 34  
 /**
 35  
  * Link matcher. Reads the contents of a file and tries to match the following:
 36  
  * <pre>
 37  
  * &lt;a href="".../&gt;
 38  
  * &lt;link href="".../&gt;
 39  
  * &lt;img src="".../&gt;
 40  
  * &lt;script src="".../&gt;
 41  
  * </pre>
 42  
  *
 43  
  * @author <a href="mailto:mac@apache.org">Ignacio G. Mac Dowell </a>
 44  
  * @version $Id: LinkMatcher.java 800044 2009-08-02 12:28:50Z vsiveton $
 45  
  */
 46  
 class LinkMatcher
 47  
 {
 48  
     /** Regexp for link matching. */
 49  2
     private static final Pattern MATCH_PATTERN =
 50  
         Pattern.compile( "<(?>link|a|img|script)[^>]*?(?>href|src)\\s*?=\\s*?[\\\"'](.*?)[\\\"'][^>]*?",
 51  
                          Pattern.CASE_INSENSITIVE );
 52  
 
 53  
     /** No need to create a new object each time a file is processed. Just clear it. */
 54  2
     private static final Set LINK_LIST = new TreeSet();
 55  
 
 56  
     private LinkMatcher()
 57  0
     {
 58  
         // nop
 59  0
     }
 60  
 
 61  
     /**
 62  
      * Reads a file and returns its contents without any XML comments.
 63  
      *
 64  
      * @param file the file we are reading
 65  
      * @param encoding the encoding file used
 66  
      * @return a StringBuffer with file's contents.
 67  
      * @throws IOException if something goes wrong.
 68  
      * @see ReaderFactory#newReader(File, String)
 69  
      * @see IOUtil#toString(Reader)
 70  
      */
 71  
     private static String toString( File file, String encoding )
 72  
         throws IOException
 73  
     {
 74  
         String content;
 75  22
         Reader reader = null;
 76  
         try
 77  
         {
 78  22
             reader = ReaderFactory.newReader( file, encoding );
 79  
 
 80  22
             content = IOUtil.toString( reader );
 81  
         }
 82  
         finally
 83  
         {
 84  22
             IOUtil.close( reader );
 85  22
         }
 86  
 
 87  
         // some link could be in comments, remove them
 88  22
         return content.replaceAll( "(?s)<!--.*?-->", "" );
 89  
     }
 90  
 
 91  
     /**
 92  
      * Performs the actual matching.
 93  
      *
 94  
      * @param file the file to check
 95  
      * @param encoding the encoding file used
 96  
      * @return a set with all links to check
 97  
      * @throws IOException if something goes wrong
 98  
      */
 99  
     static Set match( File file, String encoding )
 100  
         throws IOException
 101  
     {
 102  22
         LINK_LIST.clear();
 103  
 
 104  22
         final Matcher m = MATCH_PATTERN.matcher( toString( file, encoding ) );
 105  
 
 106  
         String link;
 107  
 
 108  3240
         while ( m.find() )
 109  
         {
 110  3218
             link = m.group( 1 ).trim();
 111  
 
 112  3218
             if ( link.length() < 1 )
 113  
             {
 114  0
                 continue;
 115  
             }
 116  3218
             else if ( link.toLowerCase( Locale.ENGLISH ).indexOf( "javascript" ) != -1 )
 117  
             {
 118  0
                 continue;
 119  
             }
 120  
             // TODO: Review dead code and delete if not needed
 121  
             // else if (link.toLowerCase( Locale.ENGLISH ).indexOf("mailto:") != -1) {
 122  
             // continue;
 123  
             // }
 124  
 
 125  3218
             LINK_LIST.add( link );
 126  
         }
 127  
 
 128  22
         return LINK_LIST;
 129  
     }
 130  
 }