Coverage Report - org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator
 
Classes in this File Line Coverage Branch Coverage Complexity
OnlineHTTPLinkValidator
41%
59/143
21%
21/98
8,857
 
 1  
 package org.apache.maven.doxia.linkcheck.validation;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.IOException;
 23  
 
 24  
 import java.net.URL;
 25  
 import java.util.Iterator;
 26  
 import java.util.Map;
 27  
 
 28  
 import org.apache.commons.httpclient.Credentials;
 29  
 import org.apache.commons.httpclient.Header;
 30  
 import org.apache.commons.httpclient.HostConfiguration;
 31  
 import org.apache.commons.httpclient.HttpClient;
 32  
 import org.apache.commons.httpclient.HttpException;
 33  
 import org.apache.commons.httpclient.HttpMethod;
 34  
 import org.apache.commons.httpclient.HttpState;
 35  
 import org.apache.commons.httpclient.HttpStatus;
 36  
 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
 37  
 import org.apache.commons.httpclient.NTCredentials;
 38  
 import org.apache.commons.httpclient.StatusLine;
 39  
 import org.apache.commons.httpclient.UsernamePasswordCredentials;
 40  
 import org.apache.commons.httpclient.auth.AuthScope;
 41  
 import org.apache.commons.httpclient.methods.GetMethod;
 42  
 import org.apache.commons.httpclient.methods.HeadMethod;
 43  
 import org.apache.commons.httpclient.params.HttpClientParams;
 44  
 import org.apache.commons.httpclient.params.HttpMethodParams;
 45  
 
 46  
 import org.apache.commons.logging.Log;
 47  
 import org.apache.commons.logging.LogFactory;
 48  
 import org.apache.maven.doxia.linkcheck.HttpBean;
 49  
 import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
 50  
 import org.codehaus.plexus.util.StringUtils;
 51  
 
 52  
 /**
 53  
  * Checks links which are normal URLs
 54  
  *
 55  
  * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 56  
  * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
 57  
  * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 58  
  * @version $Id: OnlineHTTPLinkValidator.java 1030039 2010-11-02 13:33:03Z ltheussl $
 59  
  */
 60  
 public final class OnlineHTTPLinkValidator
 61  
     extends HTTPLinkValidator
 62  
 {
 63  
     /** Log for debug output. */
 64  4
     private static final Log LOG = LogFactory.getLog( OnlineHTTPLinkValidator.class );
 65  
 
 66  
     /** The maximum number of redirections for a link. */
 67  
     private static final int MAX_NB_REDIRECT = 10;
 68  
 
 69  
     /** Use the get method to test pages. */
 70  
     private static final String GET_METHOD = "get";
 71  
 
 72  
     /** Use the head method to test pages. */
 73  
     private static final String HEAD_METHOD = "head";
 74  
 
 75  
     /** The http bean encapsuling all http parameters supported. */
 76  
     private HttpBean http;
 77  
 
 78  
     /** The base URL for links that start with '/'. */
 79  
     private String baseURL;
 80  
 
 81  
     /** The HttpClient. */
 82  
     private transient HttpClient cl;
 83  
 
 84  
     /**
 85  
      * Constructor: initialize settings, use "head" method.
 86  
      */
 87  
     public OnlineHTTPLinkValidator()
 88  
     {
 89  2
         this( new HttpBean() );
 90  2
     }
 91  
 
 92  
     /**
 93  
      * Constructor: initialize settings.
 94  
      *
 95  
      * @param bean The http bean encapsuling all HTTP parameters supported.
 96  
      */
 97  
     public OnlineHTTPLinkValidator( HttpBean bean )
 98  6
     {
 99  6
         if ( bean == null )
 100  
         {
 101  4
             bean = new HttpBean();
 102  
         }
 103  
 
 104  6
         if ( LOG.isDebugEnabled() )
 105  
         {
 106  0
             LOG.debug( "Will use method : [" + bean.getMethod() + "]" );
 107  
         }
 108  
 
 109  6
         this.http = bean;
 110  
 
 111  6
         initHttpClient();
 112  6
     }
 113  
 
 114  
     /**
 115  
      * The base URL.
 116  
      *
 117  
      * @return the base URL.
 118  
      */
 119  
     public String getBaseURL()
 120  
     {
 121  0
         return this.baseURL;
 122  
     }
 123  
 
 124  
     /**
 125  
      * Sets the base URL. This is pre-pended to links that start with '/'.
 126  
      *
 127  
      * @param url the base URL.
 128  
      */
 129  
     public void setBaseURL( String url )
 130  
     {
 131  0
         this.baseURL = url;
 132  0
     }
 133  
 
 134  
     /** {@inheritDoc} */
 135  
     public LinkValidationResult validateLink( LinkValidationItem lvi )
 136  
     {
 137  4
         if ( this.cl == null )
 138  
         {
 139  0
             initHttpClient();
 140  
         }
 141  
 
 142  4
         if ( this.http.getHttpClientParameters() != null )
 143  
         {
 144  0
             for ( Iterator it = this.http.getHttpClientParameters().entrySet().iterator(); it.hasNext(); )
 145  
             {
 146  0
                 Map.Entry entry = (Map.Entry) it.next();
 147  
 
 148  0
                 if ( entry.getValue() != null )
 149  
                 {
 150  0
                     System.setProperty( entry.getKey().toString(), entry.getValue().toString() );
 151  
                 }
 152  0
             }
 153  
         }
 154  
 
 155  
         // Some web servers don't allow the default user-agent sent by httpClient
 156  4
         System.setProperty( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
 157  4
         this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
 158  
 
 159  4
         String link = lvi.getLink();
 160  4
         String anchor = "";
 161  4
         int idx = link.indexOf( '#' );
 162  4
         if ( idx != -1 )
 163  
         {
 164  0
             anchor = link.substring( idx + 1 );
 165  0
             link = link.substring( 0, idx );
 166  
         }
 167  
 
 168  
         try
 169  
         {
 170  4
             if ( link.startsWith( "/" ) )
 171  
             {
 172  0
                 if ( getBaseURL() == null )
 173  
                 {
 174  0
                     if ( LOG.isWarnEnabled() )
 175  
                     {
 176  0
                         LOG.warn( "Cannot check link [" + link + "] in page [" + lvi.getSource()
 177  
                             + "], as no base URL has been set!" );
 178  
                     }
 179  
 
 180  0
                     return new LinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, false,
 181  
                                                      "No base URL specified" );
 182  
                 }
 183  
 
 184  0
                 link = getBaseURL() + link;
 185  
             }
 186  
 
 187  4
             HttpMethod hm = null;
 188  
             try
 189  
             {
 190  4
                 hm = checkLink( link, 0 );
 191  
             }
 192  2
             catch ( Throwable t )
 193  
             {
 194  2
                 if ( LOG.isDebugEnabled() )
 195  
                 {
 196  0
                     LOG.debug( "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]", t );
 197  
                 }
 198  
 
 199  2
                 return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getClass().getName()
 200  
                     + " : " + t.getMessage() );
 201  2
             }
 202  
 
 203  2
             if ( hm == null )
 204  
             {
 205  0
                 return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false,
 206  
                                                  "Cannot retreive HTTP Status" );
 207  
             }
 208  
 
 209  2
             if ( hm.getStatusCode() == HttpStatus.SC_OK )
 210  
             {
 211  
                 // lets check if the anchor is present
 212  2
                 if ( anchor.length() > 0 )
 213  
                 {
 214  0
                     String content = hm.getResponseBodyAsString();
 215  
 
 216  0
                     if ( !Anchors.matchesAnchor( content, anchor ) )
 217  
                     {
 218  0
                         return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false,
 219  
                             "Missing anchor '" + anchor + "'" );
 220  
                     }
 221  
                 }
 222  2
                 return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(),
 223  
                                                      hm.getStatusText() );
 224  
             }
 225  
 
 226  0
             String msg =
 227  
                 "Received: [" + hm.getStatusCode() + "] for [" + link + "] in page [" + lvi.getSource() + "]";
 228  
             // If there's a redirection ... add a warning
 229  0
             if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
 230  
                 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
 231  
                 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
 232  
             {
 233  0
                 LOG.warn( msg );
 234  
 
 235  0
                 return new HTTPLinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, true, hm.getStatusCode(),
 236  
                                                      hm.getStatusText() );
 237  
             }
 238  
 
 239  0
             LOG.debug( msg );
 240  
 
 241  0
             return new HTTPLinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, hm.getStatusCode(),
 242  
                                                  hm.getStatusText() );
 243  
         }
 244  0
         catch ( Throwable t )
 245  
         {
 246  0
             String msg = "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]";
 247  0
             if ( LOG.isDebugEnabled() )
 248  
             {
 249  0
                 LOG.debug( msg, t );
 250  
             }
 251  
             else
 252  
             {
 253  0
                 LOG.error( msg );
 254  
             }
 255  
 
 256  0
             return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getMessage() );
 257  
         }
 258  
         finally
 259  
         {
 260  4
             System.getProperties().remove( HttpMethodParams.USER_AGENT );
 261  
 
 262  4
             if ( this.http.getHttpClientParameters() != null )
 263  
             {
 264  0
                 for ( Iterator it = this.http.getHttpClientParameters().entrySet().iterator(); it.hasNext(); )
 265  
                 {
 266  0
                     Map.Entry entry = (Map.Entry) it.next();
 267  
 
 268  0
                     if ( entry.getValue() != null )
 269  
                     {
 270  0
                         System.getProperties().remove( entry.getKey().toString() );
 271  
                     }
 272  0
                 }
 273  
             }
 274  
         }
 275  
     }
 276  
 
 277  
     /** Initialize the HttpClient. */
 278  
     private void initHttpClient()
 279  
     {
 280  6
         LOG.debug( "A new HttpClient instance is needed ..." );
 281  
 
 282  6
         this.cl = new HttpClient( new MultiThreadedHttpConnectionManager() );
 283  
 
 284  
         // Default params
 285  6
         if ( this.http.getTimeout() != 0 )
 286  
         {
 287  6
             this.cl.getHttpConnectionManager().getParams().setConnectionTimeout( this.http.getTimeout() );
 288  6
             this.cl.getHttpConnectionManager().getParams().setSoTimeout( this.http.getTimeout() );
 289  
         }
 290  6
         this.cl.getParams().setBooleanParameter( HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true );
 291  
 
 292  6
         HostConfiguration hc = new HostConfiguration();
 293  
 
 294  6
         HttpState state = new HttpState();
 295  6
         if ( StringUtils.isNotEmpty( this.http.getProxyHost() ) )
 296  
         {
 297  0
             hc.setProxy( this.http.getProxyHost(), this.http.getProxyPort() );
 298  
 
 299  0
             if ( LOG.isDebugEnabled() )
 300  
             {
 301  0
                 LOG.debug( "Proxy Host:" + this.http.getProxyHost() );
 302  0
                 LOG.debug( "Proxy Port:" + this.http.getProxyPort() );
 303  
             }
 304  
 
 305  0
             if ( StringUtils.isNotEmpty( this.http.getProxyUser() ) && this.http.getProxyPassword() != null )
 306  
             {
 307  0
                 if ( LOG.isDebugEnabled() )
 308  
                 {
 309  0
                     LOG.debug( "Proxy User:" + this.http.getProxyUser() );
 310  
                 }
 311  
 
 312  
                 Credentials credentials;
 313  0
                 if ( StringUtils.isNotEmpty( this.http.getProxyNtlmHost() ) )
 314  
                 {
 315  0
                     credentials =
 316  
                         new NTCredentials( this.http.getProxyUser(), this.http.getProxyPassword(),
 317  
                                            this.http.getProxyNtlmHost(), this.http.getProxyNtlmDomain() );
 318  
                 }
 319  
                 else
 320  
                 {
 321  0
                     credentials =
 322  
                         new UsernamePasswordCredentials( this.http.getProxyUser(), this.http.getProxyPassword() );
 323  
                 }
 324  
 
 325  0
                 state.setProxyCredentials( AuthScope.ANY, credentials );
 326  0
             }
 327  
         }
 328  
         else
 329  
         {
 330  6
             LOG.debug( "Not using a proxy" );
 331  
         }
 332  
 
 333  6
         this.cl.setHostConfiguration( hc );
 334  6
         this.cl.setState( state );
 335  
 
 336  6
         LOG.debug( "New HttpClient instance created." );
 337  6
     }
 338  
 
 339  
     /**
 340  
      * Checks the given link.
 341  
      *
 342  
      * @param link the link to check.
 343  
      * @param nbRedirect the number of current redirects.
 344  
      * @return HttpMethod
 345  
      * @throws IOException if something goes wrong.
 346  
      */
 347  
     private HttpMethod checkLink( String link, int nbRedirect )
 348  
         throws IOException
 349  
     {
 350  4
         int max = MAX_NB_REDIRECT;
 351  4
         if ( this.http.getHttpClientParameters() != null
 352  
             && this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS ) != null )
 353  
         {
 354  
             try
 355  
             {
 356  0
                 max =
 357  
                     Integer.valueOf(
 358  
                                      this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS )
 359  
                                               .toString() ).intValue();
 360  
             }
 361  0
             catch ( NumberFormatException e )
 362  
             {
 363  0
                 if ( LOG.isWarnEnabled() )
 364  
                 {
 365  0
                     LOG.warn( "HttpClient parameter '" + HttpClientParams.MAX_REDIRECTS
 366  
                         + "' is not a number. Ignoring!" );
 367  
                 }
 368  0
             }
 369  
         }
 370  4
         if ( nbRedirect > max )
 371  
         {
 372  0
             throw new HttpException( "Maximum number of redirections (" + max + ") exceeded" );
 373  
         }
 374  
 
 375  
         HttpMethod hm;
 376  4
         if ( HEAD_METHOD.equalsIgnoreCase( this.http.getMethod() ) )
 377  
         {
 378  4
             hm = new HeadMethod( link );
 379  
         }
 380  0
         else if ( GET_METHOD.equalsIgnoreCase( this.http.getMethod() ) )
 381  
         {
 382  0
             hm = new GetMethod( link );
 383  
         }
 384  
         else
 385  
         {
 386  0
             if ( LOG.isErrorEnabled() )
 387  
             {
 388  0
                 LOG.error( "Unsupported method: " + this.http.getMethod() + ", using 'get'." );
 389  
             }
 390  0
             hm = new GetMethod( link );
 391  
         }
 392  
 
 393  
         // Default
 394  2
         hm.setFollowRedirects( this.http.isFollowRedirects() );
 395  
 
 396  
         try
 397  
         {
 398  2
             URL url = new URL( link );
 399  
 
 400  2
             cl.getHostConfiguration().setHost( url.getHost(), url.getPort(), url.getProtocol() );
 401  
 
 402  2
             cl.executeMethod( hm );
 403  
 
 404  2
             StatusLine sl = hm.getStatusLine();
 405  2
             if ( sl == null )
 406  
             {
 407  0
                 if ( LOG.isErrorEnabled() )
 408  
                 {
 409  0
                     LOG.error( "Unknown error validating link : " + link );
 410  
                 }
 411  
 
 412  0
                 return null;
 413  
             }
 414  
 
 415  2
             if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
 416  
                 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
 417  
                 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
 418  
             {
 419  0
                 Header locationHeader = hm.getResponseHeader( "location" );
 420  
 
 421  0
                 if ( locationHeader == null )
 422  
                 {
 423  0
                     LOG.error( "Site sent redirect, but did not set Location header" );
 424  
 
 425  0
                     return hm;
 426  
                 }
 427  
 
 428  0
                 String newLink = locationHeader.getValue();
 429  
 
 430  
                 // Be careful to absolute/relative links
 431  0
                 if ( !newLink.startsWith( "http://" ) && !newLink.startsWith( "https://" ) )
 432  
                 {
 433  0
                     if ( newLink.startsWith( "/" ) )
 434  
                     {
 435  0
                         URL oldUrl = new URL( link );
 436  
 
 437  0
                         newLink =
 438  
                             oldUrl.getProtocol() + "://" + oldUrl.getHost()
 439  
                                 + ( oldUrl.getPort() > 0 ? ":" + oldUrl.getPort() : "" ) + newLink;
 440  0
                     }
 441  
                     else
 442  
                     {
 443  0
                         newLink = link + newLink;
 444  
                     }
 445  
                 }
 446  
 
 447  0
                 HttpMethod oldHm = hm;
 448  
 
 449  0
                 if ( LOG.isDebugEnabled() )
 450  
                 {
 451  0
                     LOG.debug( "[" + link + "] is redirected to [" + newLink + "]" );
 452  
                 }
 453  
 
 454  0
                 oldHm.releaseConnection();
 455  
 
 456  0
                 hm = checkLink( newLink, nbRedirect + 1 );
 457  
 
 458  
                 // Restore the hm to "Moved permanently" | "Moved temporarily" | "Temporary redirect"
 459  
                 // if the new location is found to allow us to report it
 460  0
                 if ( hm.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0 )
 461  
                 {
 462  0
                     return oldHm;
 463  
                 }
 464  
             }
 465  
 
 466  
         }
 467  
         finally
 468  
         {
 469  0
             hm.releaseConnection();
 470  2
         }
 471  
 
 472  2
         return hm;
 473  
     }
 474  
 }