Coverage Report - org.apache.any23.http.DefaultHTTPClient
 
Classes in this File Line Coverage Branch Coverage Complexity
DefaultHTTPClient
0%
0/57
0%
0/24
3.111
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.http;
 19  
 
 20  
 import org.apache.commons.httpclient.Header;
 21  
 import org.apache.commons.httpclient.HostConfiguration;
 22  
 import org.apache.commons.httpclient.HttpClient;
 23  
 import org.apache.commons.httpclient.HttpConnectionManager;
 24  
 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
 25  
 import org.apache.commons.httpclient.methods.GetMethod;
 26  
 import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
 27  
 
 28  
 import java.io.ByteArrayInputStream;
 29  
 import java.io.IOException;
 30  
 import java.io.InputStream;
 31  
 import java.net.URI;
 32  
 import java.net.URISyntaxException;
 33  
 import java.net.URLEncoder;
 34  
 import java.util.ArrayList;
 35  
 import java.util.List;
 36  
 
 37  
 /**
 38  
  * Opens an {@link InputStream} on an HTTP URI. Is configured
 39  
  * with sane values for timeouts, default headers and so on.
 40  
  *
 41  
  * @author Paolo Capriotti
 42  
  * @author Richard Cyganiak (richard@cyganiak.de)
 43  
  */
 44  0
 public class DefaultHTTPClient implements HTTPClient {
 45  
 
 46  0
     private final MultiThreadedHttpConnectionManager manager = new MultiThreadedHttpConnectionManager();
 47  
 
 48  
     private HTTPClientConfiguration configuration;
 49  
 
 50  0
     private HttpClient client = null;
 51  
 
 52  0
     private long _contentLength = -1;
 53  
 
 54  0
     private String actualDocumentURI = null;
 55  
 
 56  0
     private String contentType = null;
 57  
 
 58  
     public void init(HTTPClientConfiguration configuration) {
 59  0
         if(configuration == null) throw new NullPointerException("Illegal configuration, cannot be null.");
 60  0
         this.configuration = configuration;
 61  0
     }
 62  
 
 63  
     /**
 64  
      *
 65  
      * Opens an {@link java.io.InputStream} from a given URI.
 66  
      * It follows redirects.
 67  
      *
 68  
      * @param uri to be opened
 69  
      * @return {@link java.io.InputStream}
 70  
      * @throws IOException
 71  
      */
 72  
     public InputStream openInputStream(String uri) throws IOException {
 73  0
         GetMethod method = null;
 74  
         try {
 75  0
             ensureClientInitialized();
 76  0
             String uriStr = null;
 77  
             try {
 78  0
                 URI uriObj = new URI(uri);
 79  
                 // [scheme:][//authority][path][?query][#fragment]
 80  0
                 final String path = uriObj.getPath();
 81  0
                 final String query = uriObj.getQuery();
 82  0
                 final String fragment = uriObj.getFragment();
 83  0
                 uriStr = String.format(
 84  
                         "%s://%s%s%s%s%s%s",
 85  
                         uriObj.getScheme(),
 86  
                         uriObj.getAuthority(),
 87  
                         path != null ? URLEncoder.encode(path, "UTF-8").replaceAll("%2F", "/") : "",
 88  
                         query == null ? "" : "?",
 89  
                         query != null ? URLEncoder.encode(query, "UTF-8")
 90  
                                 .replaceAll("%3D", "=")
 91  
                                 .replaceAll("%26", "&") 
 92  
                             :
 93  
                             "",
 94  
                         fragment == null ? "" : "#",
 95  
                         fragment != null ? URLEncoder.encode(fragment, "UTF-8") : ""
 96  
                 );
 97  0
             } catch (URISyntaxException e) {
 98  0
                 throw new IllegalArgumentException("Invalid URI string.", e);
 99  0
             }
 100  0
             method = new GetMethod(uriStr);
 101  0
             method.setFollowRedirects(true);
 102  0
             client.executeMethod(method);
 103  0
             _contentLength = method.getResponseContentLength();
 104  0
             final Header contentTypeHeader = method.getResponseHeader("Content-Type");
 105  0
             contentType = contentTypeHeader == null ? null : contentTypeHeader.getValue();
 106  0
             if (method.getStatusCode() != 200) {
 107  0
                 throw new IOException(
 108  
                         "Failed to fetch " + uri + ": " + method.getStatusCode() + " " + method.getStatusText()
 109  
                 );
 110  
             }
 111  0
             actualDocumentURI = method.getURI().toString();
 112  0
             byte[] response = method.getResponseBody();
 113  
 
 114  0
             return new ByteArrayInputStream(response);
 115  
         } finally {
 116  0
             if (method != null) {
 117  0
                 method.releaseConnection();
 118  
             }
 119  
         }
 120  
     }
 121  
 
 122  
     /**
 123  
      * Shuts down the connection manager.
 124  
      */
 125  
     public void close() {
 126  0
         manager.shutdown();
 127  0
     }
 128  
 
 129  
     public long getContentLength() {
 130  0
         return _contentLength;
 131  
     }
 132  
 
 133  
     public String getActualDocumentURI() {
 134  0
         return actualDocumentURI;
 135  
     }
 136  
 
 137  
     public String getContentType() {
 138  0
         return contentType;
 139  
     }
 140  
 
 141  
     protected int getConnectionTimeout() {
 142  0
         return configuration.getDefaultTimeout();
 143  
     }
 144  
 
 145  
     protected int getSoTimeout() {
 146  0
         return configuration.getDefaultTimeout();
 147  
     }
 148  
 
 149  
     private void ensureClientInitialized() {
 150  0
         if(configuration == null) throw new IllegalStateException("client must be initialized first.");
 151  0
         if (client != null) return;
 152  0
         client = new HttpClient(manager);
 153  0
         HttpConnectionManager connectionManager = client.getHttpConnectionManager();
 154  0
         HttpConnectionManagerParams params = connectionManager.getParams();
 155  0
         params.setConnectionTimeout(configuration.getDefaultTimeout());
 156  0
         params.setSoTimeout(configuration.getDefaultTimeout());
 157  0
         params.setMaxTotalConnections(configuration.getMaxConnections());
 158  
 
 159  0
         HostConfiguration hostConf = client.getHostConfiguration();
 160  0
         List<Header> headers = new ArrayList<Header>();
 161  0
         headers.add(new Header("User-Agent", configuration.getUserAgent()));
 162  0
         if (configuration.getAcceptHeader() != null) {
 163  0
             headers.add(new Header("Accept", configuration.getAcceptHeader()));
 164  
         }
 165  0
         headers.add(new Header("Accept-Language", "en-us,en-gb,en,*;q=0.3"));
 166  0
         headers.add(new Header("Accept-Charset", "utf-8,iso-8859-1;q=0.7,*;q=0.5"));
 167  
         // headers.add(new Header("Accept-Encoding", "x-gzip, gzip"));
 168  0
         hostConf.getParams().setParameter("http.default-headers", headers);
 169  0
     }
 170  
 
 171  
 }