1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.http; |
19 | |
|
20 | |
import org.apache.commons.httpclient.Header; |
21 | |
import org.apache.commons.httpclient.HostConfiguration; |
22 | |
import org.apache.commons.httpclient.HttpClient; |
23 | |
import org.apache.commons.httpclient.HttpConnectionManager; |
24 | |
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; |
25 | |
import org.apache.commons.httpclient.methods.GetMethod; |
26 | |
import org.apache.commons.httpclient.params.HttpConnectionManagerParams; |
27 | |
|
28 | |
import java.io.ByteArrayInputStream; |
29 | |
import java.io.IOException; |
30 | |
import java.io.InputStream; |
31 | |
import java.net.URI; |
32 | |
import java.net.URISyntaxException; |
33 | |
import java.net.URLEncoder; |
34 | |
import java.util.ArrayList; |
35 | |
import java.util.List; |
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | 0 | public class DefaultHTTPClient implements HTTPClient { |
45 | |
|
46 | 0 | private final MultiThreadedHttpConnectionManager manager = new MultiThreadedHttpConnectionManager(); |
47 | |
|
48 | |
private HTTPClientConfiguration configuration; |
49 | |
|
50 | 0 | private HttpClient client = null; |
51 | |
|
52 | 0 | private long _contentLength = -1; |
53 | |
|
54 | 0 | private String actualDocumentURI = null; |
55 | |
|
56 | 0 | private String contentType = null; |
57 | |
|
58 | |
public void init(HTTPClientConfiguration configuration) { |
59 | 0 | if(configuration == null) throw new NullPointerException("Illegal configuration, cannot be null."); |
60 | 0 | this.configuration = configuration; |
61 | 0 | } |
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
|
70 | |
|
71 | |
|
72 | |
public InputStream openInputStream(String uri) throws IOException { |
73 | 0 | GetMethod method = null; |
74 | |
try { |
75 | 0 | ensureClientInitialized(); |
76 | 0 | String uriStr = null; |
77 | |
try { |
78 | 0 | URI uriObj = new URI(uri); |
79 | |
|
80 | 0 | final String path = uriObj.getPath(); |
81 | 0 | final String query = uriObj.getQuery(); |
82 | 0 | final String fragment = uriObj.getFragment(); |
83 | 0 | uriStr = String.format( |
84 | |
"%s://%s%s%s%s%s%s", |
85 | |
uriObj.getScheme(), |
86 | |
uriObj.getAuthority(), |
87 | |
path != null ? URLEncoder.encode(path, "UTF-8").replaceAll("%2F", "/") : "", |
88 | |
query == null ? "" : "?", |
89 | |
query != null ? URLEncoder.encode(query, "UTF-8") |
90 | |
.replaceAll("%3D", "=") |
91 | |
.replaceAll("%26", "&") |
92 | |
: |
93 | |
"", |
94 | |
fragment == null ? "" : "#", |
95 | |
fragment != null ? URLEncoder.encode(fragment, "UTF-8") : "" |
96 | |
); |
97 | 0 | } catch (URISyntaxException e) { |
98 | 0 | throw new IllegalArgumentException("Invalid URI string.", e); |
99 | 0 | } |
100 | 0 | method = new GetMethod(uriStr); |
101 | 0 | method.setFollowRedirects(true); |
102 | 0 | client.executeMethod(method); |
103 | 0 | _contentLength = method.getResponseContentLength(); |
104 | 0 | final Header contentTypeHeader = method.getResponseHeader("Content-Type"); |
105 | 0 | contentType = contentTypeHeader == null ? null : contentTypeHeader.getValue(); |
106 | 0 | if (method.getStatusCode() != 200) { |
107 | 0 | throw new IOException( |
108 | |
"Failed to fetch " + uri + ": " + method.getStatusCode() + " " + method.getStatusText() |
109 | |
); |
110 | |
} |
111 | 0 | actualDocumentURI = method.getURI().toString(); |
112 | 0 | byte[] response = method.getResponseBody(); |
113 | |
|
114 | 0 | return new ByteArrayInputStream(response); |
115 | |
} finally { |
116 | 0 | if (method != null) { |
117 | 0 | method.releaseConnection(); |
118 | |
} |
119 | |
} |
120 | |
} |
121 | |
|
122 | |
|
123 | |
|
124 | |
|
125 | |
public void close() { |
126 | 0 | manager.shutdown(); |
127 | 0 | } |
128 | |
|
129 | |
public long getContentLength() { |
130 | 0 | return _contentLength; |
131 | |
} |
132 | |
|
133 | |
public String getActualDocumentURI() { |
134 | 0 | return actualDocumentURI; |
135 | |
} |
136 | |
|
137 | |
public String getContentType() { |
138 | 0 | return contentType; |
139 | |
} |
140 | |
|
141 | |
protected int getConnectionTimeout() { |
142 | 0 | return configuration.getDefaultTimeout(); |
143 | |
} |
144 | |
|
145 | |
protected int getSoTimeout() { |
146 | 0 | return configuration.getDefaultTimeout(); |
147 | |
} |
148 | |
|
149 | |
private void ensureClientInitialized() { |
150 | 0 | if(configuration == null) throw new IllegalStateException("client must be initialized first."); |
151 | 0 | if (client != null) return; |
152 | 0 | client = new HttpClient(manager); |
153 | 0 | HttpConnectionManager connectionManager = client.getHttpConnectionManager(); |
154 | 0 | HttpConnectionManagerParams params = connectionManager.getParams(); |
155 | 0 | params.setConnectionTimeout(configuration.getDefaultTimeout()); |
156 | 0 | params.setSoTimeout(configuration.getDefaultTimeout()); |
157 | 0 | params.setMaxTotalConnections(configuration.getMaxConnections()); |
158 | |
|
159 | 0 | HostConfiguration hostConf = client.getHostConfiguration(); |
160 | 0 | List<Header> headers = new ArrayList<Header>(); |
161 | 0 | headers.add(new Header("User-Agent", configuration.getUserAgent())); |
162 | 0 | if (configuration.getAcceptHeader() != null) { |
163 | 0 | headers.add(new Header("Accept", configuration.getAcceptHeader())); |
164 | |
} |
165 | 0 | headers.add(new Header("Accept-Language", "en-us,en-gb,en,*;q=0.3")); |
166 | 0 | headers.add(new Header("Accept-Charset", "utf-8,iso-8859-1;q=0.7,*;q=0.5")); |
167 | |
|
168 | 0 | hostConf.getParams().setParameter("http.default-headers", headers); |
169 | 0 | } |
170 | |
|
171 | |
} |