1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 package org.apache.commons.httpclient;
32
33 import java.io.ByteArrayOutputStream;
34 import java.io.IOException;
35 import java.io.InputStream;
36
37 import org.apache.commons.httpclient.util.EncodingUtil;
38 import org.apache.commons.httpclient.util.ExceptionUtil;
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41
42
43 /***
44 * <p>Transparently coalesces chunks of a HTTP stream that uses
45 * Transfer-Encoding chunked.</p>
46 *
47 * <p>Note that this class NEVER closes the underlying stream, even when close
48 * gets called. Instead, it will read until the "end" of its chunking on close,
49 * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
50 * not requiring the client to remember to read the entire contents of the
51 * response.</p>
52 *
53 * @author Ortwin Glueck
54 * @author Sean C. Sullivan
55 * @author Martin Elwin
56 * @author Eric Johnson
57 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
58 * @author Michael Becke
59 * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
60 *
61 * @since 2.0
62 *
63 */
64 public class ChunkedInputStream extends InputStream {
65 /*** The inputstream that we're wrapping */
66 private InputStream in;
67
68 /*** The chunk size */
69 private int chunkSize;
70
71 /*** The current position within the current chunk */
72 private int pos;
73
74 /*** True if we'are at the beginning of stream */
75 private boolean bof = true;
76
77 /*** True if we've reached the end of stream */
78 private boolean eof = false;
79
80 /*** True if this stream is closed */
81 private boolean closed = false;
82
83 /*** The method that this stream came from */
84 private HttpMethod method = null;
85
86 /*** Log object for this class. */
87 private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);
88
89 /***
90 * ChunkedInputStream constructor that associates the chunked input stream with a
91 * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod
92 * HTTP method} the chunked input stream originates from. If chunked input stream
93 * contains any footers (trailing headers), they will be added to the associated
94 * {@link HttpMethod HTTP method}.
95 *
96 * @param in the raw input stream
97 * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.
98 *
99 * @throws IOException If an IO error occurs
100 */
101 public ChunkedInputStream(
102 final InputStream in, final HttpMethod method) throws IOException {
103
104 if (in == null) {
105 throw new IllegalArgumentException("InputStream parameter may not be null");
106 }
107 this.in = in;
108 this.method = method;
109 this.pos = 0;
110 }
111
112 /***
113 * ChunkedInputStream constructor
114 *
115 * @param in the raw input stream
116 *
117 * @throws IOException If an IO error occurs
118 */
119 public ChunkedInputStream(final InputStream in) throws IOException {
120 this(in, null);
121 }
122
123 /***
124 * <p> Returns all the data in a chunked stream in coalesced form. A chunk
125 * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
126 * is detected.</p>
127 *
128 * <p> Trailer headers are read automcatically at the end of the stream and
129 * can be obtained with the getResponseFooters() method.</p>
130 *
131 * @return -1 of the end of the stream has been reached or the next data
132 * byte
133 * @throws IOException If an IO problem occurs
134 *
135 * @see HttpMethod#getResponseFooters()
136 */
137 public int read() throws IOException {
138
139 if (closed) {
140 throw new IOException("Attempted read from closed stream.");
141 }
142 if (eof) {
143 return -1;
144 }
145 if (pos >= chunkSize) {
146 nextChunk();
147 if (eof) {
148 return -1;
149 }
150 }
151 pos++;
152 return in.read();
153 }
154
155 /***
156 * Read some bytes from the stream.
157 * @param b The byte array that will hold the contents from the stream.
158 * @param off The offset into the byte array at which bytes will start to be
159 * placed.
160 * @param len the maximum number of bytes that can be returned.
161 * @return The number of bytes returned or -1 if the end of stream has been
162 * reached.
163 * @see java.io.InputStream#read(byte[], int, int)
164 * @throws IOException if an IO problem occurs.
165 */
166 public int read (byte[] b, int off, int len) throws IOException {
167
168 if (closed) {
169 throw new IOException("Attempted read from closed stream.");
170 }
171
172 if (eof) {
173 return -1;
174 }
175 if (pos >= chunkSize) {
176 nextChunk();
177 if (eof) {
178 return -1;
179 }
180 }
181 len = Math.min(len, chunkSize - pos);
182 int count = in.read(b, off, len);
183 pos += count;
184 return count;
185 }
186
187 /***
188 * Read some bytes from the stream.
189 * @param b The byte array that will hold the contents from the stream.
190 * @return The number of bytes returned or -1 if the end of stream has been
191 * reached.
192 * @see java.io.InputStream#read(byte[])
193 * @throws IOException if an IO problem occurs.
194 */
195 public int read (byte[] b) throws IOException {
196 return read(b, 0, b.length);
197 }
198
199 /***
200 * Read the CRLF terminator.
201 * @throws IOException If an IO error occurs.
202 */
203 private void readCRLF() throws IOException {
204 int cr = in.read();
205 int lf = in.read();
206 if ((cr != '\r') || (lf != '\n')) {
207 throw new IOException(
208 "CRLF expected at end of chunk: " + cr + "/" + lf);
209 }
210 }
211
212
213 /***
214 * Read the next chunk.
215 * @throws IOException If an IO error occurs.
216 */
217 private void nextChunk() throws IOException {
218 if (!bof) {
219 readCRLF();
220 }
221 chunkSize = getChunkSizeFromInputStream(in);
222 bof = false;
223 pos = 0;
224 if (chunkSize == 0) {
225 eof = true;
226 parseTrailerHeaders();
227 }
228 }
229
230 /***
231 * Expects the stream to start with a chunksize in hex with optional
232 * comments after a semicolon. The line must end with a CRLF: "a3; some
233 * comment\r\n" Positions the stream at the start of the next line.
234 *
235 * @param in The new input stream.
236 * @param required <tt>true<tt/> if a valid chunk must be present,
237 * <tt>false<tt/> otherwise.
238 *
239 * @return the chunk size as integer
240 *
241 * @throws IOException when the chunk size could not be parsed
242 */
243 private static int getChunkSizeFromInputStream(final InputStream in)
244 throws IOException {
245
246 ByteArrayOutputStream baos = new ByteArrayOutputStream();
247
248 int state = 0;
249 while (state != -1) {
250 int b = in.read();
251 if (b == -1) {
252 throw new IOException("chunked stream ended unexpectedly");
253 }
254 switch (state) {
255 case 0:
256 switch (b) {
257 case '\r':
258 state = 1;
259 break;
260 case '\"':
261 state = 2;
262
263 default:
264 baos.write(b);
265 }
266 break;
267
268 case 1:
269 if (b == '\n') {
270 state = -1;
271 } else {
272
273 throw new IOException("Protocol violation: Unexpected"
274 + " single newline character in chunk size");
275 }
276 break;
277
278 case 2:
279 switch (b) {
280 case '//':
281 b = in.read();
282 baos.write(b);
283 break;
284 case '\"':
285 state = 0;
286
287 default:
288 baos.write(b);
289 }
290 break;
291 default: throw new RuntimeException("assertion failed");
292 }
293 }
294
295
296 String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
297 int separator = dataString.indexOf(';');
298 dataString = (separator > 0)
299 ? dataString.substring(0, separator).trim()
300 : dataString.trim();
301
302 int result;
303 try {
304 result = Integer.parseInt(dataString.trim(), 16);
305 } catch (NumberFormatException e) {
306 throw new IOException ("Bad chunk size: " + dataString);
307 }
308 return result;
309 }
310
311 /***
312 * Reads and stores the Trailer headers.
313 * @throws IOException If an IO problem occurs
314 */
315 private void parseTrailerHeaders() throws IOException {
316 Header[] footers = null;
317 try {
318 String charset = "US-ASCII";
319 if (this.method != null) {
320 charset = this.method.getParams().getHttpElementCharset();
321 }
322 footers = HttpParser.parseHeaders(in, charset);
323 } catch(HttpException e) {
324 LOG.error("Error parsing trailer headers", e);
325 IOException ioe = new IOException(e.getMessage());
326 ExceptionUtil.initCause(ioe, e);
327 throw ioe;
328 }
329 if (this.method != null) {
330 for (int i = 0; i < footers.length; i++) {
331 this.method.addResponseFooter(footers[i]);
332 }
333 }
334 }
335
336 /***
337 * Upon close, this reads the remainder of the chunked message,
338 * leaving the underlying socket at a position to start reading the
339 * next response without scanning.
340 * @throws IOException If an IO problem occurs.
341 */
342 public void close() throws IOException {
343 if (!closed) {
344 try {
345 if (!eof) {
346 exhaustInputStream(this);
347 }
348 } finally {
349 eof = true;
350 closed = true;
351 }
352 }
353 }
354
355 /***
356 * Exhaust an input stream, reading until EOF has been encountered.
357 *
358 * <p>Note that this function is intended as a non-public utility.
359 * This is a little weird, but it seemed silly to make a utility
360 * class for this one function, so instead it is just static and
361 * shared that way.</p>
362 *
363 * @param inStream The {@link InputStream} to exhaust.
364 * @throws IOException If an IO problem occurs
365 */
366 static void exhaustInputStream(InputStream inStream) throws IOException {
367
368 byte buffer[] = new byte[1024];
369 while (inStream.read(buffer) >= 0) {
370 ;
371 }
372 }
373 }