View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.core5.http.io.entity;
29  
30  import java.io.IOException;
31  import java.io.InputStream;
32  import java.io.InputStreamReader;
33  import java.io.Reader;
34  import java.io.UnsupportedEncodingException;
35  import java.nio.charset.Charset;
36  import java.nio.charset.StandardCharsets;
37  import java.nio.charset.UnsupportedCharsetException;
38  import java.util.Collections;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Map;
42  
43  import org.apache.hc.core5.http.ContentType;
44  import org.apache.hc.core5.http.HttpEntity;
45  import org.apache.hc.core5.http.NameValuePair;
46  import org.apache.hc.core5.http.ParseException;
47  import org.apache.hc.core5.io.Closer;
48  import org.apache.hc.core5.net.WWWFormCodec;
49  import org.apache.hc.core5.util.Args;
50  import org.apache.hc.core5.util.ByteArrayBuffer;
51  import org.apache.hc.core5.util.CharArrayBuffer;
52  
53  /**
54   * Support methods for {@link HttpEntity}.
55   *
56   * @since 4.0
57   */
58  public final class EntityUtils {
59  
60      // TODO Consider using a sane value, but what is sane? 1 GB? 100 MB? 10 MB?
61      private static final int DEFAULT_ENTITY_RETURN_MAX_LENGTH = Integer.MAX_VALUE;
62      private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
63      private static final int DEFAULT_CHAR_BUFFER_SIZE = 1024;
64      private static final int DEFAULT_BYTE_BUFFER_SIZE = 4096;
65  
66      private EntityUtils() {
67          // NoOp
68      }
69  
70      /**
71       * Ensures that the entity content is fully consumed and the content stream, if exists,
72       * is closed. The process is done, <i>quietly</i> , without throwing any IOException.
73       *
74       * @param entity the entity to consume.
75       *
76       * @since 4.2
77       */
78      public static void consumeQuietly(final HttpEntity entity) {
79          try {
80            consume(entity);
81          } catch (final IOException ignore) {
82              // Ignore exception
83          }
84      }
85  
86      /**
87       * Ensures that the entity content is fully consumed and the content stream, if exists,
88       * is closed.
89       *
90       * @param entity the entity to consume.
91       * @throws IOException if an error occurs reading the input stream
92       *
93       * @since 4.1
94       */
95      public static void consume(final HttpEntity entity) throws IOException {
96          if (entity == null) {
97              return;
98          }
99          if (entity.isStreaming()) {
100             Closer.close(entity.getContent());
101         }
102     }
103 
104     /**
105      * Gets a usable content length value for the given candidate.
106      *
107      * @param contentLength an integer.
108      * @return The given content length or {@value #DEFAULT_BYTE_BUFFER_SIZE} if it is &lt 0.
109      */
110     private static int toContentLength(final int contentLength) {
111         return contentLength < 0 ? DEFAULT_BYTE_BUFFER_SIZE : contentLength;
112     }
113 
114     /**
115      * Reads the contents of an entity and return it as a byte array.
116      *
117      * @param entity the entity to read from=
118      * @return byte array containing the entity content. May be null if
119      *   {@link HttpEntity#getContent()} is null.
120      * @throws IOException if an error occurs reading the input stream
121      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
122      */
123     public static byte[] toByteArray(final HttpEntity entity) throws IOException {
124         Args.notNull(entity, "HttpEntity");
125         final int contentLength = toContentLength((int) Args.checkContentLength(entity));
126         try (final InputStream inStream = entity.getContent()) {
127             if (inStream == null) {
128                 return null;
129             }
130             final ByteArrayBuffereArrayBuffer.html#ByteArrayBuffer">ByteArrayBuffer buffer = new ByteArrayBuffer(contentLength);
131             final byte[] tmp = new byte[DEFAULT_BYTE_BUFFER_SIZE];
132             int l;
133             while ((l = inStream.read(tmp)) != -1) {
134                 buffer.append(tmp, 0, l);
135             }
136             return buffer.toByteArray();
137         }
138     }
139 
140     /**
141      * Reads the contents of an entity and return it as a byte array.
142      *
143      * @param entity the entity to read from=
144      * @return byte array containing the entity content. May be null if
145      *   {@link HttpEntity#getContent()} is null.
146      * @param maxResultLength
147      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
148      * @throws IOException if an error occurs reading the input stream
149      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
150      */
151     public static byte[] toByteArray(final HttpEntity entity, final int maxResultLength) throws IOException {
152         Args.notNull(entity, "HttpEntity");
153         final int contentLength = toContentLength((int) Args.checkContentLength(entity));
154         try (final InputStream inStream = entity.getContent()) {
155             if (inStream == null) {
156                 return null;
157             }
158             final ByteArrayBuffereArrayBuffer.html#ByteArrayBuffer">ByteArrayBuffer buffer = new ByteArrayBuffer(Math.min(maxResultLength, contentLength));
159             final byte[] tmp = new byte[DEFAULT_BYTE_BUFFER_SIZE];
160             int l;
161             while ((l = inStream.read(tmp, 0, Math.min(DEFAULT_BYTE_BUFFER_SIZE, buffer.capacity() - buffer.length()))) > 0) {
162                 buffer.append(tmp, 0, l);
163             }
164             return buffer.toByteArray();
165         }
166     }
167 
168     private static CharArrayBuffer toCharArrayBuffer(final InputStream inStream, final int contentLength,
169             final Charset charset, final int maxResultLength) throws IOException {
170         Args.notNull(inStream, "InputStream");
171         Args.positive(maxResultLength, "maxResultLength");
172         final Charset actualCharset = charset == null ? DEFAULT_CHARSET : charset;
173         final CharArrayBufferCharArrayBuffer.html#CharArrayBuffer">CharArrayBuffer buf = new CharArrayBuffer(
174                 Math.min(maxResultLength, contentLength > 0 ? contentLength : DEFAULT_CHAR_BUFFER_SIZE));
175         final Reader reader = new InputStreamReader(inStream, actualCharset);
176         final char[] tmp = new char[DEFAULT_CHAR_BUFFER_SIZE];
177         int chReadCount;
178         while ((chReadCount = reader.read(tmp)) != -1 && buf.length() < maxResultLength) {
179             buf.append(tmp, 0, chReadCount);
180         }
181         buf.setLength(Math.min(buf.length(), maxResultLength));
182         return buf;
183     }
184 
185     private static final Map<String, ContentType> CONTENT_TYPE_MAP;
186     static {
187         final ContentType[] contentTypes = {
188                 ContentType.APPLICATION_ATOM_XML,
189                 ContentType.APPLICATION_FORM_URLENCODED,
190                 ContentType.APPLICATION_JSON,
191                 ContentType.APPLICATION_SVG_XML,
192                 ContentType.APPLICATION_XHTML_XML,
193                 ContentType.APPLICATION_XML,
194                 ContentType.MULTIPART_FORM_DATA,
195                 ContentType.TEXT_HTML,
196                 ContentType.TEXT_PLAIN,
197                 ContentType.TEXT_XML };
198         final HashMap<String, ContentType> map = new HashMap<>();
199         for (final ContentType contentType: contentTypes) {
200             map.put(contentType.getMimeType(), contentType);
201         }
202         CONTENT_TYPE_MAP = Collections.unmodifiableMap(map);
203     }
204 
205     private static String toString(final HttpEntity entity, final ContentType contentType, final int maxResultLength)
206             throws IOException {
207         Args.notNull(entity, "HttpEntity");
208         final int contentLength = toContentLength((int) Args.checkContentLength(entity));
209         try (final InputStream inStream = entity.getContent()) {
210             if (inStream == null) {
211                 return null;
212             }
213             Charset charset = null;
214             if (contentType != null) {
215                 charset = contentType.getCharset();
216                 if (charset == null) {
217                     final ContentType defaultContentType = CONTENT_TYPE_MAP.get(contentType.getMimeType());
218                     charset = defaultContentType != null ? defaultContentType.getCharset() : null;
219                 }
220             }
221             return toCharArrayBuffer(inStream, contentLength, charset, maxResultLength).toString();
222         }
223     }
224 
225     /**
226      * Gets the entity content as a String, using the provided default character set
227      * if none is found in the entity.
228      * If defaultCharset is null, the default "ISO-8859-1" is used.
229      *
230      * @param entity must not be null
231      * @param defaultCharset character set to be applied if none found in the entity,
232      * or if the entity provided charset is invalid or not available.
233      * @return the entity content as a String. May be null if
234      *   {@link HttpEntity#getContent()} is null.
235      * @throws ParseException if header elements cannot be parsed
236      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
237      * @throws IOException if an error occurs reading the input stream
238      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named entity's charset is not available in
239      * this instance of the Java virtual machine and no defaultCharset is provided.
240      */
241     public static String toString(
242             final HttpEntity entity, final Charset defaultCharset) throws IOException, ParseException {
243         return toString(entity, defaultCharset, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
244     }
245 
246     /**
247      * Gets the entity content as a String, using the provided default character set
248      * if none is found in the entity.
249      * If defaultCharset is null, the default "ISO-8859-1" is used.
250      *
251      * @param entity must not be null
252      * @param defaultCharset character set to be applied if none found in the entity,
253      * or if the entity provided charset is invalid or not available.
254      * @param maxResultLength
255      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
256      * @return the entity content as a String. May be null if
257      *   {@link HttpEntity#getContent()} is null.
258      * @throws ParseException if header elements cannot be parsed
259      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
260      * @throws IOException if an error occurs reading the input stream
261      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named entity's charset is not available in
262      * this instance of the Java virtual machine and no defaultCharset is provided.
263      */
264     public static String toString(
265             final HttpEntity entity, final Charset defaultCharset, final int maxResultLength) throws IOException, ParseException {
266         Args.notNull(entity, "HttpEntity");
267         ContentType contentType = null;
268         try {
269             contentType = ContentType.parse(entity.getContentType());
270         } catch (final UnsupportedCharsetException ex) {
271             if (defaultCharset == null) {
272                 throw new UnsupportedEncodingException(ex.getMessage());
273             }
274         }
275         if (contentType != null) {
276             if (contentType.getCharset() == null) {
277                 contentType = contentType.withCharset(defaultCharset);
278             }
279         } else {
280             contentType = ContentType.DEFAULT_TEXT.withCharset(defaultCharset);
281         }
282         return toString(entity, contentType, maxResultLength);
283     }
284 
285     /**
286      * Gets the entity content as a String, using the provided default character set
287      * if none is found in the entity.
288      * If defaultCharset is null, the default "ISO-8859-1" is used.
289      *
290      * @param entity must not be null
291      * @param defaultCharset character set to be applied if none found in the entity
292      * @return the entity content as a String. May be null if
293      *   {@link HttpEntity#getContent()} is null.
294      * @throws ParseException if header elements cannot be parsed
295      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
296      * @throws IOException if an error occurs reading the input stream
297      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
298      * this instance of the Java virtual machine
299      */
300     public static String toString(
301             final HttpEntity entity, final String defaultCharset) throws IOException, ParseException {
302         return toString(entity, defaultCharset, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
303     }
304 
305     /**
306      * Gets the entity content as a String, using the provided default character set
307      * if none is found in the entity.
308      * If defaultCharset is null, the default "ISO-8859-1" is used.
309      *
310      * @param entity must not be null
311      * @param defaultCharset character set to be applied if none found in the entity
312      * @param maxResultLength
313      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
314      * @return the entity content as a String. May be null if
315      *   {@link HttpEntity#getContent()} is null.
316      * @throws ParseException if header elements cannot be parsed
317      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
318      * @throws IOException if an error occurs reading the input stream
319      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
320      * this instance of the Java virtual machine
321      */
322     public static String toString(
323             final HttpEntity entity, final String defaultCharset, final int maxResultLength) throws IOException, ParseException {
324         return toString(entity, defaultCharset != null ? Charset.forName(defaultCharset) : null, maxResultLength);
325     }
326 
327     /**
328      * Reads the contents of an entity and return it as a String.
329      * The content is converted using the character set from the entity (if any),
330      * failing that, "ISO-8859-1" is used.
331      *
332      * @param entity the entity to convert to a string; must not be null
333      * @return String containing the content.
334      * @throws ParseException if header elements cannot be parsed
335      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
336      * @throws IOException if an error occurs reading the input stream
337      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
338      * this instance of the Java virtual machine
339      */
340     public static String toString(final HttpEntity entity) throws IOException, ParseException {
341         return toString(entity, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
342     }
343 
344     /**
345      * Reads the contents of an entity and return it as a String.
346      * The content is converted using the character set from the entity (if any),
347      * failing that, "ISO-8859-1" is used.
348      *
349      * @param entity the entity to convert to a string; must not be null
350      * @param maxResultLength
351      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
352      * @return String containing the content.
353      * @throws ParseException if header elements cannot be parsed
354      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
355      * @throws IOException if an error occurs reading the input stream
356      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
357      * this instance of the Java virtual machine
358      */
359     public static String toString(final HttpEntity entity, final int maxResultLength) throws IOException, ParseException {
360         Args.notNull(entity, "HttpEntity");
361         return toString(entity, ContentType.parse(entity.getContentType()), maxResultLength);
362     }
363 
364     /**
365      * Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
366      * The encoding is taken from the entity's Content-Encoding header.
367      * <p>
368      * This is typically used while parsing an HTTP POST.
369      * </p>
370      *
371      * @param entity
372      *            The entity to parse
373      * @return a list of {@link NameValuePair} as built from the URI's query portion.
374      * @throws IOException
375      *             If there was an exception getting the entity's data.
376      */
377     public static List<NameValuePair> parse(final HttpEntity entity) throws IOException {
378         return parse(entity, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
379     }
380 
381     /**
382      * Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
383      * The encoding is taken from the entity's Content-Encoding header.
384      * <p>
385      * This is typically used while parsing an HTTP POST.
386      * </p>
387      *
388      * @param entity
389      *            The entity to parse
390      * @param maxStreamLength
391      *            The maximum size of the stream to read; use it to guard against unreasonable or malicious processing.
392      * @return a list of {@link NameValuePair} as built from the URI's query portion.
393      * @throws IOException
394      *             If there was an exception getting the entity's data.
395      */
396     public static List<NameValuePair> parse(final HttpEntity entity, final int maxStreamLength) throws IOException {
397         Args.notNull(entity, "HttpEntity");
398         final int contentLength = toContentLength((int) Args.checkContentLength(entity));
399         final ContentType contentType = ContentType.parse(entity.getContentType());
400         if (!ContentType.APPLICATION_FORM_URLENCODED.isSameMimeType(contentType)) {
401             return Collections.emptyList();
402         }
403         final Charset charset = contentType.getCharset() != null ? contentType.getCharset()
404                         : DEFAULT_CHARSET;
405         final CharArrayBuffer buf;
406         try (final InputStream inStream = entity.getContent()) {
407             if (inStream == null) {
408                 return Collections.emptyList();
409             }
410             buf = toCharArrayBuffer(inStream, contentLength, charset, maxStreamLength);
411 
412         }
413         if (buf.isEmpty()) {
414             return Collections.emptyList();
415         }
416         return WWWFormCodec.parse(buf, charset);
417     }
418 
419 }