View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.core5.http.io.entity;
29  
30  import java.io.IOException;
31  import java.io.InputStream;
32  import java.io.InputStreamReader;
33  import java.io.Reader;
34  import java.io.UnsupportedEncodingException;
35  import java.nio.charset.Charset;
36  import java.nio.charset.StandardCharsets;
37  import java.nio.charset.UnsupportedCharsetException;
38  import java.util.Collections;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Map;
42  
43  import org.apache.hc.core5.http.ContentType;
44  import org.apache.hc.core5.http.EntityDetails;
45  import org.apache.hc.core5.http.HttpEntity;
46  import org.apache.hc.core5.http.NameValuePair;
47  import org.apache.hc.core5.http.ParseException;
48  import org.apache.hc.core5.io.Closer;
49  import org.apache.hc.core5.net.WWWFormCodec;
50  import org.apache.hc.core5.util.Args;
51  import org.apache.hc.core5.util.ByteArrayBuffer;
52  import org.apache.hc.core5.util.CharArrayBuffer;
53  
54  /**
55   * Support methods for {@link HttpEntity}.
56   *
57   * @since 4.0
58   */
59  public final class EntityUtils {
60  
61      // TODO Consider using a sane value, but what is sane? 1 GB? 100 MB? 10 MB?
62      private static final int DEFAULT_ENTITY_RETURN_MAX_LENGTH = Integer.MAX_VALUE;
63      private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
64      private static final int DEFAULT_CHAR_BUFFER_SIZE = 1024;
65      private static final int DEFAULT_BYTE_BUFFER_SIZE = 4096;
66  
67      private EntityUtils() {
68          // NoOp
69      }
70  
71      /**
72       * Ensures that the entity content is fully consumed and the content stream, if exists,
73       * is closed. The process is done, <i>quietly</i> , without throwing any IOException.
74       *
75       * @param entity the entity to consume.
76       *
77       * @since 4.2
78       */
79      public static void consumeQuietly(final HttpEntity entity) {
80          try {
81            consume(entity);
82          } catch (final IOException ignore) {
83              // Ignore exception
84          }
85      }
86  
87      /**
88       * Ensures that the entity content is fully consumed and the content stream, if exists,
89       * is closed.
90       *
91       * @param entity the entity to consume.
92       * @throws IOException if an error occurs reading the input stream
93       *
94       * @since 4.1
95       */
96      public static void consume(final HttpEntity entity) throws IOException {
97          if (entity == null) {
98              return;
99          }
100         if (entity.isStreaming()) {
101             Closer.close(entity.getContent());
102         }
103     }
104 
105     /**
106      * Gets a usable content length value for the given candidate.
107      *
108      * @param contentLength an integer.
109      * @return The given content length or {@value #DEFAULT_BYTE_BUFFER_SIZE} if it is &lt 0.
110      */
111     private static int toContentLength(final int contentLength) {
112         return contentLength < 0 ? DEFAULT_BYTE_BUFFER_SIZE : contentLength;
113     }
114 
115     static long checkContentLength(final EntityDetails entityDetails) {
116         // -1 is a special value,
117         // 0 is allowed as well,
118         // but never more than Integer.MAX_VALUE.
119         return Args.checkRange(entityDetails.getContentLength(), -1, Integer.MAX_VALUE,
120                 "HTTP entity too large to be buffered in memory)");
121     }
122 
123     /**
124      * Reads the contents of an entity and return it as a byte array.
125      *
126      * @param entity the entity to read from=
127      * @return byte array containing the entity content. May be null if
128      *   {@link HttpEntity#getContent()} is null.
129      * @throws IOException if an error occurs reading the input stream
130      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
131      */
132     public static byte[] toByteArray(final HttpEntity entity) throws IOException {
133         Args.notNull(entity, "HttpEntity");
134         final int contentLength = toContentLength((int) checkContentLength(entity));
135         try (final InputStream inStream = entity.getContent()) {
136             if (inStream == null) {
137                 return null;
138             }
139             final ByteArrayBuffer buffer = new ByteArrayBuffer(contentLength);
140             final byte[] tmp = new byte[DEFAULT_BYTE_BUFFER_SIZE];
141             int l;
142             while ((l = inStream.read(tmp)) != -1) {
143                 buffer.append(tmp, 0, l);
144             }
145             return buffer.toByteArray();
146         }
147     }
148 
149     /**
150      * Reads the contents of an entity and return it as a byte array.
151      *
152      * @param entity the entity to read from=
153      * @return byte array containing the entity content. May be null if
154      *   {@link HttpEntity#getContent()} is null.
155      * @param maxResultLength
156      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
157      * @throws IOException if an error occurs reading the input stream
158      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
159      */
160     public static byte[] toByteArray(final HttpEntity entity, final int maxResultLength) throws IOException {
161         Args.notNull(entity, "HttpEntity");
162         final int contentLength = toContentLength((int) checkContentLength(entity));
163         try (final InputStream inStream = entity.getContent()) {
164             if (inStream == null) {
165                 return null;
166             }
167             final ByteArrayBuffer buffer = new ByteArrayBuffer(Math.min(maxResultLength, contentLength));
168             final byte[] tmp = new byte[DEFAULT_BYTE_BUFFER_SIZE];
169             int l;
170             while ((l = inStream.read(tmp)) != -1 && buffer.length() < maxResultLength) {
171                 buffer.append(tmp, 0, l);
172             }
173             buffer.setLength(Math.min(buffer.length(), maxResultLength));
174             return buffer.toByteArray();
175         }
176     }
177 
178     private static CharArrayBuffer toCharArrayBuffer(final InputStream inStream, final int contentLength,
179             final Charset charset, final int maxResultLength) throws IOException {
180         Args.notNull(inStream, "InputStream");
181         Args.positive(maxResultLength, "maxResultLength");
182         final Charset actualCharset = charset == null ? DEFAULT_CHARSET : charset;
183         final CharArrayBuffer buf = new CharArrayBuffer(
184                 Math.min(maxResultLength, contentLength > 0 ? contentLength : DEFAULT_CHAR_BUFFER_SIZE));
185         final Reader reader = new InputStreamReader(inStream, actualCharset);
186         final char[] tmp = new char[DEFAULT_CHAR_BUFFER_SIZE];
187         int chReadCount;
188         while ((chReadCount = reader.read(tmp)) != -1 && buf.length() < maxResultLength) {
189             buf.append(tmp, 0, chReadCount);
190         }
191         buf.setLength(Math.min(buf.length(), maxResultLength));
192         return buf;
193     }
194 
195     private static final Map<String, ContentType> CONTENT_TYPE_MAP;
196     static {
197         final ContentType[] contentTypes = {
198                 ContentType.APPLICATION_ATOM_XML,
199                 ContentType.APPLICATION_FORM_URLENCODED,
200                 ContentType.APPLICATION_JSON,
201                 ContentType.APPLICATION_SVG_XML,
202                 ContentType.APPLICATION_XHTML_XML,
203                 ContentType.APPLICATION_XML,
204                 ContentType.MULTIPART_FORM_DATA,
205                 ContentType.TEXT_HTML,
206                 ContentType.TEXT_PLAIN,
207                 ContentType.TEXT_XML };
208         final HashMap<String, ContentType> map = new HashMap<>();
209         for (final ContentType contentType: contentTypes) {
210             map.put(contentType.getMimeType(), contentType);
211         }
212         CONTENT_TYPE_MAP = Collections.unmodifiableMap(map);
213     }
214 
215     private static String toString(final HttpEntity entity, final ContentType contentType, final int maxResultLength)
216             throws IOException {
217         Args.notNull(entity, "HttpEntity");
218         final int contentLength = toContentLength((int) checkContentLength(entity));
219         try (final InputStream inStream = entity.getContent()) {
220             if (inStream == null) {
221                 return null;
222             }
223             Charset charset = null;
224             if (contentType != null) {
225                 charset = contentType.getCharset();
226                 if (charset == null) {
227                     final ContentType defaultContentType = CONTENT_TYPE_MAP.get(contentType.getMimeType());
228                     charset = defaultContentType != null ? defaultContentType.getCharset() : null;
229                 }
230             }
231             return toCharArrayBuffer(inStream, contentLength, charset, maxResultLength).toString();
232         }
233     }
234 
235     /**
236      * Gets the entity content as a String, using the provided default character set
237      * if none is found in the entity.
238      * If defaultCharset is null, the default "ISO-8859-1" is used.
239      *
240      * @param entity must not be null
241      * @param defaultCharset character set to be applied if none found in the entity,
242      * or if the entity provided charset is invalid or not available.
243      * @return the entity content as a String. May be null if
244      *   {@link HttpEntity#getContent()} is null.
245      * @throws ParseException if header elements cannot be parsed
246      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
247      * @throws IOException if an error occurs reading the input stream
248      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named entity's charset is not available in
249      * this instance of the Java virtual machine and no defaultCharset is provided.
250      */
251     public static String toString(
252             final HttpEntity entity, final Charset defaultCharset) throws IOException, ParseException {
253         return toString(entity, defaultCharset, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
254     }
255 
256     /**
257      * Gets the entity content as a String, using the provided default character set
258      * if none is found in the entity.
259      * If defaultCharset is null, the default "ISO-8859-1" is used.
260      *
261      * @param entity must not be null
262      * @param defaultCharset character set to be applied if none found in the entity,
263      * or if the entity provided charset is invalid or not available.
264      * @param maxResultLength
265      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
266      * @return the entity content as a String. May be null if
267      *   {@link HttpEntity#getContent()} is null.
268      * @throws ParseException if header elements cannot be parsed
269      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
270      * @throws IOException if an error occurs reading the input stream
271      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named entity's charset is not available in
272      * this instance of the Java virtual machine and no defaultCharset is provided.
273      */
274     public static String toString(
275             final HttpEntity entity, final Charset defaultCharset, final int maxResultLength) throws IOException, ParseException {
276         Args.notNull(entity, "HttpEntity");
277         ContentType contentType = null;
278         try {
279             contentType = ContentType.parse(entity.getContentType());
280         } catch (final UnsupportedCharsetException ex) {
281             if (defaultCharset == null) {
282                 throw new UnsupportedEncodingException(ex.getMessage());
283             }
284         }
285         if (contentType != null) {
286             if (contentType.getCharset() == null) {
287                 contentType = contentType.withCharset(defaultCharset);
288             }
289         } else {
290             contentType = ContentType.DEFAULT_TEXT.withCharset(defaultCharset);
291         }
292         return toString(entity, contentType, maxResultLength);
293     }
294 
295     /**
296      * Gets the entity content as a String, using the provided default character set
297      * if none is found in the entity.
298      * If defaultCharset is null, the default "ISO-8859-1" is used.
299      *
300      * @param entity must not be null
301      * @param defaultCharset character set to be applied if none found in the entity
302      * @return the entity content as a String. May be null if
303      *   {@link HttpEntity#getContent()} is null.
304      * @throws ParseException if header elements cannot be parsed
305      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
306      * @throws IOException if an error occurs reading the input stream
307      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
308      * this instance of the Java virtual machine
309      */
310     public static String toString(
311             final HttpEntity entity, final String defaultCharset) throws IOException, ParseException {
312         return toString(entity, defaultCharset, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
313     }
314 
315     /**
316      * Gets the entity content as a String, using the provided default character set
317      * if none is found in the entity.
318      * If defaultCharset is null, the default "ISO-8859-1" is used.
319      *
320      * @param entity must not be null
321      * @param defaultCharset character set to be applied if none found in the entity
322      * @param maxResultLength
323      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
324      * @return the entity content as a String. May be null if
325      *   {@link HttpEntity#getContent()} is null.
326      * @throws ParseException if header elements cannot be parsed
327      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
328      * @throws IOException if an error occurs reading the input stream
329      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
330      * this instance of the Java virtual machine
331      */
332     public static String toString(
333             final HttpEntity entity, final String defaultCharset, final int maxResultLength) throws IOException, ParseException {
334         return toString(entity, defaultCharset != null ? Charset.forName(defaultCharset) : null, maxResultLength);
335     }
336 
337     /**
338      * Reads the contents of an entity and return it as a String.
339      * The content is converted using the character set from the entity (if any),
340      * failing that, "ISO-8859-1" is used.
341      *
342      * @param entity the entity to convert to a string; must not be null
343      * @return String containing the content.
344      * @throws ParseException if header elements cannot be parsed
345      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
346      * @throws IOException if an error occurs reading the input stream
347      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
348      * this instance of the Java virtual machine
349      */
350     public static String toString(final HttpEntity entity) throws IOException, ParseException {
351         return toString(entity, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
352     }
353 
354     /**
355      * Reads the contents of an entity and return it as a String.
356      * The content is converted using the character set from the entity (if any),
357      * failing that, "ISO-8859-1" is used.
358      *
359      * @param entity the entity to convert to a string; must not be null
360      * @param maxResultLength
361      *            The maximum size of the String to return; use it to guard against unreasonable or malicious processing.
362      * @return String containing the content.
363      * @throws ParseException if header elements cannot be parsed
364      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
365      * @throws IOException if an error occurs reading the input stream
366      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
367      * this instance of the Java virtual machine
368      */
369     public static String toString(final HttpEntity entity, final int maxResultLength) throws IOException, ParseException {
370         Args.notNull(entity, "HttpEntity");
371         return toString(entity, ContentType.parse(entity.getContentType()), maxResultLength);
372     }
373 
374     /**
375      * Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
376      * The encoding is taken from the entity's Content-Encoding header.
377      * <p>
378      * This is typically used while parsing an HTTP POST.
379      * </p>
380      *
381      * @param entity
382      *            The entity to parse
383      * @return a list of {@link NameValuePair} as built from the URI's query portion.
384      * @throws IOException
385      *             If there was an exception getting the entity's data.
386      */
387     public static List<NameValuePair> parse(final HttpEntity entity) throws IOException {
388         return parse(entity, DEFAULT_ENTITY_RETURN_MAX_LENGTH);
389     }
390 
391     /**
392      * Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
393      * The encoding is taken from the entity's Content-Encoding header.
394      * <p>
395      * This is typically used while parsing an HTTP POST.
396      * </p>
397      *
398      * @param entity
399      *            The entity to parse
400      * @param maxStreamLength
401      *            The maximum size of the stream to read; use it to guard against unreasonable or malicious processing.
402      * @return a list of {@link NameValuePair} as built from the URI's query portion.
403      * @throws IOException
404      *             If there was an exception getting the entity's data.
405      */
406     public static List<NameValuePair> parse(final HttpEntity entity, final int maxStreamLength) throws IOException {
407         Args.notNull(entity, "HttpEntity");
408         final int contentLength = toContentLength((int) checkContentLength(entity));
409         final ContentType contentType = ContentType.parse(entity.getContentType());
410         if (!ContentType.APPLICATION_FORM_URLENCODED.isSameMimeType(contentType)) {
411             return Collections.emptyList();
412         }
413         final Charset charset = contentType.getCharset(DEFAULT_CHARSET);
414         final CharArrayBuffer buf;
415         try (final InputStream inStream = entity.getContent()) {
416             if (inStream == null) {
417                 return Collections.emptyList();
418             }
419             buf = toCharArrayBuffer(inStream, contentLength, charset, maxStreamLength);
420 
421         }
422         if (buf.isEmpty()) {
423             return Collections.emptyList();
424         }
425         return WWWFormCodec.parse(buf, charset);
426     }
427 
428 }