1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.apache.commons.httpclient.util;
31
32 import java.io.UnsupportedEncodingException;
33
34 import org.apache.commons.codec.net.URLCodec;
35 import org.apache.commons.httpclient.HttpClientError;
36 import org.apache.commons.httpclient.NameValuePair;
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39
40 /***
41 * The home for utility methods that handle various encoding tasks.
42 *
43 * @author Michael Becke
44 * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
45 *
46 * @since 2.0 final
47 */
48 public class EncodingUtil {
49
50 /*** Default content encoding chatset */
51 private static final String DEFAULT_CHARSET = "ISO-8859-1";
52
53 /*** Log object for this class. */
54 private static final Log LOG = LogFactory.getLog(EncodingUtil.class);
55
56 /***
57 * Form-urlencoding routine.
58 *
59 * The default encoding for all forms is `application/x-www-form-urlencoded'.
60 * A form data set is represented in this media type as follows:
61 *
62 * The form field names and values are escaped: space characters are replaced
63 * by `+', and then reserved characters are escaped as per [URL]; that is,
64 * non-alphanumeric characters are replaced by `%HH', a percent sign and two
65 * hexadecimal digits representing the ASCII code of the character. Line breaks,
66 * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
67 *
68 * <p>
69 * if the given charset is not supported, ISO-8859-1 is used instead.
70 * </p>
71 *
72 * @param pairs the values to be encoded
73 * @param charset the character set of pairs to be encoded
74 *
75 * @return the urlencoded pairs
76 *
77 * @since 2.0 final
78 */
79 public static String formUrlEncode(NameValuePair[] pairs, String charset) {
80 try {
81 return doFormUrlEncode(pairs, charset);
82 } catch (UnsupportedEncodingException e) {
83 LOG.error("Encoding not supported: " + charset);
84 try {
85 return doFormUrlEncode(pairs, DEFAULT_CHARSET);
86 } catch (UnsupportedEncodingException fatal) {
87
88 throw new HttpClientError("Encoding not supported: " +
89 DEFAULT_CHARSET);
90 }
91 }
92 }
93
94 /***
95 * Form-urlencoding routine.
96 *
97 * The default encoding for all forms is `application/x-www-form-urlencoded'.
98 * A form data set is represented in this media type as follows:
99 *
100 * The form field names and values are escaped: space characters are replaced
101 * by `+', and then reserved characters are escaped as per [URL]; that is,
102 * non-alphanumeric characters are replaced by `%HH', a percent sign and two
103 * hexadecimal digits representing the ASCII code of the character. Line breaks,
104 * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
105 *
106 * @param pairs the values to be encoded
107 * @param charset the character set of pairs to be encoded
108 *
109 * @return the urlencoded pairs
110 * @throws UnsupportedEncodingException if charset is not supported
111 *
112 * @since 2.0 final
113 */
114 private static String doFormUrlEncode(NameValuePair[] pairs, String charset)
115 throws UnsupportedEncodingException
116 {
117 StringBuffer buf = new StringBuffer();
118 for (int i = 0; i < pairs.length; i++) {
119 URLCodec codec = new URLCodec();
120 NameValuePair pair = pairs[i];
121 if (pair.getName() != null) {
122 if (i > 0) {
123 buf.append("&");
124 }
125 buf.append(codec.encode(pair.getName(), charset));
126 buf.append("=");
127 if (pair.getValue() != null) {
128 buf.append(codec.encode(pair.getValue(), charset));
129 }
130 }
131 }
132 return buf.toString();
133 }
134
135 /***
136 * Converts the byte array of HTTP content characters to a string. If
137 * the specified charset is not supported, default system encoding
138 * is used.
139 *
140 * @param data the byte array to be encoded
141 * @param offset the index of the first byte to encode
142 * @param length the number of bytes to encode
143 * @param charset the desired character encoding
144 * @return The result of the conversion.
145 *
146 * @since 3.0
147 */
148 public static String getString(
149 final byte[] data,
150 int offset,
151 int length,
152 String charset
153 ) {
154
155 if (data == null) {
156 throw new IllegalArgumentException("Parameter may not be null");
157 }
158
159 if (charset == null || charset.length() == 0) {
160 throw new IllegalArgumentException("charset may not be null or empty");
161 }
162
163 try {
164 return new String(data, offset, length, charset);
165 } catch (UnsupportedEncodingException e) {
166
167 if (LOG.isWarnEnabled()) {
168 LOG.warn("Unsupported encoding: " + charset + ". System encoding used");
169 }
170 return new String(data, offset, length);
171 }
172 }
173
174
175 /***
176 * Converts the byte array of HTTP content characters to a string. If
177 * the specified charset is not supported, default system encoding
178 * is used.
179 *
180 * @param data the byte array to be encoded
181 * @param charset the desired character encoding
182 * @return The result of the conversion.
183 *
184 * @since 3.0
185 */
186 public static String getString(final byte[] data, String charset) {
187 return getString(data, 0, data.length, charset);
188 }
189
190 /***
191 * Converts the specified string to a byte array. If the charset is not supported the
192 * default system charset is used.
193 *
194 * @param data the string to be encoded
195 * @param charset the desired character encoding
196 * @return The resulting byte array.
197 *
198 * @since 3.0
199 */
200 public static byte[] getBytes(final String data, String charset) {
201
202 if (data == null) {
203 throw new IllegalArgumentException("data may not be null");
204 }
205
206 if (charset == null || charset.length() == 0) {
207 throw new IllegalArgumentException("charset may not be null or empty");
208 }
209
210 try {
211 return data.getBytes(charset);
212 } catch (UnsupportedEncodingException e) {
213
214 if (LOG.isWarnEnabled()) {
215 LOG.warn("Unsupported encoding: " + charset + ". System encoding used.");
216 }
217
218 return data.getBytes();
219 }
220 }
221
222 /***
223 * Converts the specified string to byte array of ASCII characters.
224 *
225 * @param data the string to be encoded
226 * @return The string as a byte array.
227 *
228 * @since 3.0
229 */
230 public static byte[] getAsciiBytes(final String data) {
231
232 if (data == null) {
233 throw new IllegalArgumentException("Parameter may not be null");
234 }
235
236 try {
237 return data.getBytes("US-ASCII");
238 } catch (UnsupportedEncodingException e) {
239 throw new HttpClientError("HttpClient requires ASCII support");
240 }
241 }
242
243 /***
244 * Converts the byte array of ASCII characters to a string. This method is
245 * to be used when decoding content of HTTP elements (such as response
246 * headers)
247 *
248 * @param data the byte array to be encoded
249 * @param offset the index of the first byte to encode
250 * @param length the number of bytes to encode
251 * @return The string representation of the byte array
252 *
253 * @since 3.0
254 */
255 public static String getAsciiString(final byte[] data, int offset, int length) {
256
257 if (data == null) {
258 throw new IllegalArgumentException("Parameter may not be null");
259 }
260
261 try {
262 return new String(data, offset, length, "US-ASCII");
263 } catch (UnsupportedEncodingException e) {
264 throw new HttpClientError("HttpClient requires ASCII support");
265 }
266 }
267
268 /***
269 * Converts the byte array of ASCII characters to a string. This method is
270 * to be used when decoding content of HTTP elements (such as response
271 * headers)
272 *
273 * @param data the byte array to be encoded
274 * @return The string representation of the byte array
275 *
276 * @since 3.0
277 */
278 public static String getAsciiString(final byte[] data) {
279 return getAsciiString(data, 0, data.length);
280 }
281
282 /***
283 * This class should not be instantiated.
284 */
285 private EncodingUtil() {
286 }
287
288 }