Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
QuotedPrintableCodec |
|
| 2.888888888888889;2.889 |
1 | /* | |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.commons.codec.net; | |
19 | ||
20 | import java.io.ByteArrayOutputStream; | |
21 | import java.io.UnsupportedEncodingException; | |
22 | import java.nio.charset.Charset; | |
23 | import java.nio.charset.IllegalCharsetNameException; | |
24 | import java.nio.charset.UnsupportedCharsetException; | |
25 | import java.util.BitSet; | |
26 | ||
27 | import org.apache.commons.codec.BinaryDecoder; | |
28 | import org.apache.commons.codec.BinaryEncoder; | |
29 | import org.apache.commons.codec.Charsets; | |
30 | import org.apache.commons.codec.DecoderException; | |
31 | import org.apache.commons.codec.EncoderException; | |
32 | import org.apache.commons.codec.StringDecoder; | |
33 | import org.apache.commons.codec.StringEncoder; | |
34 | import org.apache.commons.codec.binary.StringUtils; | |
35 | ||
36 | /** | |
37 | * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>. | |
38 | * <p> | |
39 | * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to | |
40 | * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are | |
41 | * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the | |
42 | * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable | |
43 | * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping | |
44 | * gateway. | |
45 | * <p> | |
46 | * Note: | |
47 | * <p> | |
48 | * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec | |
49 | * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec | |
50 | * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy | |
51 | * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec. | |
52 | * <p> | |
53 | * This class is immutable and thread-safe. | |
54 | * | |
55 | * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: | |
56 | * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> | |
57 | * | |
58 | * @since 1.3 | |
59 | * @version $Id$ | |
60 | */ | |
61 | public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { | |
62 | /** | |
63 | * The default charset used for string decoding and encoding. | |
64 | */ | |
65 | private final Charset charset; | |
66 | ||
67 | /** | |
68 | * BitSet of printable characters as defined in RFC 1521. | |
69 | */ | |
70 | 1 | private static final BitSet PRINTABLE_CHARS = new BitSet(256); |
71 | ||
72 | private static final byte ESCAPE_CHAR = '='; | |
73 | ||
74 | private static final byte TAB = 9; | |
75 | ||
76 | private static final byte SPACE = 32; | |
77 | // Static initializer for printable chars collection | |
78 | static { | |
79 | // alpha characters | |
80 | 29 | for (int i = 33; i <= 60; i++) { |
81 | 28 | PRINTABLE_CHARS.set(i); |
82 | } | |
83 | 66 | for (int i = 62; i <= 126; i++) { |
84 | 65 | PRINTABLE_CHARS.set(i); |
85 | } | |
86 | 1 | PRINTABLE_CHARS.set(TAB); |
87 | 1 | PRINTABLE_CHARS.set(SPACE); |
88 | 1 | } |
89 | ||
90 | /** | |
91 | * Default constructor, assumes default charset of {@link Charsets#UTF_8} | |
92 | */ | |
93 | public QuotedPrintableCodec() { | |
94 | 12 | this(Charsets.UTF_8); |
95 | 12 | } |
96 | ||
97 | /** | |
98 | * Constructor which allows for the selection of a default charset. | |
99 | * | |
100 | * @param charset | |
101 | * the default string charset to use. | |
102 | * @since 1.7 | |
103 | */ | |
104 | 13 | public QuotedPrintableCodec(final Charset charset) { |
105 | 13 | this.charset = charset; |
106 | 13 | } |
107 | ||
108 | /** | |
109 | * Constructor which allows for the selection of a default charset. | |
110 | * | |
111 | * @param charsetName | |
112 | * the default string charset to use. | |
113 | * @throws UnsupportedCharsetException | |
114 | * If no support for the named charset is available | |
115 | * in this instance of the Java virtual machine | |
116 | * @throws IllegalArgumentException | |
117 | * If the given charsetName is null | |
118 | * @throws IllegalCharsetNameException | |
119 | * If the given charset name is illegal | |
120 | * | |
121 | * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable | |
122 | */ | |
123 | public QuotedPrintableCodec(final String charsetName) | |
124 | throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException { | |
125 | 2 | this(Charset.forName(charsetName)); |
126 | 1 | } |
127 | ||
128 | /** | |
129 | * Encodes byte into its quoted-printable representation. | |
130 | * | |
131 | * @param b | |
132 | * byte to encode | |
133 | * @param buffer | |
134 | * the buffer to write to | |
135 | */ | |
136 | private static final void encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) { | |
137 | 170 | buffer.write(ESCAPE_CHAR); |
138 | 170 | final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); |
139 | 170 | final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); |
140 | 170 | buffer.write(hex1); |
141 | 170 | buffer.write(hex2); |
142 | 170 | } |
143 | ||
144 | /** | |
145 | * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. | |
146 | * <p> | |
147 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
148 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
149 | * | |
150 | * @param printable | |
151 | * bitset of characters deemed quoted-printable | |
152 | * @param bytes | |
153 | * array of bytes to be encoded | |
154 | * @return array of bytes containing quoted-printable data | |
155 | */ | |
156 | public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) { | |
157 | 23 | if (bytes == null) { |
158 | 1 | return null; |
159 | } | |
160 | 22 | if (printable == null) { |
161 | 1 | printable = PRINTABLE_CHARS; |
162 | } | |
163 | 22 | final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
164 | 388 | for (final byte c : bytes) { |
165 | 366 | int b = c; |
166 | 366 | if (b < 0) { |
167 | 110 | b = 256 + b; |
168 | } | |
169 | 366 | if (printable.get(b)) { |
170 | 196 | buffer.write(b); |
171 | } else { | |
172 | 170 | encodeQuotedPrintable(b, buffer); |
173 | } | |
174 | } | |
175 | 22 | return buffer.toByteArray(); |
176 | } | |
177 | ||
178 | /** | |
179 | * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted | |
180 | * back to their original representation. | |
181 | * <p> | |
182 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
183 | * RFC 1521. | |
184 | * | |
185 | * @param bytes | |
186 | * array of quoted-printable characters | |
187 | * @return array of original bytes | |
188 | * @throws DecoderException | |
189 | * Thrown if quoted-printable decoding is unsuccessful | |
190 | */ | |
191 | public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException { | |
192 | 19 | if (bytes == null) { |
193 | 1 | return null; |
194 | } | |
195 | 18 | final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
196 | 236 | for (int i = 0; i < bytes.length; i++) { |
197 | 221 | final int b = bytes[i]; |
198 | 221 | if (b == ESCAPE_CHAR) { |
199 | try { | |
200 | 77 | final int u = Utils.digit16(bytes[++i]); |
201 | 75 | final int l = Utils.digit16(bytes[++i]); |
202 | 74 | buffer.write((char) ((u << 4) + l)); |
203 | 2 | } catch (final ArrayIndexOutOfBoundsException e) { |
204 | 2 | throw new DecoderException("Invalid quoted-printable encoding", e); |
205 | 74 | } |
206 | } else { | |
207 | 144 | buffer.write(b); |
208 | } | |
209 | } | |
210 | 15 | return buffer.toByteArray(); |
211 | } | |
212 | ||
213 | /** | |
214 | * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. | |
215 | * <p> | |
216 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
217 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
218 | * | |
219 | * @param bytes | |
220 | * array of bytes to be encoded | |
221 | * @return array of bytes containing quoted-printable data | |
222 | */ | |
223 | @Override | |
224 | public byte[] encode(final byte[] bytes) { | |
225 | 13 | return encodeQuotedPrintable(PRINTABLE_CHARS, bytes); |
226 | } | |
227 | ||
228 | /** | |
229 | * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted | |
230 | * back to their original representation. | |
231 | * <p> | |
232 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
233 | * RFC 1521. | |
234 | * | |
235 | * @param bytes | |
236 | * array of quoted-printable characters | |
237 | * @return array of original bytes | |
238 | * @throws DecoderException | |
239 | * Thrown if quoted-printable decoding is unsuccessful | |
240 | */ | |
241 | @Override | |
242 | public byte[] decode(final byte[] bytes) throws DecoderException { | |
243 | 11 | return decodeQuotedPrintable(bytes); |
244 | } | |
245 | ||
246 | /** | |
247 | * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. | |
248 | * <p> | |
249 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
250 | * RFC 1521 and is suitable for encoding binary data. | |
251 | * | |
252 | * @param str | |
253 | * string to convert to quoted-printable form | |
254 | * @return quoted-printable string | |
255 | * @throws EncoderException | |
256 | * Thrown if quoted-printable encoding is unsuccessful | |
257 | * | |
258 | * @see #getCharset() | |
259 | */ | |
260 | @Override | |
261 | public String encode(final String str) throws EncoderException { | |
262 | 7 | return this.encode(str, getCharset()); |
263 | } | |
264 | ||
265 | /** | |
266 | * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters | |
267 | * are converted back to their original representation. | |
268 | * | |
269 | * @param str | |
270 | * quoted-printable string to convert into its original form | |
271 | * @param charset | |
272 | * the original string charset | |
273 | * @return original string | |
274 | * @throws DecoderException | |
275 | * Thrown if quoted-printable decoding is unsuccessful | |
276 | * @since 1.7 | |
277 | */ | |
278 | public String decode(final String str, final Charset charset) throws DecoderException { | |
279 | 9 | if (str == null) { |
280 | 1 | return null; |
281 | } | |
282 | 8 | return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset); |
283 | } | |
284 | ||
285 | /** | |
286 | * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters | |
287 | * are converted back to their original representation. | |
288 | * | |
289 | * @param str | |
290 | * quoted-printable string to convert into its original form | |
291 | * @param charset | |
292 | * the original string charset | |
293 | * @return original string | |
294 | * @throws DecoderException | |
295 | * Thrown if quoted-printable decoding is unsuccessful | |
296 | * @throws UnsupportedEncodingException | |
297 | * Thrown if charset is not supported | |
298 | */ | |
299 | public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException { | |
300 | 3 | if (str == null) { |
301 | 1 | return null; |
302 | } | |
303 | 2 | return new String(decode(StringUtils.getBytesUsAscii(str)), charset); |
304 | } | |
305 | ||
306 | /** | |
307 | * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are | |
308 | * converted back to their original representation. | |
309 | * | |
310 | * @param str | |
311 | * quoted-printable string to convert into its original form | |
312 | * @return original string | |
313 | * @throws DecoderException | |
314 | * Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported. | |
315 | * @see #getCharset() | |
316 | */ | |
317 | @Override | |
318 | public String decode(final String str) throws DecoderException { | |
319 | 9 | return this.decode(str, this.getCharset()); |
320 | } | |
321 | ||
322 | /** | |
323 | * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. | |
324 | * | |
325 | * @param obj | |
326 | * string to convert to a quoted-printable form | |
327 | * @return quoted-printable object | |
328 | * @throws EncoderException | |
329 | * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is | |
330 | * unsuccessful | |
331 | */ | |
332 | @Override | |
333 | public Object encode(final Object obj) throws EncoderException { | |
334 | 4 | if (obj == null) { |
335 | 1 | return null; |
336 | 3 | } else if (obj instanceof byte[]) { |
337 | 1 | return encode((byte[]) obj); |
338 | 2 | } else if (obj instanceof String) { |
339 | 1 | return encode((String) obj); |
340 | } else { | |
341 | 1 | throw new EncoderException("Objects of type " + |
342 | obj.getClass().getName() + | |
343 | " cannot be quoted-printable encoded"); | |
344 | } | |
345 | } | |
346 | ||
347 | /** | |
348 | * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original | |
349 | * representation. | |
350 | * | |
351 | * @param obj | |
352 | * quoted-printable object to convert into its original form | |
353 | * @return original object | |
354 | * @throws DecoderException | |
355 | * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure | |
356 | * condition is encountered during the decode process. | |
357 | */ | |
358 | @Override | |
359 | public Object decode(final Object obj) throws DecoderException { | |
360 | 4 | if (obj == null) { |
361 | 1 | return null; |
362 | 3 | } else if (obj instanceof byte[]) { |
363 | 1 | return decode((byte[]) obj); |
364 | 2 | } else if (obj instanceof String) { |
365 | 1 | return decode((String) obj); |
366 | } else { | |
367 | 1 | throw new DecoderException("Objects of type " + |
368 | obj.getClass().getName() + | |
369 | " cannot be quoted-printable decoded"); | |
370 | } | |
371 | } | |
372 | ||
373 | /** | |
374 | * Gets the default charset name used for string decoding and encoding. | |
375 | * | |
376 | * @return the default charset name | |
377 | * @since 1.7 | |
378 | */ | |
379 | public Charset getCharset() { | |
380 | 16 | return this.charset; |
381 | } | |
382 | ||
383 | /** | |
384 | * Gets the default charset name used for string decoding and encoding. | |
385 | * | |
386 | * @return the default charset name | |
387 | */ | |
388 | public String getDefaultCharset() { | |
389 | 0 | return this.charset.name(); |
390 | } | |
391 | ||
392 | /** | |
393 | * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. | |
394 | * <p> | |
395 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
396 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
397 | * | |
398 | * @param str | |
399 | * string to convert to quoted-printable form | |
400 | * @param charset | |
401 | * the charset for str | |
402 | * @return quoted-printable string | |
403 | * @since 1.7 | |
404 | */ | |
405 | public String encode(final String str, final Charset charset) { | |
406 | 7 | if (str == null) { |
407 | 1 | return null; |
408 | } | |
409 | 6 | return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset))); |
410 | } | |
411 | ||
412 | /** | |
413 | * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. | |
414 | * <p> | |
415 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
416 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
417 | * | |
418 | * @param str | |
419 | * string to convert to quoted-printable form | |
420 | * @param charset | |
421 | * the charset for str | |
422 | * @return quoted-printable string | |
423 | * @throws UnsupportedEncodingException | |
424 | * Thrown if the charset is not supported | |
425 | */ | |
426 | public String encode(final String str, final String charset) throws UnsupportedEncodingException { | |
427 | 6 | if (str == null) { |
428 | 1 | return null; |
429 | } | |
430 | 5 | return StringUtils.newStringUsAscii(encode(str.getBytes(charset))); |
431 | } | |
432 | } |