Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
QuotedPrintableCodec |
|
| 2.888888888888889;2.889 |
1 | /* | |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.commons.codec.net; | |
19 | ||
20 | import java.io.ByteArrayOutputStream; | |
21 | import java.io.UnsupportedEncodingException; | |
22 | import java.nio.charset.Charset; | |
23 | import java.util.BitSet; | |
24 | ||
25 | import org.apache.commons.codec.BinaryDecoder; | |
26 | import org.apache.commons.codec.BinaryEncoder; | |
27 | import org.apache.commons.codec.Charsets; | |
28 | import org.apache.commons.codec.DecoderException; | |
29 | import org.apache.commons.codec.EncoderException; | |
30 | import org.apache.commons.codec.StringDecoder; | |
31 | import org.apache.commons.codec.StringEncoder; | |
32 | import org.apache.commons.codec.binary.StringUtils; | |
33 | ||
34 | /** | |
35 | * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>. | |
36 | * <p> | |
37 | * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to | |
38 | * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are | |
39 | * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the | |
40 | * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable | |
41 | * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping | |
42 | * gateway. | |
43 | * <p> | |
44 | * Note: | |
45 | * <p> | |
46 | * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec | |
47 | * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec | |
48 | * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy | |
49 | * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec. | |
50 | * <p> | |
51 | * This class is immutable and thread-safe. | |
52 | * | |
53 | * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: | |
54 | * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> | |
55 | * | |
56 | * @since 1.3 | |
57 | * @version $Id$ | |
58 | */ | |
59 | public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { | |
60 | /** | |
61 | * The default charset used for string decoding and encoding. | |
62 | */ | |
63 | private final Charset charset; | |
64 | ||
65 | /** | |
66 | * BitSet of printable characters as defined in RFC 1521. | |
67 | */ | |
68 | 1 | private static final BitSet PRINTABLE_CHARS = new BitSet(256); |
69 | ||
70 | private static final byte ESCAPE_CHAR = '='; | |
71 | ||
72 | private static final byte TAB = 9; | |
73 | ||
74 | private static final byte SPACE = 32; | |
75 | // Static initializer for printable chars collection | |
76 | static { | |
77 | // alpha characters | |
78 | 29 | for (int i = 33; i <= 60; i++) { |
79 | 28 | PRINTABLE_CHARS.set(i); |
80 | } | |
81 | 66 | for (int i = 62; i <= 126; i++) { |
82 | 65 | PRINTABLE_CHARS.set(i); |
83 | } | |
84 | 1 | PRINTABLE_CHARS.set(TAB); |
85 | 1 | PRINTABLE_CHARS.set(SPACE); |
86 | 1 | } |
87 | ||
88 | /** | |
89 | * Default constructor. | |
90 | */ | |
91 | public QuotedPrintableCodec() { | |
92 | 12 | this(Charsets.UTF_8); |
93 | 12 | } |
94 | ||
95 | /** | |
96 | * Constructor which allows for the selection of a default charset. | |
97 | * | |
98 | * @param charset | |
99 | * the default string charset to use. | |
100 | * @throws UnsupportedCharsetException | |
101 | * If the named charset is unavailable | |
102 | * @since 1.7 | |
103 | */ | |
104 | 13 | public QuotedPrintableCodec(Charset charset) { |
105 | 13 | this.charset = charset; |
106 | 13 | } |
107 | ||
108 | /** | |
109 | * Constructor which allows for the selection of a default charset. | |
110 | * | |
111 | * @param charsetName | |
112 | * the default string charset to use. | |
113 | * @throws java.nio.charset.UnsupportedCharsetException | |
114 | * If the named charset is unavailable | |
115 | * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable | |
116 | */ | |
117 | public QuotedPrintableCodec(String charsetName) { | |
118 | 2 | this(Charset.forName(charsetName)); |
119 | 1 | } |
120 | ||
121 | /** | |
122 | * Encodes byte into its quoted-printable representation. | |
123 | * | |
124 | * @param b | |
125 | * byte to encode | |
126 | * @param buffer | |
127 | * the buffer to write to | |
128 | */ | |
129 | private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) { | |
130 | 170 | buffer.write(ESCAPE_CHAR); |
131 | 170 | char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); |
132 | 170 | char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); |
133 | 170 | buffer.write(hex1); |
134 | 170 | buffer.write(hex2); |
135 | 170 | } |
136 | ||
137 | /** | |
138 | * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. | |
139 | * <p> | |
140 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
141 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
142 | * | |
143 | * @param printable | |
144 | * bitset of characters deemed quoted-printable | |
145 | * @param bytes | |
146 | * array of bytes to be encoded | |
147 | * @return array of bytes containing quoted-printable data | |
148 | */ | |
149 | public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) { | |
150 | 23 | if (bytes == null) { |
151 | 1 | return null; |
152 | } | |
153 | 22 | if (printable == null) { |
154 | 1 | printable = PRINTABLE_CHARS; |
155 | } | |
156 | 22 | ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
157 | 388 | for (byte c : bytes) { |
158 | 366 | int b = c; |
159 | 366 | if (b < 0) { |
160 | 110 | b = 256 + b; |
161 | } | |
162 | 366 | if (printable.get(b)) { |
163 | 196 | buffer.write(b); |
164 | } else { | |
165 | 170 | encodeQuotedPrintable(b, buffer); |
166 | } | |
167 | } | |
168 | 22 | return buffer.toByteArray(); |
169 | } | |
170 | ||
171 | /** | |
172 | * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted | |
173 | * back to their original representation. | |
174 | * <p> | |
175 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
176 | * RFC 1521. | |
177 | * | |
178 | * @param bytes | |
179 | * array of quoted-printable characters | |
180 | * @return array of original bytes | |
181 | * @throws DecoderException | |
182 | * Thrown if quoted-printable decoding is unsuccessful | |
183 | */ | |
184 | public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException { | |
185 | 19 | if (bytes == null) { |
186 | 1 | return null; |
187 | } | |
188 | 18 | ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
189 | 236 | for (int i = 0; i < bytes.length; i++) { |
190 | 221 | int b = bytes[i]; |
191 | 221 | if (b == ESCAPE_CHAR) { |
192 | try { | |
193 | 77 | int u = Utils.digit16(bytes[++i]); |
194 | 75 | int l = Utils.digit16(bytes[++i]); |
195 | 74 | buffer.write((char) ((u << 4) + l)); |
196 | 2 | } catch (ArrayIndexOutOfBoundsException e) { |
197 | 2 | throw new DecoderException("Invalid quoted-printable encoding", e); |
198 | 74 | } |
199 | } else { | |
200 | 144 | buffer.write(b); |
201 | } | |
202 | } | |
203 | 15 | return buffer.toByteArray(); |
204 | } | |
205 | ||
206 | /** | |
207 | * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. | |
208 | * <p> | |
209 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
210 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
211 | * | |
212 | * @param bytes | |
213 | * array of bytes to be encoded | |
214 | * @return array of bytes containing quoted-printable data | |
215 | */ | |
216 | @Override | |
217 | public byte[] encode(byte[] bytes) { | |
218 | 13 | return encodeQuotedPrintable(PRINTABLE_CHARS, bytes); |
219 | } | |
220 | ||
221 | /** | |
222 | * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted | |
223 | * back to their original representation. | |
224 | * <p> | |
225 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
226 | * RFC 1521. | |
227 | * | |
228 | * @param bytes | |
229 | * array of quoted-printable characters | |
230 | * @return array of original bytes | |
231 | * @throws DecoderException | |
232 | * Thrown if quoted-printable decoding is unsuccessful | |
233 | */ | |
234 | @Override | |
235 | public byte[] decode(byte[] bytes) throws DecoderException { | |
236 | 11 | return decodeQuotedPrintable(bytes); |
237 | } | |
238 | ||
239 | /** | |
240 | * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. | |
241 | * <p> | |
242 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
243 | * RFC 1521 and is suitable for encoding binary data. | |
244 | * | |
245 | * @param str | |
246 | * string to convert to quoted-printable form | |
247 | * @return quoted-printable string | |
248 | * @throws EncoderException | |
249 | * Thrown if quoted-printable encoding is unsuccessful | |
250 | * | |
251 | * @see #getCharset() | |
252 | */ | |
253 | @Override | |
254 | public String encode(String str) throws EncoderException { | |
255 | 7 | return this.encode(str, getCharset()); |
256 | } | |
257 | ||
258 | /** | |
259 | * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters | |
260 | * are converted back to their original representation. | |
261 | * | |
262 | * @param str | |
263 | * quoted-printable string to convert into its original form | |
264 | * @param charset | |
265 | * the original string charset | |
266 | * @return original string | |
267 | * @throws DecoderException | |
268 | * Thrown if quoted-printable decoding is unsuccessful | |
269 | * @since 1.7 | |
270 | */ | |
271 | public String decode(String str, Charset charset) throws DecoderException { | |
272 | 9 | if (str == null) { |
273 | 1 | return null; |
274 | } | |
275 | 8 | return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset); |
276 | } | |
277 | ||
278 | /** | |
279 | * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters | |
280 | * are converted back to their original representation. | |
281 | * | |
282 | * @param str | |
283 | * quoted-printable string to convert into its original form | |
284 | * @param charset | |
285 | * the original string charset | |
286 | * @return original string | |
287 | * @throws DecoderException | |
288 | * Thrown if quoted-printable decoding is unsuccessful | |
289 | * @throws UnsupportedEncodingException | |
290 | * Thrown if charset is not supported | |
291 | */ | |
292 | public String decode(String str, String charset) throws DecoderException, UnsupportedEncodingException { | |
293 | 3 | if (str == null) { |
294 | 1 | return null; |
295 | } | |
296 | 2 | return new String(decode(StringUtils.getBytesUsAscii(str)), charset); |
297 | } | |
298 | ||
299 | /** | |
300 | * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are | |
301 | * converted back to their original representation. | |
302 | * | |
303 | * @param str | |
304 | * quoted-printable string to convert into its original form | |
305 | * @return original string | |
306 | * @throws DecoderException | |
307 | * Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported. | |
308 | * @see #getCharset() | |
309 | */ | |
310 | @Override | |
311 | public String decode(String str) throws DecoderException { | |
312 | 9 | return this.decode(str, this.getCharset()); |
313 | } | |
314 | ||
315 | /** | |
316 | * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. | |
317 | * | |
318 | * @param obj | |
319 | * string to convert to a quoted-printable form | |
320 | * @return quoted-printable object | |
321 | * @throws EncoderException | |
322 | * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is | |
323 | * unsuccessful | |
324 | */ | |
325 | @Override | |
326 | public Object encode(Object obj) throws EncoderException { | |
327 | 4 | if (obj == null) { |
328 | 1 | return null; |
329 | 3 | } else if (obj instanceof byte[]) { |
330 | 1 | return encode((byte[]) obj); |
331 | 2 | } else if (obj instanceof String) { |
332 | 1 | return encode((String) obj); |
333 | } else { | |
334 | 1 | throw new EncoderException("Objects of type " + |
335 | obj.getClass().getName() + | |
336 | " cannot be quoted-printable encoded"); | |
337 | } | |
338 | } | |
339 | ||
340 | /** | |
341 | * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original | |
342 | * representation. | |
343 | * | |
344 | * @param obj | |
345 | * quoted-printable object to convert into its original form | |
346 | * @return original object | |
347 | * @throws DecoderException | |
348 | * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure | |
349 | * condition is encountered during the decode process. | |
350 | */ | |
351 | @Override | |
352 | public Object decode(Object obj) throws DecoderException { | |
353 | 4 | if (obj == null) { |
354 | 1 | return null; |
355 | 3 | } else if (obj instanceof byte[]) { |
356 | 1 | return decode((byte[]) obj); |
357 | 2 | } else if (obj instanceof String) { |
358 | 1 | return decode((String) obj); |
359 | } else { | |
360 | 1 | throw new DecoderException("Objects of type " + |
361 | obj.getClass().getName() + | |
362 | " cannot be quoted-printable decoded"); | |
363 | } | |
364 | } | |
365 | ||
366 | /** | |
367 | * Gets the default charset name used for string decoding and encoding. | |
368 | * | |
369 | * @return the default charset name | |
370 | * @since 1.7 | |
371 | */ | |
372 | public Charset getCharset() { | |
373 | 16 | return this.charset; |
374 | } | |
375 | ||
376 | /** | |
377 | * Gets the default charset name used for string decoding and encoding. | |
378 | * | |
379 | * @return the default charset name | |
380 | */ | |
381 | public String getDefaultCharset() { | |
382 | 0 | return this.charset.name(); |
383 | } | |
384 | ||
385 | /** | |
386 | * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. | |
387 | * <p> | |
388 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
389 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
390 | * | |
391 | * @param str | |
392 | * string to convert to quoted-printable form | |
393 | * @param charset | |
394 | * the charset for str | |
395 | * @return quoted-printable string | |
396 | * @since 1.7 | |
397 | */ | |
398 | public String encode(String str, Charset charset) { | |
399 | 7 | if (str == null) { |
400 | 1 | return null; |
401 | } | |
402 | 6 | return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset))); |
403 | } | |
404 | ||
405 | /** | |
406 | * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. | |
407 | * <p> | |
408 | * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in | |
409 | * RFC 1521 and is suitable for encoding binary data and unformatted text. | |
410 | * | |
411 | * @param str | |
412 | * string to convert to quoted-printable form | |
413 | * @param charset | |
414 | * the charset for str | |
415 | * @return quoted-printable string | |
416 | * @throws UnsupportedEncodingException | |
417 | * Thrown if the charset is not supported | |
418 | */ | |
419 | public String encode(String str, String charset) throws UnsupportedEncodingException { | |
420 | 6 | if (str == null) { |
421 | 1 | return null; |
422 | } | |
423 | 5 | return StringUtils.newStringUsAscii(encode(str.getBytes(charset))); |
424 | } | |
425 | } |