Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
BaseNCodec |
|
| 2.75;2.75 | ||||
BaseNCodec$Context |
|
| 2.75;2.75 |
1 | /* | |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.commons.codec.binary; | |
19 | ||
20 | import java.util.Arrays; | |
21 | ||
22 | import org.apache.commons.codec.BinaryDecoder; | |
23 | import org.apache.commons.codec.BinaryEncoder; | |
24 | import org.apache.commons.codec.DecoderException; | |
25 | import org.apache.commons.codec.EncoderException; | |
26 | ||
27 | /** | |
28 | * Abstract superclass for Base-N encoders and decoders. | |
29 | * | |
30 | * <p> | |
31 | * This class is thread-safe. | |
32 | * </p> | |
33 | * | |
34 | * @version $Id$ | |
35 | */ | |
36 | public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { | |
37 | ||
38 | /** | |
39 | * Holds thread context so classes can be thread-safe. | |
40 | * | |
41 | * This class is not itself thread-safe; each thread must allocate its own copy. | |
42 | * | |
43 | * @since 1.7 | |
44 | */ | |
45 | static class Context { | |
46 | ||
47 | /** | |
48 | * Place holder for the bytes we're dealing with for our based logic. | |
49 | * Bitwise operations store and extract the encoding or decoding from this variable. | |
50 | */ | |
51 | int ibitWorkArea; | |
52 | ||
53 | /** | |
54 | * Place holder for the bytes we're dealing with for our based logic. | |
55 | * Bitwise operations store and extract the encoding or decoding from this variable. | |
56 | */ | |
57 | long lbitWorkArea; | |
58 | ||
59 | /** | |
60 | * Buffer for streaming. | |
61 | */ | |
62 | byte[] buffer; | |
63 | ||
64 | /** | |
65 | * Position where next character should be written in the buffer. | |
66 | */ | |
67 | int pos; | |
68 | ||
69 | /** | |
70 | * Position where next character should be read from the buffer. | |
71 | */ | |
72 | int readPos; | |
73 | ||
74 | /** | |
75 | * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, | |
76 | * and must be thrown away. | |
77 | */ | |
78 | boolean eof; | |
79 | ||
80 | /** | |
81 | * Variable tracks how many characters have been written to the current line. Only used when encoding. We use | |
82 | * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). | |
83 | */ | |
84 | int currentLinePos; | |
85 | ||
86 | /** | |
87 | * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This | |
88 | * variable helps track that. | |
89 | */ | |
90 | int modulus; | |
91 | ||
92 | 31675 | Context() { |
93 | 31675 | } |
94 | ||
95 | /** | |
96 | * Returns a String useful for debugging (especially within a debugger.) | |
97 | * | |
98 | * @return a String useful for debugging. | |
99 | */ | |
100 | @SuppressWarnings("boxing") // OK to ignore boxing here | |
101 | @Override | |
102 | public String toString() { | |
103 | 0 | return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + |
104 | "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer), | |
105 | currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos); | |
106 | } | |
107 | } | |
108 | ||
109 | /** | |
110 | * EOF | |
111 | * | |
112 | * @since 1.7 | |
113 | */ | |
114 | static final int EOF = -1; | |
115 | ||
116 | /** | |
117 | * MIME chunk size per RFC 2045 section 6.8. | |
118 | * | |
119 | * <p> | |
120 | * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any | |
121 | * equal signs. | |
122 | * </p> | |
123 | * | |
124 | * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> | |
125 | */ | |
126 | public static final int MIME_CHUNK_SIZE = 76; | |
127 | ||
128 | /** | |
129 | * PEM chunk size per RFC 1421 section 4.3.2.4. | |
130 | * | |
131 | * <p> | |
132 | * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any | |
133 | * equal signs. | |
134 | * </p> | |
135 | * | |
136 | * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> | |
137 | */ | |
138 | public static final int PEM_CHUNK_SIZE = 64; | |
139 | ||
140 | private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; | |
141 | ||
142 | /** | |
143 | * Defines the default buffer size - currently {@value} | |
144 | * - must be large enough for at least one encoded block+separator | |
145 | */ | |
146 | private static final int DEFAULT_BUFFER_SIZE = 8192; | |
147 | ||
148 | /** Mask used to extract 8 bits, used in decoding bytes */ | |
149 | protected static final int MASK_8BITS = 0xff; | |
150 | ||
151 | /** | |
152 | * Byte used to pad output. | |
153 | */ | |
154 | protected static final byte PAD_DEFAULT = '='; // Allow static access to default | |
155 | ||
156 | 30720 | protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later |
157 | ||
158 | /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ | |
159 | private final int unencodedBlockSize; | |
160 | ||
161 | /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ | |
162 | private final int encodedBlockSize; | |
163 | ||
164 | /** | |
165 | * Chunksize for encoding. Not used when decoding. | |
166 | * A value of zero or less implies no chunking of the encoded data. | |
167 | * Rounded down to nearest multiple of encodedBlockSize. | |
168 | */ | |
169 | protected final int lineLength; | |
170 | ||
171 | /** | |
172 | * Size of chunk separator. Not used unless {@link #lineLength} > 0. | |
173 | */ | |
174 | private final int chunkSeparatorLength; | |
175 | ||
176 | /** | |
177 | * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize} | |
178 | * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled. | |
179 | * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) | |
180 | * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) | |
181 | * @param lineLength if > 0, use chunking with a length <code>lineLength</code> | |
182 | * @param chunkSeparatorLength the chunk separator length, if relevant | |
183 | */ | |
184 | protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, | |
185 | 30720 | final int lineLength, final int chunkSeparatorLength) { |
186 | 30720 | this.unencodedBlockSize = unencodedBlockSize; |
187 | 30720 | this.encodedBlockSize = encodedBlockSize; |
188 | 30720 | final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; |
189 | 30720 | this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; |
190 | 30720 | this.chunkSeparatorLength = chunkSeparatorLength; |
191 | 30720 | } |
192 | ||
193 | /** | |
194 | * Returns true if this object has buffered data for reading. | |
195 | * | |
196 | * @param context the context to be used | |
197 | * @return true if there is data still available for reading. | |
198 | */ | |
199 | boolean hasData(final Context context) { // package protected for access from I/O streams | |
200 | 190801 | return context.buffer != null; |
201 | } | |
202 | ||
203 | /** | |
204 | * Returns the amount of buffered data available for reading. | |
205 | * | |
206 | * @param context the context to be used | |
207 | * @return The amount of buffered data available for reading. | |
208 | */ | |
209 | int available(final Context context) { // package protected for access from I/O streams | |
210 | 720233 | return context.buffer != null ? context.pos - context.readPos : 0; |
211 | } | |
212 | ||
213 | /** | |
214 | * Get the default buffer size. Can be overridden. | |
215 | * | |
216 | * @return {@link #DEFAULT_BUFFER_SIZE} | |
217 | */ | |
218 | protected int getDefaultBufferSize() { | |
219 | 199500 | return DEFAULT_BUFFER_SIZE; |
220 | } | |
221 | ||
222 | /** | |
223 | * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. | |
224 | * @param context the context to be used | |
225 | */ | |
226 | private byte[] resizeBuffer(final Context context) { | |
227 | 199502 | if (context.buffer == null) { |
228 | 199500 | context.buffer = new byte[getDefaultBufferSize()]; |
229 | 199500 | context.pos = 0; |
230 | 199500 | context.readPos = 0; |
231 | } else { | |
232 | 2 | final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; |
233 | 2 | System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); |
234 | 2 | context.buffer = b; |
235 | } | |
236 | 199502 | return context.buffer; |
237 | } | |
238 | ||
239 | /** | |
240 | * Ensure that the buffer has room for <code>size</code> bytes | |
241 | * | |
242 | * @param size minimum spare space required | |
243 | * @param context the context to be used | |
244 | */ | |
245 | protected byte[] ensureBufferSize(final int size, final Context context){ | |
246 | 3001547 | if ((context.buffer == null) || (context.buffer.length < context.pos + size)){ |
247 | 199502 | return resizeBuffer(context); |
248 | } | |
249 | 2802045 | return context.buffer; |
250 | } | |
251 | ||
252 | /** | |
253 | * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail | |
254 | * bytes. Returns how many bytes were actually extracted. | |
255 | * <p> | |
256 | * Package protected for access from I/O streams. | |
257 | * | |
258 | * @param b | |
259 | * byte[] array to extract the buffered data into. | |
260 | * @param bPos | |
261 | * position in byte[] array to start extraction at. | |
262 | * @param bAvail | |
263 | * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). | |
264 | * @param context | |
265 | * the context to be used | |
266 | * @return The number of bytes successfully extracted into the provided byte[] array. | |
267 | */ | |
268 | int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { | |
269 | 363379 | if (context.buffer != null) { |
270 | 287628 | final int len = Math.min(available(context), bAvail); |
271 | 287628 | System.arraycopy(context.buffer, context.readPos, b, bPos, len); |
272 | 287628 | context.readPos += len; |
273 | 287628 | if (context.readPos >= context.pos) { |
274 | 199409 | context.buffer = null; // so hasData() will return false, and this method can return -1 |
275 | } | |
276 | 287628 | return len; |
277 | } | |
278 | 75751 | return context.eof ? EOF : 0; |
279 | } | |
280 | ||
281 | /** | |
282 | * Checks if a byte value is whitespace or not. | |
283 | * Whitespace is taken to mean: space, tab, CR, LF | |
284 | * @param byteToCheck | |
285 | * the byte to check | |
286 | * @return true if byte is whitespace, false otherwise | |
287 | */ | |
288 | protected static boolean isWhiteSpace(final byte byteToCheck) { | |
289 | 42 | switch (byteToCheck) { |
290 | case ' ' : | |
291 | case '\n' : | |
292 | case '\r' : | |
293 | case '\t' : | |
294 | 32 | return true; |
295 | default : | |
296 | 10 | return false; |
297 | } | |
298 | } | |
299 | ||
300 | /** | |
301 | * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of | |
302 | * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. | |
303 | * | |
304 | * @param obj | |
305 | * Object to encode | |
306 | * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. | |
307 | * @throws EncoderException | |
308 | * if the parameter supplied is not of type byte[] | |
309 | */ | |
310 | @Override | |
311 | public Object encode(final Object obj) throws EncoderException { | |
312 | 99 | if (!(obj instanceof byte[])) { |
313 | 1 | throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); |
314 | } | |
315 | 98 | return encode((byte[]) obj); |
316 | } | |
317 | ||
318 | /** | |
319 | * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. | |
320 | * Uses UTF8 encoding. | |
321 | * | |
322 | * @param pArray | |
323 | * a byte array containing binary data | |
324 | * @return A String containing only Base-N character data | |
325 | */ | |
326 | public String encodeToString(final byte[] pArray) { | |
327 | 4 | return StringUtils.newStringUtf8(encode(pArray)); |
328 | } | |
329 | ||
330 | /** | |
331 | * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. | |
332 | * Uses UTF8 encoding. | |
333 | * | |
334 | * @param pArray a byte array containing binary data | |
335 | * @return String containing only character data in the appropriate alphabet. | |
336 | */ | |
337 | public String encodeAsString(final byte[] pArray){ | |
338 | 21 | return StringUtils.newStringUtf8(encode(pArray)); |
339 | } | |
340 | ||
341 | /** | |
342 | * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of | |
343 | * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. | |
344 | * | |
345 | * @param obj | |
346 | * Object to decode | |
347 | * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String | |
348 | * supplied. | |
349 | * @throws DecoderException | |
350 | * if the parameter supplied is not of type byte[] | |
351 | */ | |
352 | @Override | |
353 | public Object decode(final Object obj) throws DecoderException { | |
354 | 100 | if (obj instanceof byte[]) { |
355 | 98 | return decode((byte[]) obj); |
356 | 2 | } else if (obj instanceof String) { |
357 | 1 | return decode((String) obj); |
358 | } else { | |
359 | 1 | throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); |
360 | } | |
361 | } | |
362 | ||
363 | /** | |
364 | * Decodes a String containing characters in the Base-N alphabet. | |
365 | * | |
366 | * @param pArray | |
367 | * A String containing Base-N character data | |
368 | * @return a byte array containing binary data | |
369 | */ | |
370 | public byte[] decode(final String pArray) { | |
371 | 40 | return decode(StringUtils.getBytesUtf8(pArray)); |
372 | } | |
373 | ||
374 | /** | |
375 | * Decodes a byte[] containing characters in the Base-N alphabet. | |
376 | * | |
377 | * @param pArray | |
378 | * A byte array containing Base-N character data | |
379 | * @return a byte array containing binary data | |
380 | */ | |
381 | @Override | |
382 | public byte[] decode(final byte[] pArray) { | |
383 | 1160 | if (pArray == null || pArray.length == 0) { |
384 | 16 | return pArray; |
385 | } | |
386 | 1144 | final Context context = new Context(); |
387 | 1144 | decode(pArray, 0, pArray.length, context); |
388 | 1144 | decode(pArray, 0, EOF, context); // Notify decoder of EOF. |
389 | 1144 | final byte[] result = new byte[context.pos]; |
390 | 1144 | readResults(result, 0, result.length, context); |
391 | 1144 | return result; |
392 | } | |
393 | ||
394 | /** | |
395 | * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. | |
396 | * | |
397 | * @param pArray | |
398 | * a byte array containing binary data | |
399 | * @return A byte array containing only the basen alphabetic character data | |
400 | */ | |
401 | @Override | |
402 | public byte[] encode(final byte[] pArray) { | |
403 | 2769 | if (pArray == null || pArray.length == 0) { |
404 | 15 | return pArray; |
405 | } | |
406 | 2754 | final Context context = new Context(); |
407 | 2754 | encode(pArray, 0, pArray.length, context); |
408 | 2754 | encode(pArray, 0, EOF, context); // Notify encoder of EOF. |
409 | 2754 | final byte[] buf = new byte[context.pos - context.readPos]; |
410 | 2754 | readResults(buf, 0, buf.length, context); |
411 | 2754 | return buf; |
412 | } | |
413 | ||
414 | // package protected for access from I/O streams | |
415 | abstract void encode(byte[] pArray, int i, int length, Context context); | |
416 | ||
417 | // package protected for access from I/O streams | |
418 | abstract void decode(byte[] pArray, int i, int length, Context context); | |
419 | ||
420 | /** | |
421 | * Returns whether or not the <code>octet</code> is in the current alphabet. | |
422 | * Does not allow whitespace or pad. | |
423 | * | |
424 | * @param value The value to test | |
425 | * | |
426 | * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise. | |
427 | */ | |
428 | protected abstract boolean isInAlphabet(byte value); | |
429 | ||
430 | /** | |
431 | * Tests a given byte array to see if it contains only valid characters within the alphabet. | |
432 | * The method optionally treats whitespace and pad as valid. | |
433 | * | |
434 | * @param arrayOctet byte array to test | |
435 | * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed | |
436 | * | |
437 | * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty; | |
438 | * {@code false}, otherwise | |
439 | */ | |
440 | public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) { | |
441 | 26 | for (int i = 0; i < arrayOctet.length; i++) { |
442 | 18 | if (!isInAlphabet(arrayOctet[i]) && |
443 | (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) { | |
444 | 2 | return false; |
445 | } | |
446 | } | |
447 | 8 | return true; |
448 | } | |
449 | ||
450 | /** | |
451 | * Tests a given String to see if it contains only valid characters within the alphabet. | |
452 | * The method treats whitespace and PAD as valid. | |
453 | * | |
454 | * @param basen String to test | |
455 | * @return {@code true} if all characters in the String are valid characters in the alphabet or if | |
456 | * the String is empty; {@code false}, otherwise | |
457 | * @see #isInAlphabet(byte[], boolean) | |
458 | */ | |
459 | public boolean isInAlphabet(final String basen) { | |
460 | 2 | return isInAlphabet(StringUtils.getBytesUtf8(basen), true); |
461 | } | |
462 | ||
463 | /** | |
464 | * Tests a given byte array to see if it contains any characters within the alphabet or PAD. | |
465 | * | |
466 | * Intended for use in checking line-ending arrays | |
467 | * | |
468 | * @param arrayOctet | |
469 | * byte array to test | |
470 | * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise | |
471 | */ | |
472 | protected boolean containsAlphabetOrPad(final byte[] arrayOctet) { | |
473 | 17024 | if (arrayOctet == null) { |
474 | 1 | return false; |
475 | } | |
476 | 44326 | for (final byte element : arrayOctet) { |
477 | 27310 | if (PAD == element || isInAlphabet(element)) { |
478 | 7 | return true; |
479 | } | |
480 | } | |
481 | 17016 | return false; |
482 | } | |
483 | ||
484 | /** | |
485 | * Calculates the amount of space needed to encode the supplied array. | |
486 | * | |
487 | * @param pArray byte[] array which will later be encoded | |
488 | * | |
489 | * @return amount of space needed to encoded the supplied array. | |
490 | * Returns a long since a max-len array will require > Integer.MAX_VALUE | |
491 | */ | |
492 | public long getEncodedLength(final byte[] pArray) { | |
493 | // Calculate non-chunked size - rounded up to allow for padding | |
494 | // cast to long is needed to avoid possibility of overflow | |
495 | 1924 | long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; |
496 | 1924 | if (lineLength > 0) { // We're using chunking |
497 | // Round up to nearest multiple | |
498 | 291 | len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; |
499 | } | |
500 | 1924 | return len; |
501 | } | |
502 | } |