Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
BaseNCodec |
|
| 2.75;2.75 | ||||
BaseNCodec$Context |
|
| 2.75;2.75 |
1 | /* | |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.commons.codec.binary; | |
19 | ||
20 | import org.apache.commons.codec.BinaryDecoder; | |
21 | import org.apache.commons.codec.BinaryEncoder; | |
22 | import org.apache.commons.codec.DecoderException; | |
23 | import org.apache.commons.codec.EncoderException; | |
24 | ||
25 | /** | |
26 | * Abstract superclass for Base-N encoders and decoders. | |
27 | * | |
28 | * <p> | |
29 | * This class is thread-safe. | |
30 | * </p> | |
31 | * | |
32 | * @version $Id$ | |
33 | */ | |
34 | public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { | |
35 | ||
36 | /** | |
37 | * Holds thread context so classes can be thread-safe. | |
38 | * | |
39 | * This class is not itself thread-safe; each thread must allocate its own copy. | |
40 | * | |
41 | * @since 1.7 | |
42 | */ | |
43 | static class Context { | |
44 | ||
45 | /** | |
46 | * Place holder for the bytes we're dealing with for our based logic. | |
47 | * Bitwise operations store and extract the encoding or decoding from this variable. | |
48 | */ | |
49 | int ibitWorkArea; | |
50 | ||
51 | /** | |
52 | * Place holder for the bytes we're dealing with for our based logic. | |
53 | * Bitwise operations store and extract the encoding or decoding from this variable. | |
54 | */ | |
55 | long lbitWorkArea; | |
56 | ||
57 | /** | |
58 | * Buffer for streaming. | |
59 | */ | |
60 | byte[] buffer; | |
61 | ||
62 | /** | |
63 | * Position where next character should be written in the buffer. | |
64 | */ | |
65 | int pos; | |
66 | ||
67 | /** | |
68 | * Position where next character should be read from the buffer. | |
69 | */ | |
70 | int readPos; | |
71 | ||
72 | /** | |
73 | * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, | |
74 | * and must be thrown away. | |
75 | */ | |
76 | boolean eof; | |
77 | ||
78 | /** | |
79 | * Variable tracks how many characters have been written to the current line. Only used when encoding. We use | |
80 | * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). | |
81 | */ | |
82 | int currentLinePos; | |
83 | ||
84 | /** | |
85 | * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This | |
86 | * variable helps track that. | |
87 | */ | |
88 | int modulus; | |
89 | ||
90 | 31674 | Context() { |
91 | 31674 | } |
92 | ||
93 | /** | |
94 | * Returns a String useful for debugging (especially within a debugger.) | |
95 | * | |
96 | * @return a String useful for debugging. | |
97 | */ | |
98 | @SuppressWarnings("boxing") // OK to ignore boxing here | |
99 | @Override | |
100 | public String toString() { | |
101 | 0 | return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + |
102 | "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), buffer, currentLinePos, eof, | |
103 | ibitWorkArea, lbitWorkArea, modulus, pos, readPos); | |
104 | } | |
105 | } | |
106 | ||
107 | /** | |
108 | * EOF | |
109 | * | |
110 | * @since 1.7 | |
111 | */ | |
112 | static final int EOF = -1; | |
113 | ||
114 | /** | |
115 | * MIME chunk size per RFC 2045 section 6.8. | |
116 | * | |
117 | * <p> | |
118 | * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any | |
119 | * equal signs. | |
120 | * </p> | |
121 | * | |
122 | * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> | |
123 | */ | |
124 | public static final int MIME_CHUNK_SIZE = 76; | |
125 | ||
126 | /** | |
127 | * PEM chunk size per RFC 1421 section 4.3.2.4. | |
128 | * | |
129 | * <p> | |
130 | * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any | |
131 | * equal signs. | |
132 | * </p> | |
133 | * | |
134 | * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> | |
135 | */ | |
136 | public static final int PEM_CHUNK_SIZE = 64; | |
137 | ||
138 | private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; | |
139 | ||
140 | /** | |
141 | * Defines the default buffer size - currently {@value} | |
142 | * - must be large enough for at least one encoded block+separator | |
143 | */ | |
144 | private static final int DEFAULT_BUFFER_SIZE = 8192; | |
145 | ||
146 | /** Mask used to extract 8 bits, used in decoding bytes */ | |
147 | protected static final int MASK_8BITS = 0xff; | |
148 | ||
149 | /** | |
150 | * Byte used to pad output. | |
151 | */ | |
152 | protected static final byte PAD_DEFAULT = '='; // Allow static access to default | |
153 | ||
154 | 30719 | protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later |
155 | ||
156 | /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */ | |
157 | private final int unencodedBlockSize; | |
158 | ||
159 | /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */ | |
160 | private final int encodedBlockSize; | |
161 | ||
162 | /** | |
163 | * Chunksize for encoding. Not used when decoding. | |
164 | * A value of zero or less implies no chunking of the encoded data. | |
165 | * Rounded down to nearest multiple of encodedBlockSize. | |
166 | */ | |
167 | protected final int lineLength; | |
168 | ||
169 | /** | |
170 | * Size of chunk separator. Not used unless {@link #lineLength} > 0. | |
171 | */ | |
172 | private final int chunkSeparatorLength; | |
173 | ||
174 | /** | |
175 | * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}. | |
176 | * If <code>lineLength</code> or <code>chunkSeparatorLength</code> is zero or less, then chunking is disabled. | |
177 | * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) | |
178 | * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) | |
179 | * @param lineLength if > 0, use chunking with a length <code>lineLength</code> | |
180 | * @param chunkSeparatorLength the chunk separator length, if relevant | |
181 | */ | |
182 | 30719 | protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) { |
183 | 30719 | this.unencodedBlockSize = unencodedBlockSize; |
184 | 30719 | this.encodedBlockSize = encodedBlockSize; |
185 | 30719 | final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; |
186 | 30719 | this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; |
187 | 30719 | this.chunkSeparatorLength = chunkSeparatorLength; |
188 | 30719 | } |
189 | ||
190 | /** | |
191 | * Returns true if this object has buffered data for reading. | |
192 | * | |
193 | * @param context the context to be used | |
194 | * @return true if there is data still available for reading. | |
195 | */ | |
196 | boolean hasData(Context context) { // package protected for access from I/O streams | |
197 | 190801 | return context.buffer != null; |
198 | } | |
199 | ||
200 | /** | |
201 | * Returns the amount of buffered data available for reading. | |
202 | * | |
203 | * @param context the context to be used | |
204 | * @return The amount of buffered data available for reading. | |
205 | */ | |
206 | int available(Context context) { // package protected for access from I/O streams | |
207 | 720232 | return context.buffer != null ? context.pos - context.readPos : 0; |
208 | } | |
209 | ||
210 | /** | |
211 | * Get the default buffer size. Can be overridden. | |
212 | * | |
213 | * @return {@link #DEFAULT_BUFFER_SIZE} | |
214 | */ | |
215 | protected int getDefaultBufferSize() { | |
216 | 199499 | return DEFAULT_BUFFER_SIZE; |
217 | } | |
218 | ||
219 | /** | |
220 | * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. | |
221 | * @param context the context to be used | |
222 | */ | |
223 | private byte[] resizeBuffer(Context context) { | |
224 | 199503 | if (context.buffer == null) { |
225 | 199499 | context.buffer = new byte[getDefaultBufferSize()]; |
226 | 199499 | context.pos = 0; |
227 | 199499 | context.readPos = 0; |
228 | } else { | |
229 | 4 | byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; |
230 | 4 | System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); |
231 | 4 | context.buffer = b; |
232 | } | |
233 | 199503 | return context.buffer; |
234 | } | |
235 | ||
236 | /** | |
237 | * Ensure that the buffer has room for <code>size</code> bytes | |
238 | * | |
239 | * @param size minimum spare space required | |
240 | * @param context the context to be used | |
241 | */ | |
242 | protected byte[] ensureBufferSize(int size, Context context){ | |
243 | 3026032 | if ((context.buffer == null) || (context.buffer.length < context.pos + size)){ |
244 | 199503 | return resizeBuffer(context); |
245 | } | |
246 | 2826529 | return context.buffer; |
247 | } | |
248 | ||
249 | /** | |
250 | * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail | |
251 | * bytes. Returns how many bytes were actually extracted. | |
252 | * <p> | |
253 | * Package protected for access from I/O streams. | |
254 | * | |
255 | * @param b | |
256 | * byte[] array to extract the buffered data into. | |
257 | * @param bPos | |
258 | * position in byte[] array to start extraction at. | |
259 | * @param bAvail | |
260 | * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). | |
261 | * @param context | |
262 | * the context to be used | |
263 | * @return The number of bytes successfully extracted into the provided byte[] array. | |
264 | */ | |
265 | int readResults(byte[] b, int bPos, int bAvail, Context context) { | |
266 | 363378 | if (context.buffer != null) { |
267 | 287627 | int len = Math.min(available(context), bAvail); |
268 | 287627 | System.arraycopy(context.buffer, context.readPos, b, bPos, len); |
269 | 287627 | context.readPos += len; |
270 | 287627 | if (context.readPos >= context.pos) { |
271 | 199408 | context.buffer = null; // so hasData() will return false, and this method can return -1 |
272 | } | |
273 | 287627 | return len; |
274 | } | |
275 | 75751 | return context.eof ? EOF : 0; |
276 | } | |
277 | ||
278 | /** | |
279 | * Checks if a byte value is whitespace or not. | |
280 | * Whitespace is taken to mean: space, tab, CR, LF | |
281 | * @param byteToCheck | |
282 | * the byte to check | |
283 | * @return true if byte is whitespace, false otherwise | |
284 | */ | |
285 | protected static boolean isWhiteSpace(byte byteToCheck) { | |
286 | 42 | switch (byteToCheck) { |
287 | case ' ' : | |
288 | case '\n' : | |
289 | case '\r' : | |
290 | case '\t' : | |
291 | 32 | return true; |
292 | default : | |
293 | 10 | return false; |
294 | } | |
295 | } | |
296 | ||
297 | /** | |
298 | * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of | |
299 | * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. | |
300 | * | |
301 | * @param obj | |
302 | * Object to encode | |
303 | * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. | |
304 | * @throws EncoderException | |
305 | * if the parameter supplied is not of type byte[] | |
306 | */ | |
307 | @Override | |
308 | public Object encode(Object obj) throws EncoderException { | |
309 | 99 | if (!(obj instanceof byte[])) { |
310 | 1 | throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); |
311 | } | |
312 | 98 | return encode((byte[]) obj); |
313 | } | |
314 | ||
315 | /** | |
316 | * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. | |
317 | * Uses UTF8 encoding. | |
318 | * | |
319 | * @param pArray | |
320 | * a byte array containing binary data | |
321 | * @return A String containing only Base-N character data | |
322 | */ | |
323 | public String encodeToString(byte[] pArray) { | |
324 | 4 | return StringUtils.newStringUtf8(encode(pArray)); |
325 | } | |
326 | ||
327 | /** | |
328 | * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. | |
329 | * Uses UTF8 encoding. | |
330 | * | |
331 | * @param pArray a byte array containing binary data | |
332 | * @return String containing only character data in the appropriate alphabet. | |
333 | */ | |
334 | public String encodeAsString(byte[] pArray){ | |
335 | 21 | return StringUtils.newStringUtf8(encode(pArray)); |
336 | } | |
337 | ||
338 | /** | |
339 | * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of | |
340 | * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. | |
341 | * | |
342 | * @param obj | |
343 | * Object to decode | |
344 | * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String | |
345 | * supplied. | |
346 | * @throws DecoderException | |
347 | * if the parameter supplied is not of type byte[] or String | |
348 | */ | |
349 | @Override | |
350 | public Object decode(Object obj) throws DecoderException { | |
351 | 100 | if (obj instanceof byte[]) { |
352 | 98 | return decode((byte[]) obj); |
353 | 2 | } else if (obj instanceof String) { |
354 | 1 | return decode((String) obj); |
355 | } else { | |
356 | 1 | throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); |
357 | } | |
358 | } | |
359 | ||
360 | /** | |
361 | * Decodes a String containing characters in the Base-N alphabet. | |
362 | * | |
363 | * @param pArray | |
364 | * A String containing Base-N character data | |
365 | * @return a byte array containing binary data | |
366 | */ | |
367 | public byte[] decode(String pArray) { | |
368 | 39 | return decode(StringUtils.getBytesUtf8(pArray)); |
369 | } | |
370 | ||
371 | /** | |
372 | * Decodes a byte[] containing characters in the Base-N alphabet. | |
373 | * | |
374 | * @param pArray | |
375 | * A byte array containing Base-N character data | |
376 | * @return a byte array containing binary data | |
377 | */ | |
378 | @Override | |
379 | public byte[] decode(byte[] pArray) { | |
380 | 1159 | if (pArray == null || pArray.length == 0) { |
381 | 16 | return pArray; |
382 | } | |
383 | 1143 | Context context = new Context(); |
384 | 1143 | decode(pArray, 0, pArray.length, context); |
385 | 1143 | decode(pArray, 0, EOF, context); // Notify decoder of EOF. |
386 | 1143 | byte[] result = new byte[context.pos]; |
387 | 1143 | readResults(result, 0, result.length, context); |
388 | 1143 | return result; |
389 | } | |
390 | ||
391 | /** | |
392 | * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. | |
393 | * | |
394 | * @param pArray | |
395 | * a byte array containing binary data | |
396 | * @return A byte array containing only Base-N alphabet character data | |
397 | */ | |
398 | @Override | |
399 | public byte[] encode(byte[] pArray) { | |
400 | 2769 | if (pArray == null || pArray.length == 0) { |
401 | 15 | return pArray; |
402 | } | |
403 | 2754 | Context context = new Context(); |
404 | 2754 | encode(pArray, 0, pArray.length, context); |
405 | 2754 | encode(pArray, 0, EOF, context); // Notify encoder of EOF. |
406 | 2754 | byte[] buf = new byte[context.pos - context.readPos]; |
407 | 2754 | readResults(buf, 0, buf.length, context); |
408 | 2754 | return buf; |
409 | } | |
410 | ||
411 | // package protected for access from I/O streams | |
412 | abstract void encode(byte[] pArray, int i, int length, Context context); | |
413 | ||
414 | // package protected for access from I/O streams | |
415 | abstract void decode(byte[] pArray, int i, int length, Context context); | |
416 | ||
417 | /** | |
418 | * Returns whether or not the <code>value</code> is in the current alphabet. | |
419 | * Does not allow whitespace or pad. | |
420 | * | |
421 | * @param value The value to test | |
422 | * | |
423 | * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise. | |
424 | */ | |
425 | protected abstract boolean isInAlphabet(byte value); | |
426 | ||
427 | /** | |
428 | * Tests a given byte array to see if it contains only valid characters within the alphabet. | |
429 | * The method optionally treats whitespace and pad as valid. | |
430 | * | |
431 | * @param arrayOctet byte array to test | |
432 | * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed | |
433 | * | |
434 | * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty; | |
435 | * {@code false}, otherwise | |
436 | */ | |
437 | public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) { | |
438 | 26 | for (int i = 0; i < arrayOctet.length; i++) { |
439 | 18 | if (!isInAlphabet(arrayOctet[i]) && |
440 | (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) { | |
441 | 2 | return false; |
442 | } | |
443 | } | |
444 | 8 | return true; |
445 | } | |
446 | ||
447 | /** | |
448 | * Tests a given String to see if it contains only valid characters within the alphabet. | |
449 | * The method treats whitespace and PAD as valid. | |
450 | * | |
451 | * @param basen String to test | |
452 | * @return {@code true} if all characters in the String are valid characters in the alphabet or if | |
453 | * the String is empty; {@code false}, otherwise | |
454 | * @see #isInAlphabet(byte[], boolean) | |
455 | */ | |
456 | public boolean isInAlphabet(String basen) { | |
457 | 2 | return isInAlphabet(StringUtils.getBytesUtf8(basen), true); |
458 | } | |
459 | ||
460 | /** | |
461 | * Tests a given byte array to see if it contains any characters within the alphabet or PAD. | |
462 | * | |
463 | * Intended for use in checking line-ending arrays | |
464 | * | |
465 | * @param arrayOctet | |
466 | * byte array to test | |
467 | * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise | |
468 | */ | |
469 | protected boolean containsAlphabetOrPad(byte[] arrayOctet) { | |
470 | 17023 | if (arrayOctet == null) { |
471 | 1 | return false; |
472 | } | |
473 | 44323 | for (byte element : arrayOctet) { |
474 | 27308 | if (PAD == element || isInAlphabet(element)) { |
475 | 7 | return true; |
476 | } | |
477 | } | |
478 | 17015 | return false; |
479 | } | |
480 | ||
481 | /** | |
482 | * Calculates the amount of space needed to encode the supplied array. | |
483 | * | |
484 | * @param pArray byte[] array which will later be encoded | |
485 | * | |
486 | * @return amount of space needed to encode the supplied array. | |
487 | * Returns a long since a max-len array will require > Integer.MAX_VALUE | |
488 | */ | |
489 | public long getEncodedLength(byte[] pArray) { | |
490 | // Calculate non-chunked size - rounded up to allow for padding | |
491 | // cast to long is needed to avoid possibility of overflow | |
492 | 1924 | long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; |
493 | 1924 | if (lineLength > 0) { // We're using chunking |
494 | // Round up to nearest multiple | |
495 | 291 | len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; |
496 | } | |
497 | 1924 | return len; |
498 | } | |
499 | } |