/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 *
 */
20  package org.apache.mina.util;
21  
22  import java.security.InvalidParameterException;
23  
/**
 * Provides Base64 encoding and decoding as defined by RFC 2045.
 *
 * <p>
 * This class implements section <cite>6.8. Base64
 * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
 * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
 * Freed and Borenstein.
 * </p>
 *
 * <p>
 * Instances hold no state: the instance methods {@link #encode(byte[])} and
 * {@link #decode(byte[])} simply delegate to the static ones.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 *
 * @author Apache Software Foundation commons codec
 *         (http://commons.apache.org/codec/)
 * @author <a href="http://mina.apache.org">Apache MINA Project</a>
 */
public class Base64 {

    /**
     * Chunk size per RFC 2045 section 6.8.
     *
     * <p>The {@value} character limit does not count the trailing CRLF, but counts
     * all other characters, including any equal signs.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     *
     * <p>Spelled out as byte literals so the value does not depend on the
     * platform default charset.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = { (byte) '\r', (byte) '\n' };

    /**
     * Size of the decoding table. Only non-negative byte values (0..127) are
     * ever used as indices into it.
     */
    static final int BASELENGTH = 255;

    /**
     * Size of the encoding table: one entry per 6-bit value.
     */
    static final int LOOKUPLENGTH = 64;

    /**
     * Number of bits in a byte. No longer used internally; retained for
     * compatibility with other code in this package.
     */
    static final int EIGHTBIT = 8;

    /**
     * Number of bits in two bytes. No longer used internally; retained for
     * compatibility with other code in this package.
     */
    static final int SIXTEENBIT = 16;

    /**
     * Number of bits in one Base64 input group (three bytes). No longer used
     * internally; retained for compatibility with other code in this package.
     */
    static final int TWENTYFOURBITGROUP = 24;

    /**
     * Number of encoded characters in one Base64 group.
     */
    static final int FOURBYTE = 4;

    /**
     * Mask for the sign bit of a byte. No longer used internally; retained for
     * compatibility with other code in this package.
     */
    static final int SIGN = -128;

    /**
     * Byte used to pad output.
     */
    static final byte PAD = (byte) '=';

    /** Maps an ASCII code to its 6-bit Base64 value, or -1 if it is not in the alphabet. */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /** Maps a 6-bit value to the ASCII code of its Base64 character. */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populate the decoding and encoding tables.
    static {
        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = (byte) -1;
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            base64Alphabet[i] = (byte) (i - 'A');
        }
        for (int i = 'a'; i <= 'z'; i++) {
            base64Alphabet[i] = (byte) (i - 'a' + 26);
        }
        for (int i = '0'; i <= '9'; i++) {
            base64Alphabet[i] = (byte) (i - '0' + 52);
        }
        base64Alphabet['+'] = 62;
        base64Alphabet['/'] = 63;

        for (int i = 0; i < 26; i++) {
            lookUpBase64Alphabet[i] = (byte) ('A' + i);
            lookUpBase64Alphabet[26 + i] = (byte) ('a' + i);
        }
        for (int i = 0; i < 10; i++) {
            lookUpBase64Alphabet[52 + i] = (byte) ('0' + i);
        }
        lookUpBase64Alphabet[62] = (byte) '+';
        lookUpBase64Alphabet[63] = (byte) '/';
    }

    /**
     * Tells whether a byte is the pad character or a member of the Base64 alphabet.
     *
     * @param octet the byte to test
     * @return true for '=' and for the 64 alphabet characters; false otherwise
     */
    private static boolean isBase64(byte octet) {
        if (octet == PAD) {
            return true;
        }
        // Bytes are signed: values 0x80..0xFF are negative and must be rejected
        // before they are used as an index into the decoding table.
        return octet >= 0 && base64Alphabet[octet] != -1;
    }

    /**
     * Tests a given byte array to see if it contains
     * only valid characters within the Base64 alphabet.
     * Whitespace (space, tab, CR, LF) is ignored.
     *
     * @param arrayOctect byte array to test
     * @return true if all bytes are valid characters in the Base64
     *         alphabet or if the byte array is empty; false, otherwise
     */
    public static boolean isArrayByteBase64(byte[] arrayOctect) {
        byte[] candidate = discardWhitespace(arrayOctect);

        // An empty array falls straight through the loop and is considered valid.
        for (int i = 0; i < candidate.length; i++) {
            if (!isBase64(candidate[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but
     * does not chunk the output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64(byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks
     * the encoded output into 76 character blocks
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Decodes an Object using the base64 algorithm. Only byte[] arguments
     * are supported.
     *
     * @param pObject Object to decode
     * @return An object (of type byte[]) containing the
     *         binary data which corresponds to the byte[] supplied.
     * @throws InvalidParameterException if the parameter supplied is not
     *                          of type byte[]
     */
    public Object decode(Object pObject) {
        if (!(pObject instanceof byte[])) {
            throw new InvalidParameterException("Parameter supplied to Base64 decode is not a byte[]");
        }
        return decode((byte[]) pObject);
    }

    /**
     * Decodes a byte[] containing
     * characters in the Base64 alphabet.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode(byte[] pArray) {
        return decodeBase64(pArray);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally
     * chunking the output into 76 character blocks.
     *
     * <p>When chunking, every line, including the last one, is terminated
     * by {@link #CHUNK_SEPARATOR}.</p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked if isChunked is true this encoder will chunk
     *                  the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException if the encoded form would not fit in a byte array
     */
    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
        // Work in bytes rather than bits: length * 8 overflows int for large arrays.
        int numberTriplets = binaryData.length / 3;
        int remainingBytes = binaryData.length % 3;
        int fullLength = numberTriplets * 3;

        long encodedLength = 4L * (numberTriplets + (remainingBytes == 0 ? 0 : 1));

        // If the output is to be "chunked" into 76 character sections,
        // for compliance with RFC 2045 MIME, allow for the separator(s).
        long nbrChunks = 0;
        if (isChunked && CHUNK_SEPARATOR.length != 0) {
            // Integer ceiling division; float division loses precision above 2^24.
            nbrChunks = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
            encodedLength += nbrChunks * CHUNK_SEPARATOR.length;
        }
        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Input of " + binaryData.length
                    + " bytes is too large to Base64 encode into a single array");
        }

        byte[] encodedData = new byte[(int) encodedLength];

        int encodedIndex = 0;
        int nextSeparatorIndex = CHUNK_SIZE;
        int chunksSoFar = 0;

        for (int dataIndex = 0; dataIndex < fullLength; dataIndex += 3) {
            // Mask to 0..255 so the shifts below are not affected by sign extension.
            int b1 = binaryData[dataIndex] & 0xff;
            int b2 = binaryData[dataIndex + 1] & 0xff;
            int b3 = binaryData[dataIndex + 2] & 0xff;

            encodedData[encodedIndex] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >> 6)];
            encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
            encodedIndex += 4;

            // If we are chunking, put a separator down at the end of each full line.
            // This relies on CHUNK_SIZE % 4 == 0.
            if (isChunked && encodedIndex == nextSeparatorIndex) {
                System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
                chunksSoFar++;
                encodedIndex += CHUNK_SEPARATOR.length;
                nextSeparatorIndex = encodedIndex + CHUNK_SIZE;
            }
        }

        // Encode the one or two trailing bytes, padding the group to four characters.
        if (remainingBytes == 1) {
            int b1 = binaryData[fullLength] & 0xff;

            encodedData[encodedIndex] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encodedData[encodedIndex + 2] = PAD;
            encodedData[encodedIndex + 3] = PAD;
        } else if (remainingBytes == 2) {
            int b1 = binaryData[fullLength] & 0xff;
            int b2 = binaryData[fullLength + 1] & 0xff;

            encodedData[encodedIndex] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encodedData[encodedIndex + 3] = PAD;
        }

        // The final (possibly short) line also gets a separator.
        if (isChunked && chunksSoFar < nbrChunks) {
            System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedData.length - CHUNK_SEPARATOR.length,
                    CHUNK_SEPARATOR.length);
        }

        return encodedData;
    }

    /**
     * Decodes Base64 data into octets.
     *
     * <p>Per RFC 2045 every byte outside the Base64 alphabet is ignored.
     * Malformed input is decoded leniently rather than rejected: a pad
     * character ends the current group, and an incomplete group contributes
     * only the whole bytes it encodes (two characters give one byte, three
     * give two, a single character gives none).</p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data.
     */
    public static byte[] decodeBase64(byte[] base64Data) {
        // RFC 2045 requires that we discard ALL non-Base64 characters
        byte[] groomed = discardNonBase64(base64Data);

        if (groomed.length == 0) {
            return new byte[0];
        }

        // n alphabet characters never decode to more than floor(3n / 4) bytes.
        byte[] buffer = new byte[(int) (groomed.length * 3L / 4)];
        int decodedLength = 0;

        int bits = 0; // sextets of the current group, newest in the low bits
        int sextets = 0; // number of sextets currently held in 'bits'

        for (int i = 0; i < groomed.length; i++) {
            byte octet = groomed[i];

            if (octet == PAD) {
                // Padding closes the current group, whatever its size.
                decodedLength = flushPartialGroup(bits, sextets, buffer, decodedLength);
                bits = 0;
                sextets = 0;
                continue;
            }

            // After grooming, every non-pad byte is a valid table index.
            bits = (bits << 6) | base64Alphabet[octet];
            sextets++;

            if (sextets == FOURBYTE) {
                buffer[decodedLength++] = (byte) (bits >> 16);
                buffer[decodedLength++] = (byte) (bits >> 8);
                buffer[decodedLength++] = (byte) bits;
                bits = 0;
                sextets = 0;
            }
        }
        decodedLength = flushPartialGroup(bits, sextets, buffer, decodedLength);

        if (decodedLength == buffer.length) {
            return buffer;
        }
        byte[] decodedData = new byte[decodedLength];
        System.arraycopy(buffer, 0, decodedData, 0, decodedLength);
        return decodedData;
    }

    /**
     * Writes the whole bytes held by an incomplete group of sextets.
     *
     * @param bits the accumulated sextets, newest in the low bits
     * @param sextets how many sextets {@code bits} holds (0 to 3)
     * @param target array receiving the decoded bytes
     * @param offset index in {@code target} of the first byte to write
     * @return the offset following the last byte written
     */
    private static int flushPartialGroup(int bits, int sextets, byte[] target, int offset) {
        int next = offset;
        if (sextets == 2) {
            // 12 bits collected: the top 8 form one byte.
            target[next++] = (byte) (bits >> 4);
        } else if (sextets == 3) {
            // 18 bits collected: the top 16 form two bytes.
            target[next++] = (byte) (bits >> 10);
            target[next++] = (byte) (bits >> 2);
        }
        return next;
    }

    /**
     * Discards any whitespace from a base-64 encoded block.
     *
     * @param data The base-64 encoded data to discard the whitespace
     * from.
     * @return The data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (int i = 0; i < data.length; i++) {
            byte octet = data[i];
            boolean whitespace = octet == ' ' || octet == '\n' || octet == '\r' || octet == '\t';
            if (!whitespace) {
                groomedData[bytesCopied++] = octet;
            }
        }

        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

    /**
     * Discards any characters outside of the base64 alphabet, per
     * the requirements on page 25 of RFC 2045 - "Any characters
     * outside of the base64 alphabet are to be ignored in base64
     * encoded data." The pad character '=' is kept.
     *
     * @param data The base-64 encoded data to groom
     * @return The data, less non-base64 characters (see RFC 2045).
     */
    static byte[] discardNonBase64(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (int i = 0; i < data.length; i++) {
            if (isBase64(data[i])) {
                groomedData[bytesCopied++] = data[i];
            }
        }

        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

    /**
     * Encodes an Object using the base64 algorithm. Only byte[] arguments
     * are supported.
     *
     * @param pObject Object to encode
     * @return An object (of type byte[]) containing the
     *         base64 encoded data which corresponds to the byte[] supplied.
     * @throws InvalidParameterException if the parameter supplied is not
     *                          of type byte[]
     */
    public Object encode(Object pObject) {
        if (!(pObject instanceof byte[])) {
            throw new InvalidParameterException("Parameter supplied to Base64 encode is not a byte[]");
        }
        return encode((byte[]) pObject);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the Base64 alphabet. The output is not chunked.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode(byte[] pArray) {
        return encodeBase64(pArray, false);
    }

}
495 }