001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one
003 *  or more contributor license agreements.  See the NOTICE file
004 *  distributed with this work for additional information
005 *  regarding copyright ownership.  The ASF licenses this file
006 *  to you under the Apache License, Version 2.0 (the
007 *  "License"); you may not use this file except in compliance
008 *  with the License.  You may obtain a copy of the License at
009 *
010 *    http://www.apache.org/licenses/LICENSE-2.0
011 *
012 *  Unless required by applicable law or agreed to in writing,
013 *  software distributed under the License is distributed on an
014 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 *  KIND, either express or implied.  See the License for the
016 *  specific language governing permissions and limitations
017 *  under the License.
018 *
019 */
020package org.apache.mina.util;
021
022import java.security.InvalidParameterException;
023
024/**
025 * Provides Base64 encoding and decoding as defined by RFC 2045.
026 * 
027 * <p>
028 * This class implements section <cite>6.8. Base64
029 * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
030 * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
031 * Freed and Borenstein.
032 * </p>
033 * 
034 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
035 * 
036 * 
037 * @author Apache Software Foundation commons codec
038 *         (http://commons.apache.org/codec/)
039 * @author <a href="http://mina.apache.org">Apache MINA Project</a>
040 */
public class Base64 {

    /**
     * Chunk size per RFC 2045 section 6.8.
     *
     * <p>The {@value} character limit does not count the trailing CRLF, but counts
     * all other characters, including any equal signs.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     *
     * <p>Spelled out as literal bytes so the value never depends on the
     * platform default charset.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };

    /**
     * Size of the decoding table (indexed by the octet value of a character).
     */
    static final int BASELENGTH = 255;

    /**
     * Size of the encoding table (one entry per 6-bit value).
     */
    static final int LOOKUPLENGTH = 64;

    /**
     * Number of bits in a byte.
     */
    static final int EIGHTBIT = 8;

    /**
     * Number of bits in two bytes.
     */
    static final int SIXTEENBIT = 16;

    /**
     * Number of bits in one full Base64 input group (three bytes).
     */
    static final int TWENTYFOURBITGROUP = 24;

    /**
     * Number of characters in one encoded Base64 group.
     */
    static final int FOURBYTE = 4;

    /**
     * Mask that isolates the sign bit of a byte.
     */
    static final int SIGN = -128;

    /**
     * Byte used to pad output.
     */
    static final byte PAD = (byte) '=';

    /**
     * Decoding table: maps the octet of a Base64 character to its 6-bit value,
     * or to -1 when the octet is not part of the alphabet. Filled once by the
     * static initializer and only read afterwards.
     */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /**
     * Encoding table: maps a 6-bit value to the octet of its Base64 character.
     * Filled once by the static initializer and only read afterwards.
     */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populate both tables from the alphabet in value order (RFC 2045, table 1).
    static {
        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = (byte) -1;
        }

        String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        for (int value = 0; value < LOOKUPLENGTH; value++) {
            char symbol = alphabet.charAt(value);
            lookUpBase64Alphabet[value] = (byte) symbol;
            base64Alphabet[symbol] = (byte) value;
        }
    }

    /**
     * Tells whether an octet is the pad character or a member of the Base64 alphabet.
     *
     * @param octect the octet to test
     * @return true for '=' and for the 64 alphabet characters, false otherwise
     */
    private static boolean isBase64(byte octect) {
        if (octect == PAD) {
            return true;
        }
        // Java bytes are signed: octets 0x80..0xFF are negative, can never be
        // part of the alphabet and must not be used as an array index.
        return octect >= 0 && base64Alphabet[octect] != -1;
    }

    /**
     * Tells whether an octet is one of the whitespace characters that
     * {@link #discardWhitespace(byte[])} removes (space, LF, CR, TAB).
     *
     * @param octet the octet to test
     * @return true if the octet is whitespace
     */
    private static boolean isWhitespace(byte octet) {
        return octet == ' ' || octet == '\n' || octet == '\r' || octet == '\t';
    }

    /**
     * Returns a fresh array holding the first <code>length</code> bytes of <code>source</code>.
     *
     * @param source the array to copy from
     * @param length the number of leading bytes to keep
     * @return a new array of exactly <code>length</code> bytes
     */
    private static byte[] copyPrefix(byte[] source, int length) {
        byte[] prefix = new byte[length];
        System.arraycopy(source, 0, prefix, 0, length);
        return prefix;
    }

    /**
     * Writes the chunk separator into <code>target</code> at <code>offset</code>.
     *
     * @param target the output buffer
     * @param offset the position at which the separator starts
     * @return the offset just past the separator
     */
    private static int appendChunkSeparator(byte[] target, int offset) {
        System.arraycopy(CHUNK_SEPARATOR, 0, target, offset, CHUNK_SEPARATOR.length);
        return offset + CHUNK_SEPARATOR.length;
    }

    /**
     * Tests a given byte array to see if it contains
     * only valid characters within the Base64 alphabet.
     * Whitespace (space, CR, LF, TAB) is skipped before testing.
     *
     * @param arrayOctect byte array to test
     * @return true if all bytes are valid characters in the Base64
     *         alphabet or if the byte array is empty; false, otherwise
     */
    public static boolean isArrayByteBase64(byte[] arrayOctect) {
        for (int i = 0; i < arrayOctect.length; i++) {
            byte octet = arrayOctect[i];
            if (!isWhitespace(octet) && !isBase64(octet)) {
                return false;
            }
        }
        // An empty (or whitespace-only) array is considered valid.
        return true;
    }

    /**
     * Encodes binary data using the base64 algorithm but
     * does not chunk the output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64(byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks
     * the encoded output into 76 character blocks
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Decodes an Object using the base64 algorithm. Only byte arrays are
     * supported; any other argument is rejected.
     *
     * @param pObject Object to decode
     * @return An object (of type byte[]) containing the
     *         binary data which corresponds to the byte[] supplied.
     * @throws InvalidParameterException if the parameter supplied is not
     *                          of type byte[]
     */
    public Object decode(Object pObject) {
        if (!(pObject instanceof byte[])) {
            throw new InvalidParameterException("Parameter supplied to Base64 decode is not a byte[]");
        }
        return decode((byte[]) pObject);
    }

    /**
     * Decodes a byte[] containing
     * characters in the Base64 alphabet.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode(byte[] pArray) {
        return decodeBase64(pArray);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally
     * chunking the output into 76 character blocks.
     *
     * <p>When chunking, every line of output (including the last, possibly
     * shorter one) is followed by {@link #CHUNK_SEPARATOR}. Empty input yields
     * an empty array in both modes.</p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked if isChunked is true this encoder will chunk
     *                  the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException if the encoded form would not fit in a
     *                                  single byte array
     */
    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
        int numberTriplets = binaryData.length / 3;
        int remainder = binaryData.length % 3;

        // Size the output with long/integer arithmetic: counting bits in an int
        // overflows for large inputs, and float-based rounding of the chunk
        // count loses precision once the encoded length exceeds 24 bits.
        long encodedLength = 4L * (numberTriplets + (remainder == 0 ? 0 : 1));
        boolean chunking = isChunked && CHUNK_SEPARATOR.length > 0;
        if (chunking) {
            long nbrChunks = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
            encodedLength += nbrChunks * CHUNK_SEPARATOR.length;
        }
        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Input of " + binaryData.length
                    + " bytes is too large to be Base64-encoded into a single array");
        }

        byte[] encodedData = new byte[(int) encodedLength];
        int in = 0;
        int out = 0;
        // Characters written on the current output line; relies on CHUNK_SIZE % 4 == 0.
        int lineLength = 0;

        for (int triplet = 0; triplet < numberTriplets; triplet++) {
            // Mask to 0..255 so the shifts below never see sign-extended values.
            int b1 = binaryData[in++] & 0xff;
            int b2 = binaryData[in++] & 0xff;
            int b3 = binaryData[in++] & 0xff;

            encodedData[out++] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encodedData[out++] = lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >> 6)];
            encodedData[out++] = lookUpBase64Alphabet[b3 & 0x3f];

            if (chunking) {
                lineLength += FOURBYTE;
                if (lineLength == CHUNK_SIZE) {
                    out = appendChunkSeparator(encodedData, out);
                    lineLength = 0;
                }
            }
        }

        // Encode the one or two leftover bytes and pad the group to four characters.
        if (remainder == 1) {
            int b1 = binaryData[in] & 0xff;

            encodedData[out++] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[out++] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encodedData[out++] = PAD;
            encodedData[out++] = PAD;
            lineLength += FOURBYTE;
        } else if (remainder == 2) {
            int b1 = binaryData[in] & 0xff;
            int b2 = binaryData[in + 1] & 0xff;

            encodedData[out++] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encodedData[out++] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encodedData[out++] = PAD;
            lineLength += FOURBYTE;
        }

        // Terminate the final line unless it was already closed inside the loop.
        if (chunking && lineLength > 0) {
            appendChunkSeparator(encodedData, out);
        }

        return encodedData;
    }

    /**
     * Decodes Base64 data into octects.
     *
     * <p>Per RFC 2045, every octet outside the Base64 alphabet (line breaks,
     * whitespace, non-ASCII bytes, ...) is ignored. Decoding stops at the first
     * pad character, which marks the end of the data. Input that lacks its
     * trailing padding is still decoded; a dangling single character, which
     * carries fewer than eight bits, is dropped.</p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data; empty if there is nothing to decode
     */
    public static byte[] decodeBase64(byte[] base64Data) {
        // Upper bound: every 4 input characters yield at most 3 output bytes.
        byte[] buffer = new byte[(int) (base64Data.length * 3L / 4)];
        int written = 0;
        int bits = 0;    // accumulates up to 24 bits of the current group
        int sextets = 0; // number of 6-bit values accumulated so far

        for (int i = 0; i < base64Data.length; i++) {
            byte octet = base64Data[i];
            if (octet == PAD) {
                // '=' only ever appears as trailing padding: end of data.
                break;
            }
            if (octet < 0) {
                // Non-ASCII octet; also not usable as a table index.
                continue;
            }
            int value = base64Alphabet[octet];
            if (value < 0) {
                continue;
            }

            bits = (bits << 6) | value;
            sextets++;
            if (sextets == FOURBYTE) {
                buffer[written++] = (byte) (bits >> 16);
                buffer[written++] = (byte) (bits >> 8);
                buffer[written++] = (byte) bits;
                bits = 0;
                sextets = 0;
            }
        }

        // Flush a trailing partial group.
        if (sextets == 3) {
            // 18 bits collected: two whole bytes, two spare bits.
            buffer[written++] = (byte) (bits >> 10);
            buffer[written++] = (byte) (bits >> 2);
        } else if (sextets == 2) {
            // 12 bits collected: one whole byte, four spare bits.
            buffer[written++] = (byte) (bits >> 4);
        }

        return written == buffer.length ? buffer : copyPrefix(buffer, written);
    }

    /**
     * Discards any whitespace from a base-64 encoded block.
     *
     * @param data The base-64 encoded data to discard the whitespace
     * from.
     * @return A new array holding the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (int i = 0; i < data.length; i++) {
            if (!isWhitespace(data[i])) {
                groomedData[bytesCopied++] = data[i];
            }
        }

        return copyPrefix(groomedData, bytesCopied);
    }

    /**
     * Discards any characters outside of the base64 alphabet, per
     * the requirements on page 25 of RFC 2045 - "Any characters
     * outside of the base64 alphabet are to be ignored in base64
     * encoded data." The pad character '=' is kept.
     *
     * @param data The base-64 encoded data to groom
     * @return A new array holding the data, less non-base64 characters (see RFC 2045).
     */
    static byte[] discardNonBase64(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (int i = 0; i < data.length; i++) {
            if (isBase64(data[i])) {
                groomedData[bytesCopied++] = data[i];
            }
        }

        return copyPrefix(groomedData, bytesCopied);
    }

    /**
     * Encodes an Object using the base64 algorithm. Only byte arrays are
     * supported; any other argument is rejected.
     *
     * @param pObject Object to encode
     * @return An object (of type byte[]) containing the
     *         base64 encoded data which corresponds to the byte[] supplied.
     * @throws InvalidParameterException if the parameter supplied is not
     *                          of type byte[]
     */
    public Object encode(Object pObject) {
        if (!(pObject instanceof byte[])) {
            throw new InvalidParameterException("Parameter supplied to Base64 encode is not a byte[]");
        }
        return encode((byte[]) pObject);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the Base64 alphabet. The output is not chunked.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode(byte[] pArray) {
        return encodeBase64(pArray, false);
    }

}