001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3.text.translate;
018    
019    import java.io.IOException;
020    import java.io.Writer;
021    
022    import java.util.EnumSet;
023    import java.util.Arrays;
024    
025    /**
026     * Translates escaped unicode values of the form \\u+\d\d\d\d back to 
027     * unicode.
028     * 
029     * @author Apache Software Foundation
030     * @since 3.0
031     * @version $Id: UnicodeUnescaper.java 967237 2010-07-23 20:08:57Z mbenson $
032     */
033    public class UnicodeUnescaper extends CharSequenceTranslator {
034    
035        public static enum OPTION { escapePlus }
036    
037        // TODO: Create an OptionsSet class to hide some of the conditional logic below
038        private final EnumSet<OPTION> options;
039    
040        public UnicodeUnescaper(OPTION... options) {
041            if(options.length > 0) {
042                this.options = EnumSet.copyOf(Arrays.asList(options));
043            } else {
044                this.options = null;
045            }
046        }
047    
048        public boolean isSet(OPTION opt) { 
049            return (options == null) ? false : options.contains(opt);
050        }
051    
052        /**
053         * {@inheritDoc}
054         */
055        @Override
056        public int translate(CharSequence input, int index, Writer out) throws IOException {
057            if(input.charAt(index) == '\\') {
058                if( (index + 1 < input.length()) && input.charAt(index + 1) == 'u') {
059                    // consume optional additional 'u' chars
060                    int i=2;
061                    while( (index + i < input.length()) && input.charAt(index + i) == 'u') {
062                        i++;
063                    }
064    
065                    // consume + symbol in \\u+0045
066                    if(isSet(OPTION.escapePlus)) {
067                        if( (index + i < input.length()) && (input.charAt(index + i) == '+') ) {
068                            i++;
069                        }
070                    }
071    
072                    if( (index + i + 4 <= input.length()) ) {
073                        // Get 4 hex digits
074                        CharSequence unicode = input.subSequence(index + i, index + i + 4);
075    
076                        try {
077                            int value = Integer.parseInt(unicode.toString(), 16);
078                            out.write((char) value);
079                        } catch (NumberFormatException nfe) {
080                            throw new IllegalArgumentException("Unable to parse unicode value: " + unicode, nfe);
081                        }
082                        return i + 4;
083                    } else {
084                        throw new IllegalArgumentException("Less than 4 hex digits in unicode value: '" + 
085                                                           input.subSequence(index, input.length()) +
086                                                           "' due to end of CharSequence");
087                    }
088                }
089            }
090            return 0;
091        }
092    }