001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3.text.translate;
018    
019    import java.io.IOException;
020    import java.io.Writer;
021    
022    /**
023     * Translate XML numeric entities of the form &#[xX]?\d+; to 
024     * the specific codepoint.
025     * 
026     * @author Apache Software Foundation
027     * @since 3.0
028     * @version $Id: NumericEntityUnescaper.java 967237 2010-07-23 20:08:57Z mbenson $
029     */
030    public class NumericEntityUnescaper extends CharSequenceTranslator {
031    
032        /**
033         * {@inheritDoc}
034         */
035        @Override
036        public int translate(CharSequence input, int index, Writer out) throws IOException {
037            // TODO: Protect from ArrayIndexOutOfBounds
038            if(input.charAt(index) == '&' && input.charAt(index + 1) == '#') {
039                int start = index + 2;
040                boolean isHex = false;
041    
042                char firstChar = input.charAt(start);
043                if(firstChar == 'x' || firstChar == 'X') {
044                    start++;
045                    isHex = true;
046                }
047    
048                int end = start;
049                while(input.charAt(end) != ';') {
050                    end++;
051                }
052    
053                int entityValue;
054                try {
055                    if(isHex) {
056                        entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 16);
057                    } else {
058                        entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10);
059                    }
060                } catch(NumberFormatException nfe) {
061                    return 0;
062                }
063    
064                if(entityValue > 0xFFFF) {
065                    char[] chrs = Character.toChars(entityValue);
066                    out.write(chrs[0]);
067                    out.write(chrs[1]);
068                } else {
069                    out.write(entityValue);
070                }
071                return 2 + (end - start) + (isHex ? 1 : 0) + 1;
072            }
073            return 0;
074        }
075    }