/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3.text.translate;

import java.io.IOException;
import java.io.Writer;

/**
 * Translates XML numeric character references to the code point they denote.
 *
 * <p>Two forms are recognised: decimal, <code>&amp;#</code> followed by one or
 * more of <code>0-9</code> and a <code>;</code>, and hexadecimal,
 * <code>&amp;#x</code> or <code>&amp;#X</code> followed by one or more of
 * <code>0-9a-fA-F</code> and a <code>;</code>.</p>
 *
 * @author Apache Software Foundation
 * @since 3.0
 * @version $Id: NumericEntityUnescaper.java 967237 2010-07-23 20:08:57Z mbenson $
 */
public class NumericEntityUnescaper extends CharSequenceTranslator {

    /**
     * {@inheritDoc}
     *
     * <p>If a complete numeric character reference starts at <code>index</code>,
     * the character(s) for its code point are written to <code>out</code> and the
     * length of the reference text (from the <code>&amp;</code> up to and including
     * the <code>;</code>) is returned. Otherwise nothing is written and 0 is
     * returned. That covers text that is truncated, has no digits, has no
     * terminating <code>;</code>, contains a non-digit character, overflows an
     * <code>int</code>, or names a value above U+10FFFF.</p>
     *
     * @param input text being translated
     * @param index position in <code>input</code> at which to look for a reference
     * @param out writer that receives the decoded character(s)
     * @return number of characters of <code>input</code> consumed, or 0 if
     *         nothing was translated
     * @throws IOException if writing to <code>out</code> fails
     */
    @Override
    public int translate(CharSequence input, int index, Writer out) throws IOException {
        int length = input.length();

        // The shortest well-formed reference ("&#0;") is four characters long, so
        // anything closer to the end of the input than that cannot match. This
        // also keeps every charAt() below inside the bounds of the input.
        if (index > length - 4 || input.charAt(index) != '&' || input.charAt(index + 1) != '#') {
            return 0;
        }

        int start = index + 2;
        boolean isHex = false;

        char firstChar = input.charAt(start);
        if (firstChar == 'x' || firstChar == 'X') {
            start++;
            isHex = true;
        }

        // Scan the digits explicitly rather than searching for ';': the scan is
        // bounded by the input length, and signs or other characters that
        // Integer.parseInt would tolerate are rejected.
        int end = start;
        while (end < length && isDigit(input.charAt(end), isHex)) {
            end++;
        }

        // Require at least one digit and a terminating semicolon.
        if (end == start || end == length || input.charAt(end) != ';') {
            return 0;
        }

        int entityValue;
        try {
            entityValue = Integer.parseInt(input.subSequence(start, end).toString(), isHex ? 16 : 10);
        } catch (NumberFormatException nfe) {
            // Only digits reach this point, so the value is too large for an int
            // and therefore cannot be a code point; leave the text untranslated.
            return 0;
        }

        // Character.toChars would throw for values above U+10FFFF.
        if (!Character.isValidCodePoint(entityValue)) {
            return 0;
        }

        // Yields one char for BMP code points and a surrogate pair otherwise.
        out.write(Character.toChars(entityValue));

        // Everything from '&' through ';' inclusive has been consumed.
        return end + 1 - index;
    }

    /**
     * Tells whether a character is an ASCII digit in the given radix.
     *
     * @param ch character to test
     * @param hex <code>true</code> to accept hexadecimal digits,
     *            <code>false</code> to accept decimal digits only
     * @return <code>true</code> if <code>ch</code> is an acceptable digit
     */
    private static boolean isDigit(char ch, boolean hex) {
        if (ch >= '0' && ch <= '9') {
            return true;
        }
        return hex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'));
    }
}