Coverage Report

Coverage Report - org.apache.commons.feedparser.FeedFilter

Classes in this File

Line Coverage

Branch Coverage

Complexity

FeedFilter

0/164

0/24

2.857

 /*
  * Copyright 1999,2004 The Apache Software Foundation.
  * 
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  * 
  *      http://www.apache.org/licenses/LICENSE-2.0
  * 
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.commons.feedparser;
 
 import java.util.HashMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.log4j.Logger;
 
 /**
  *
  * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
  * @version $Id: FeedFilter.java 373614 2006-01-30 22:31:21Z mvdb $
  */
 public class FeedFilter {
 
     private static Logger log = Logger.getLogger( FeedFilter.class );
 
     public static boolean DO_REMOVE_LEADING_PROLOG = true;
     public static boolean DO_DECODE_ENTITIES = true;
 
     public static HashMap LATIN1_ENTITIES = new HashMap();
 
     private static Pattern entity_pattern = Pattern.compile( "&([a-zA-Z]+);" );
 
     /**
      * This is probably the wrong behavior.  I shouldn't call this method I
      * think because assuming a content type is bad form.
      *
      * @deprecated Specify an encoding with #parse( bytes[], encoding )
      * 
      */
     public static byte[] parse( byte[] bytes )
         throws Exception {
 
         return parse( bytes, "UTF-8" );
 
     }
 
     public static byte[] parse( byte[] bytes, String encoding )
         throws Exception {
 
         String content = new String( bytes, encoding );
 
         return parse( content, encoding );
 
     }
 
     /**
      * Parse out an input string of content.
      * 
      * http://wiki.apache.org/jakarta-commons/FeedParser_2fStringAllocationConsideredHelpful
      *
      * 
      */
     public static byte[] parse( String content, String encoding )
         throws Exception {
 
         //FIXME: return an object here so that I can flag a bozo bit.
         
         //remove leading prolog...
         if ( DO_REMOVE_LEADING_PROLOG )
             content = doRemoveLeadingProlog( content, encoding );
 
         //decode HTML entities that are referenced.
         if ( DO_DECODE_ENTITIES )
             content = doDecodeEntities( content );
 
         //TODO: undeclared namespace prefixes should be expanded to their common
         //form. 'rdf, 'atom', 'xhtml' etc. Considering that they're will only be
         //a handful H and then 4^36 different possibilities the probability will
         //only be H in 4^36 which is pretty good that we won't have a false
         //positive.
         
         return content.getBytes( encoding );
 
     }
         
     /**
      * Removing prolog whitespace, comments, and other garbage from the
      * beginning of a feed.
      *
      * 
      */
     private static String doRemoveLeadingProlog( String content, String encoding ) {
 
         // if we're a UTF-16 or UTF-32 feed we need to LEAVE the prolog because
         // it triggers a UTF-16 parse due to the BOM.
         //
         // FIXME: this isn't actually true.  We should leave the BOM and remove
         // the prolog anyway due to the fact that this will still break the
         // parser.  Come up with some tests for UTF-16 to see if I can get it to
         // break and then update this method.
 
         if ( "UTF-16".equals( encoding ) ||
              "UTF-32".equals( encoding ) )
             return content;
         
         //move to the beginning of the first element or comment.  When this is a
         //processing instruction we will move to that
         int begin = content.indexOf( "<" );
 
         if ( begin > 0 ) {
             content = content.substring( begin, content.length() );
             log.warn( "Skipped whitespace in prolog and moved towards first element." );
         }
 
         //now skip to the XML processing instruction when necessary.  This is
         //used to remove comments prior to <?xml which are not allowed.
         
         begin = content.indexOf( "<?xml" );
 
         if ( begin > 0 ) {
             content = content.substring( begin, content.length() );
             log.warn( "Removed prolog towards first processing instruction." );
         }
 
         content = doRemoveElementProlog( content );
 
         return content;
         
     }
 
     /**
      * Remove element content between:
      *
      * <?xml version="1.0"?>
      *
      * THIS IS BROKEN PROLOG
      *
      * <foo>
      *
      * 
      */
     private static String doRemoveElementProlog( String content ) {
 
         int end = content.lastIndexOf( "?>", 100 );
 
         if ( end == -1 )
             return content;
 
         StringBuffer buff = new StringBuffer( content.length() );
         end = end + 2;
         buff.append( content.substring( 0, end ) );
 
         int begin = content.indexOf( "<", end );
 
         if ( begin != -1 ) {
 
             buff.append( "\n" );
             buff.append( content.substring( begin, content.length() ) );
             
         }
         
         return buff.toString();
         
     }
     
     private static String doDecodeEntities( String content ) {
 
         StringBuffer buff = new StringBuffer( content.length() + 1000 );
 
         Matcher m = entity_pattern.matcher( content );
 
         int begin = 0;
 
         boolean hasFilterDecodedEntities = false;
         boolean hasFilterFoundUnknownEntity = false;
 
         //FIXME: note that when I was benchmarking this code that this showed up
         //as a MAJOR bottleneck so we might want to optimize it a little more.
 
         while ( m.find() ) {
 
             buff.append( content.substring( begin, m.start() ) );
             
             String entity = m.group( 1 );
 
             String value = (String)LATIN1_ENTITIES.get( entity );
 
             if ( value != null ) {
                 buff.append( "&#" );
                 buff.append( value );
                 buff.append( ";" );
 
                 hasFilterDecodedEntities = true;
 
             } else {
 
                 //This is not a known entity so we have no way to correct it.
                 //If this is done then we have a problem and the feed probably
                 //still won't parse
                 buff.append( "&" );
                 buff.append( entity );
                 buff.append( ";" );
 
                 hasFilterFoundUnknownEntity = true;
             }
 
             begin = m.end( 0 );
             
         } 
 
         buff.append( content.substring( begin, content.length() ) );
 
         if ( hasFilterFoundUnknownEntity ) 
             log.warn( "Filter encountered unknown entities" );
 
         if ( hasFilterDecodedEntities ) 
             log.warn( "Filter has decoded latin1 entities." );
 
         return buff.toString();
         
     }
     
     public static void main( String[] args ) throws Exception {
 
         byte[] b = parse( "hello &eacute; world".getBytes() );
 
         String v = new String( b );
 
         System.out.println( "v: " + v );
         
     }
     
     static {
 
         // load the latin1 entity map.  We will replace latin1 entities with
         // their char references directly.  For example if someone incorrectly
         // references:
         //
         // &auml;
         //
         // we replace it with:
         //
         // &#228;
         //
         // Which is correct in Latin1
 
         // http://my.netscape.com/publish/formats/rss-0.91.dtd
 
         LATIN1_ENTITIES.put( "nbsp",      "160" );
         LATIN1_ENTITIES.put( "iexcl",     "161" );
         LATIN1_ENTITIES.put( "cent",      "162" );
         LATIN1_ENTITIES.put( "pound",     "163" );
         LATIN1_ENTITIES.put( "curren",    "164" );
         LATIN1_ENTITIES.put( "yen",       "165" );
         LATIN1_ENTITIES.put( "brvbar",    "166" );
         LATIN1_ENTITIES.put( "sect",      "167" );
         LATIN1_ENTITIES.put( "uml",       "168" );
         LATIN1_ENTITIES.put( "copy",      "169" );
         LATIN1_ENTITIES.put( "ordf",      "170" );
         LATIN1_ENTITIES.put( "laquo",     "171" );
         LATIN1_ENTITIES.put( "not",       "172" );
         LATIN1_ENTITIES.put( "shy",       "173" );
         LATIN1_ENTITIES.put( "reg",       "174" );
         LATIN1_ENTITIES.put( "macr",      "175" );
         LATIN1_ENTITIES.put( "deg",       "176" );
         LATIN1_ENTITIES.put( "plusmn",    "177" );
         LATIN1_ENTITIES.put( "sup2",      "178" );
         LATIN1_ENTITIES.put( "sup3",      "179" );
         LATIN1_ENTITIES.put( "acute",     "180" );
         LATIN1_ENTITIES.put( "micro",     "181" );
         LATIN1_ENTITIES.put( "para",      "182" );
         LATIN1_ENTITIES.put( "middot",    "183" );
         LATIN1_ENTITIES.put( "cedil",     "184" );
         LATIN1_ENTITIES.put( "sup1",      "185" );
         LATIN1_ENTITIES.put( "ordm",      "186" );
         LATIN1_ENTITIES.put( "raquo",     "187" );
         LATIN1_ENTITIES.put( "frac14",    "188" );
         LATIN1_ENTITIES.put( "frac12",    "189" );
         LATIN1_ENTITIES.put( "frac34",    "190" );
         LATIN1_ENTITIES.put( "iquest",    "191" );
         LATIN1_ENTITIES.put( "Agrave",    "192" );
         LATIN1_ENTITIES.put( "Aacute",    "193" );
         LATIN1_ENTITIES.put( "Acirc",     "194" );
         LATIN1_ENTITIES.put( "Atilde",    "195" );
         LATIN1_ENTITIES.put( "Auml",      "196" );
         LATIN1_ENTITIES.put( "Aring",     "197" );
         LATIN1_ENTITIES.put( "AElig",     "198" );
         LATIN1_ENTITIES.put( "Ccedil",    "199" );
         LATIN1_ENTITIES.put( "Egrave",    "200" );
         LATIN1_ENTITIES.put( "Eacute",    "201" );
         LATIN1_ENTITIES.put( "Ecirc",     "202" );
         LATIN1_ENTITIES.put( "Euml",      "203" );
         LATIN1_ENTITIES.put( "Igrave",    "204" );
         LATIN1_ENTITIES.put( "Iacute",    "205" );
         LATIN1_ENTITIES.put( "Icirc",     "206" );
         LATIN1_ENTITIES.put( "Iuml",      "207" );
         LATIN1_ENTITIES.put( "ETH",       "208" );
         LATIN1_ENTITIES.put( "Ntilde",    "209" );
         LATIN1_ENTITIES.put( "Ograve",    "210" );
         LATIN1_ENTITIES.put( "Oacute",    "211" );
         LATIN1_ENTITIES.put( "Ocirc",     "212" );
         LATIN1_ENTITIES.put( "Otilde",    "213" );
         LATIN1_ENTITIES.put( "Ouml",      "214" );
         LATIN1_ENTITIES.put( "times",     "215" );
         LATIN1_ENTITIES.put( "Oslash",    "216" );
         LATIN1_ENTITIES.put( "Ugrave",    "217" );
         LATIN1_ENTITIES.put( "Uacute",    "218" );
         LATIN1_ENTITIES.put( "Ucirc",     "219" );
         LATIN1_ENTITIES.put( "Uuml",      "220" );
         LATIN1_ENTITIES.put( "Yacute",    "221" );
         LATIN1_ENTITIES.put( "THORN",     "222" );
         LATIN1_ENTITIES.put( "szlig",     "223" );
         LATIN1_ENTITIES.put( "agrave",    "224" );
         LATIN1_ENTITIES.put( "aacute",    "225" );
         LATIN1_ENTITIES.put( "acirc",     "226" );
         LATIN1_ENTITIES.put( "atilde",    "227" );
         LATIN1_ENTITIES.put( "auml",      "228" );
         LATIN1_ENTITIES.put( "aring",     "229" );
         LATIN1_ENTITIES.put( "aelig",     "230" );
         LATIN1_ENTITIES.put( "ccedil",    "231" );
         LATIN1_ENTITIES.put( "egrave",    "232" );
         LATIN1_ENTITIES.put( "eacute",    "233" );
         LATIN1_ENTITIES.put( "ecirc",     "234" );
         LATIN1_ENTITIES.put( "euml",      "235" );
         LATIN1_ENTITIES.put( "igrave",    "236" );
         LATIN1_ENTITIES.put( "iacute",    "237" );
         LATIN1_ENTITIES.put( "icirc",     "238" );
         LATIN1_ENTITIES.put( "iuml",      "239" );
         LATIN1_ENTITIES.put( "eth",       "240" );
         LATIN1_ENTITIES.put( "ntilde",    "241" );
         LATIN1_ENTITIES.put( "ograve",    "242" );
         LATIN1_ENTITIES.put( "oacute",    "243" );
         LATIN1_ENTITIES.put( "ocirc",     "244" );
         LATIN1_ENTITIES.put( "otilde",    "245" );
         LATIN1_ENTITIES.put( "ouml",      "246" );
         LATIN1_ENTITIES.put( "divide",    "247" );
         LATIN1_ENTITIES.put( "oslash",    "248" );
         LATIN1_ENTITIES.put( "ugrave",    "249" );
         LATIN1_ENTITIES.put( "uacute",    "250" );
         LATIN1_ENTITIES.put( "ucirc",     "251" );
         LATIN1_ENTITIES.put( "uuml",      "252" );
         LATIN1_ENTITIES.put( "yacute",    "253" );
         LATIN1_ENTITIES.put( "thorn",     "254" );
         LATIN1_ENTITIES.put( "yuml",      "255" );
 
     }
     
 }

1		/*
2		* Copyright 1999,2004 The Apache Software Foundation.
3		*
4		* Licensed under the Apache License, Version 2.0 (the "License");
5		* you may not use this file except in compliance with the License.
6		* You may obtain a copy of the License at
7		*
8		* http://www.apache.org/licenses/LICENSE-2.0
9		*
10		* Unless required by applicable law or agreed to in writing, software
11		* distributed under the License is distributed on an "AS IS" BASIS,
12		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		* See the License for the specific language governing permissions and
14		* limitations under the License.
15		*/
16
17		package org.apache.commons.feedparser;
18
19		import java.util.HashMap;
20		import java.util.regex.Matcher;
21		import java.util.regex.Pattern;
22
23		import org.apache.log4j.Logger;
24
25		/**
26		*
27		* @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
28		* @version $Id: FeedFilter.java 373614 2006-01-30 22:31:21Z mvdb $
29		*/
30	0	public class FeedFilter {
31
32	0	private static Logger log = Logger.getLogger( FeedFilter.class );
33
34	0	public static boolean DO_REMOVE_LEADING_PROLOG = true;
35	0	public static boolean DO_DECODE_ENTITIES = true;
36
37	0	public static HashMap LATIN1_ENTITIES = new HashMap();
38
39	0	private static Pattern entity_pattern = Pattern.compile( "&([a-zA-Z]+);" );
40
41		/**
42		* This is probably the wrong behavior. I shouldn't call this method I
43		* think because assuming a content type is bad form.
44		*
45		* @deprecated Specify an encoding with #parse( bytes[], encoding )
46		*
47		*/
48		public static byte[] parse( byte[] bytes )
49		throws Exception {
50
51	0	return parse( bytes, "UTF-8" );
52
53		}
54
55		public static byte[] parse( byte[] bytes, String encoding )
56		throws Exception {
57
58	0	String content = new String( bytes, encoding );
59
60	0	return parse( content, encoding );
61
62		}
63
64		/**
65		* Parse out an input string of content.
66		*
67		* http://wiki.apache.org/jakarta-commons/FeedParser_2fStringAllocationConsideredHelpful
68		*
69		*
70		*/
71		public static byte[] parse( String content, String encoding )
72		throws Exception {
73
74		//FIXME: return an object here so that I can flag a bozo bit.
75
76		//remove leading prolog...
77	0	if ( DO_REMOVE_LEADING_PROLOG )
78	0	content = doRemoveLeadingProlog( content, encoding );
79
80		//decode HTML entities that are referenced.
81	0	if ( DO_DECODE_ENTITIES )
82	0	content = doDecodeEntities( content );
83
84		//TODO: undeclared namespace prefixes should be expanded to their common
85		//form. 'rdf, 'atom', 'xhtml' etc. Considering that they're will only be
86		//a handful H and then 4^36 different possibilities the probability will
87		//only be H in 4^36 which is pretty good that we won't have a false
88		//positive.
89
90	0	return content.getBytes( encoding );
91
92		}
93
94		/**
95		* Removing prolog whitespace, comments, and other garbage from the
96		* beginning of a feed.
97		*
98		*
99		*/
100		private static String doRemoveLeadingProlog( String content, String encoding ) {
101
102		// if we're a UTF-16 or UTF-32 feed we need to LEAVE the prolog because
103		// it triggers a UTF-16 parse due to the BOM.
104		//
105		// FIXME: this isn't actually true. We should leave the BOM and remove
106		// the prolog anyway due to the fact that this will still break the
107		// parser. Come up with some tests for UTF-16 to see if I can get it to
108		// break and then update this method.
109
110	0	if ( "UTF-16".equals( encoding ) ||
111		"UTF-32".equals( encoding ) )
112	0	return content;
113
114		//move to the beginning of the first element or comment. When this is a
115		//processing instruction we will move to that
116	0	int begin = content.indexOf( "<" );
117
118	0	if ( begin > 0 ) {
119	0	content = content.substring( begin, content.length() );
120	0	log.warn( "Skipped whitespace in prolog and moved towards first element." );
121		}
122
123		//now skip to the XML processing instruction when necessary. This is
124		//used to remove comments prior to <?xml which are not allowed.
125
126	0	begin = content.indexOf( "<?xml" );
127
128	0	if ( begin > 0 ) {
129	0	content = content.substring( begin, content.length() );
130	0	log.warn( "Removed prolog towards first processing instruction." );
131		}
132
133	0	content = doRemoveElementProlog( content );
134
135	0	return content;
136
137		}
138
139		/**
140		* Remove element content between:
141		*
142		* <?xml version="1.0"?>
143		*
144		* THIS IS BROKEN PROLOG
145		*
146		* <foo>
147		*
148		*
149		*/
150		private static String doRemoveElementProlog( String content ) {
151
152	0	int end = content.lastIndexOf( "?>", 100 );
153
154	0	if ( end == -1 )
155	0	return content;
156
157	0	StringBuffer buff = new StringBuffer( content.length() );
158	0	end = end + 2;
159	0	buff.append( content.substring( 0, end ) );
160
161	0	int begin = content.indexOf( "<", end );
162
163	0	if ( begin != -1 ) {
164
165	0	buff.append( "\n" );
166	0	buff.append( content.substring( begin, content.length() ) );
167
168		}
169
170	0	return buff.toString();
171
172		}
173
174		private static String doDecodeEntities( String content ) {
175
176	0	StringBuffer buff = new StringBuffer( content.length() + 1000 );
177
178	0	Matcher m = entity_pattern.matcher( content );
179
180	0	int begin = 0;
181
182	0	boolean hasFilterDecodedEntities = false;
183	0	boolean hasFilterFoundUnknownEntity = false;
184
185		//FIXME: note that when I was benchmarking this code that this showed up
186		//as a MAJOR bottleneck so we might want to optimize it a little more.
187
188	0	while ( m.find() ) {
189
190	0	buff.append( content.substring( begin, m.start() ) );
191
192	0	String entity = m.group( 1 );
193
194	0	String value = (String)LATIN1_ENTITIES.get( entity );
195
196	0	if ( value != null ) {
197	0	buff.append( "&#" );
198	0	buff.append( value );
199	0	buff.append( ";" );
200
201	0	hasFilterDecodedEntities = true;
202
203		} else {
204
205		//This is not a known entity so we have no way to correct it.
206		//If this is done then we have a problem and the feed probably
207		//still won't parse
208	0	buff.append( "&" );
209	0	buff.append( entity );
210	0	buff.append( ";" );
211
212	0	hasFilterFoundUnknownEntity = true;
213		}
214
215	0	begin = m.end( 0 );
216
217	0	}
218
219	0	buff.append( content.substring( begin, content.length() ) );
220
221	0	if ( hasFilterFoundUnknownEntity )
222	0	log.warn( "Filter encountered unknown entities" );
223
224	0	if ( hasFilterDecodedEntities )
225	0	log.warn( "Filter has decoded latin1 entities." );
226
227	0	return buff.toString();
228
229		}
230
231		public static void main( String[] args ) throws Exception {
232
233	0	byte[] b = parse( "hello &eacute; world".getBytes() );
234
235	0	String v = new String( b );
236
237	0	System.out.println( "v: " + v );
238
239	0	}
240
241		static {
242
243		// load the latin1 entity map. We will replace latin1 entities with
244		// their char references directly. For example if someone incorrectly
245		// references:
246		//
247		// &auml;
248		//
249		// we replace it with:
250		//
251		// &#228;
252		//
253		// Which is correct in Latin1
254
255		// http://my.netscape.com/publish/formats/rss-0.91.dtd
256
257	0	LATIN1_ENTITIES.put( "nbsp", "160" );
258	0	LATIN1_ENTITIES.put( "iexcl", "161" );
259	0	LATIN1_ENTITIES.put( "cent", "162" );
260	0	LATIN1_ENTITIES.put( "pound", "163" );
261	0	LATIN1_ENTITIES.put( "curren", "164" );
262	0	LATIN1_ENTITIES.put( "yen", "165" );
263	0	LATIN1_ENTITIES.put( "brvbar", "166" );
264	0	LATIN1_ENTITIES.put( "sect", "167" );
265	0	LATIN1_ENTITIES.put( "uml", "168" );
266	0	LATIN1_ENTITIES.put( "copy", "169" );
267	0	LATIN1_ENTITIES.put( "ordf", "170" );
268	0	LATIN1_ENTITIES.put( "laquo", "171" );
269	0	LATIN1_ENTITIES.put( "not", "172" );
270	0	LATIN1_ENTITIES.put( "shy", "173" );
271	0	LATIN1_ENTITIES.put( "reg", "174" );
272	0	LATIN1_ENTITIES.put( "macr", "175" );
273	0	LATIN1_ENTITIES.put( "deg", "176" );
274	0	LATIN1_ENTITIES.put( "plusmn", "177" );
275	0	LATIN1_ENTITIES.put( "sup2", "178" );
276	0	LATIN1_ENTITIES.put( "sup3", "179" );
277	0	LATIN1_ENTITIES.put( "acute", "180" );
278	0	LATIN1_ENTITIES.put( "micro", "181" );
279	0	LATIN1_ENTITIES.put( "para", "182" );
280	0	LATIN1_ENTITIES.put( "middot", "183" );
281	0	LATIN1_ENTITIES.put( "cedil", "184" );
282	0	LATIN1_ENTITIES.put( "sup1", "185" );
283	0	LATIN1_ENTITIES.put( "ordm", "186" );
284	0	LATIN1_ENTITIES.put( "raquo", "187" );
285	0	LATIN1_ENTITIES.put( "frac14", "188" );
286	0	LATIN1_ENTITIES.put( "frac12", "189" );
287	0	LATIN1_ENTITIES.put( "frac34", "190" );
288	0	LATIN1_ENTITIES.put( "iquest", "191" );
289	0	LATIN1_ENTITIES.put( "Agrave", "192" );
290	0	LATIN1_ENTITIES.put( "Aacute", "193" );
291	0	LATIN1_ENTITIES.put( "Acirc", "194" );
292	0	LATIN1_ENTITIES.put( "Atilde", "195" );
293	0	LATIN1_ENTITIES.put( "Auml", "196" );
294	0	LATIN1_ENTITIES.put( "Aring", "197" );
295	0	LATIN1_ENTITIES.put( "AElig", "198" );
296	0	LATIN1_ENTITIES.put( "Ccedil", "199" );
297	0	LATIN1_ENTITIES.put( "Egrave", "200" );
298	0	LATIN1_ENTITIES.put( "Eacute", "201" );
299	0	LATIN1_ENTITIES.put( "Ecirc", "202" );
300	0	LATIN1_ENTITIES.put( "Euml", "203" );
301	0	LATIN1_ENTITIES.put( "Igrave", "204" );
302	0	LATIN1_ENTITIES.put( "Iacute", "205" );
303	0	LATIN1_ENTITIES.put( "Icirc", "206" );
304	0	LATIN1_ENTITIES.put( "Iuml", "207" );
305	0	LATIN1_ENTITIES.put( "ETH", "208" );
306	0	LATIN1_ENTITIES.put( "Ntilde", "209" );
307	0	LATIN1_ENTITIES.put( "Ograve", "210" );
308	0	LATIN1_ENTITIES.put( "Oacute", "211" );
309	0	LATIN1_ENTITIES.put( "Ocirc", "212" );
310	0	LATIN1_ENTITIES.put( "Otilde", "213" );
311	0	LATIN1_ENTITIES.put( "Ouml", "214" );
312	0	LATIN1_ENTITIES.put( "times", "215" );
313	0	LATIN1_ENTITIES.put( "Oslash", "216" );
314	0	LATIN1_ENTITIES.put( "Ugrave", "217" );
315	0	LATIN1_ENTITIES.put( "Uacute", "218" );
316	0	LATIN1_ENTITIES.put( "Ucirc", "219" );
317	0	LATIN1_ENTITIES.put( "Uuml", "220" );
318	0	LATIN1_ENTITIES.put( "Yacute", "221" );
319	0	LATIN1_ENTITIES.put( "THORN", "222" );
320	0	LATIN1_ENTITIES.put( "szlig", "223" );
321	0	LATIN1_ENTITIES.put( "agrave", "224" );
322	0	LATIN1_ENTITIES.put( "aacute", "225" );
323	0	LATIN1_ENTITIES.put( "acirc", "226" );
324	0	LATIN1_ENTITIES.put( "atilde", "227" );
325	0	LATIN1_ENTITIES.put( "auml", "228" );
326	0	LATIN1_ENTITIES.put( "aring", "229" );
327	0	LATIN1_ENTITIES.put( "aelig", "230" );
328	0	LATIN1_ENTITIES.put( "ccedil", "231" );
329	0	LATIN1_ENTITIES.put( "egrave", "232" );
330	0	LATIN1_ENTITIES.put( "eacute", "233" );
331	0	LATIN1_ENTITIES.put( "ecirc", "234" );
332	0	LATIN1_ENTITIES.put( "euml", "235" );
333	0	LATIN1_ENTITIES.put( "igrave", "236" );
334	0	LATIN1_ENTITIES.put( "iacute", "237" );
335	0	LATIN1_ENTITIES.put( "icirc", "238" );
336	0	LATIN1_ENTITIES.put( "iuml", "239" );
337	0	LATIN1_ENTITIES.put( "eth", "240" );
338	0	LATIN1_ENTITIES.put( "ntilde", "241" );
339	0	LATIN1_ENTITIES.put( "ograve", "242" );
340	0	LATIN1_ENTITIES.put( "oacute", "243" );
341	0	LATIN1_ENTITIES.put( "ocirc", "244" );
342	0	LATIN1_ENTITIES.put( "otilde", "245" );
343	0	LATIN1_ENTITIES.put( "ouml", "246" );
344	0	LATIN1_ENTITIES.put( "divide", "247" );
345	0	LATIN1_ENTITIES.put( "oslash", "248" );
346	0	LATIN1_ENTITIES.put( "ugrave", "249" );
347	0	LATIN1_ENTITIES.put( "uacute", "250" );
348	0	LATIN1_ENTITIES.put( "ucirc", "251" );
349	0	LATIN1_ENTITIES.put( "uuml", "252" );
350	0	LATIN1_ENTITIES.put( "yacute", "253" );
351	0	LATIN1_ENTITIES.put( "thorn", "254" );
352	0	LATIN1_ENTITIES.put( "yuml", "255" );
353
354	0	}
355
356		}