Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
EntityDecoder |
|
| 4.0;4 |
1 | /* | |
2 | * Copyright 1999,2004 The Apache Software Foundation. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | package org.apache.commons.feedparser.locate; | |
18 | ||
19 | import java.util.HashMap; | |
20 | import java.util.regex.Matcher; | |
21 | import java.util.regex.Pattern; | |
22 | ||
/**
 * Given a string of HTML content we decode the named entities it contains.
 *
 * Only references of the form <code>&amp;name;</code>, where <code>name</code>
 * consists solely of lowercase ASCII letters, are recognised.  Anything else
 * (numeric references, names containing uppercase letters or digits, and
 * names missing from the lookup table) is copied to the output unchanged.
 *
 * NOTE: Currently this is a trivial implementation and we need to go through
 * and make sure all HTML entities are correctly supported.
 *
 * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
 * @version $Id: EntityDecoder.java 373622 2006-01-30 22:53:00Z mvdb $
 */
public class EntityDecoder {

    //FIXME: see FeedFilter.java for a list of all valid HTML entities.  I
    //should replace them with character literals in this situation.

    /**
     * Lookup table from entity name (without the leading '&amp;' and the
     * trailing ';') to its replacement text.  Populated once in the static
     * initializer below and only read afterwards.
     */
    private static final HashMap entities = new HashMap();

    /**
     * Matches one candidate entity reference; group 1 is the entity name.
     * Visibility and mutability are left as they were so that any existing
     * users in this package keep working.
     */
    static Pattern pattern = Pattern.compile( "&([a-z]+);" );

    static {

        //FIXME: there are a LOT more of these and we need an exhaustive collection.

        entities.put( "gt", ">" );
        // Previously mapped to ">" (copy/paste slip); an apostrophe is correct.
        entities.put( "apos", "'" );
        entities.put( "lt", "<" );
        entities.put( "amp", "&" );

        // Angle quotation marks, written as Unicode escapes so the result does
        // not depend on the encoding used to compile this file.
        entities.put( "raquo", "\u00BB" );
        entities.put( "laquo", "\u00AB" );

    }

    /**
     * Decode the known named entities in the given content.
     *
     * The input is scanned once from left to right and replacement text is
     * never rescanned, so <code>&amp;amp;lt;</code> decodes to
     * <code>&amp;lt;</code> and not to <code>&lt;</code>.
     *
     * @param content the text to decode; may be null
     * @return the decoded text, or null if <code>content</code> was null
     */
    public static String decode( String content ) {

        if ( content == null ) {
            return null;
        }

        //FIXME(performance): do I have existing code that does this more efficiently?

        StringBuffer buff = new StringBuffer( content.length() );

        Matcher m = pattern.matcher( content );

        // Offset of the first character not yet copied to the output.
        int index = 0;

        while ( m.find() ) {

            // Literal text between the previous match and this one.
            buff.append( content.substring( index, m.start() ) );

            // The table never stores null values, so a null result means the
            // entity is unknown; one lookup replaces containsKey() + get().
            Object replacement = entities.get( m.group( 1 ) );

            if ( replacement != null ) {
                buff.append( replacement );
            } else {
                // Found an entity we know NOTHING about: keep it verbatim.
                buff.append( m.group() );
            }

            index = m.end();

        }

        // Whatever follows the last match (or everything, if none matched).
        buff.append( content.substring( index ) );

        return buff.toString();

    }

    /**
     * Small manual smoke test: prints the decoded form of a few strings that
     * contain the <code>amp</code> entity at the start, middle and end.
     */
    public static void main( String[] args ) throws Exception {

        System.out.println( decode( "&amp;" ) );
        System.out.println( decode( "asdf&amp;asdf" ) );

        System.out.println( decode( "asdf&amp;" ) );

        System.out.println( decode( "&amp;asdf" ) );

    }

}