Coverage Report - org.apache.commons.feedparser.locate.EntityDecoder
 
Classes in this File Line Coverage Branch Coverage Complexity
EntityDecoder
0%
0/32
0%
0/8
4
 
 1  
 /*
 2  
  * Copyright 1999,2004 The Apache Software Foundation.
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  * 
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  * 
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.feedparser.locate;
 18  
 
 19  
 import java.util.HashMap;
 20  
 import java.util.regex.Matcher;
 21  
 import java.util.regex.Pattern;
 22  
 
 23  
 /**
 24  
  *
 25  
  * Given a string of HTML content we decode the entities it contains.
 26  
  *
 27  
  * NOTE: Currently this is a trivial implementation and we need to go through
 28  
  * and make sure all HTML entities are correctly supported.
 29  
  * 
 30  
  * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
 31  
  * @version $Id: EntityDecoder.java 373622 2006-01-30 22:53:00Z mvdb $
 32  
  */
 33  0
 public class EntityDecoder {
 34  
 
 35  
     //FIXME: see FeedFilter.java for a list of all valid HTML entities.  I
 36  
     //should replace them with character literals in this situation.
 37  
     
 38  0
     private static HashMap entities = new HashMap();
 39  
 
 40  0
     static Pattern pattern = Pattern.compile( "&([a-z]+);" );
 41  
     
 42  
     static {
 43  
 
 44  
         //FIXME: there are a LOT more of these and we need an exhaustive colleciton.
 45  
         
 46  0
         entities.put( "gt", ">" );
 47  0
         entities.put( "apos", ">" );
 48  0
         entities.put( "lt", "<" );
 49  0
         entities.put( "amp", "&" );
 50  
 
 51  
         //FIXME: 
 52  0
         entities.put( "raquo", "" );
 53  0
         entities.put( "laquo", "" );
 54  
         
 55  0
     }
 56  
 
 57  
     /**
 58  
      * Decode content.  If a null is passed in we return null. 
 59  
      *
 60  
      * 
 61  
      */
 62  
     public static String decode( String content ) {
 63  
 
 64  0
         if ( content == null )
 65  0
             return null;
 66  
         
 67  
         //FIXME(performance): do I have existing code that does this more efficiently?
 68  0
         if (content == null)
 69  0
             return null;
 70  
 
 71  0
         StringBuffer buff = new StringBuffer( content.length() );
 72  
 
 73  0
         Matcher m = pattern.matcher( content );
 74  
         
 75  0
         int index = 0;
 76  0
         while ( m.find() ) {
 77  
 
 78  
             //figure out which entity to escape or just include it.
 79  
 
 80  0
             buff.append( content.substring( index, m.start( 0 ) ) );
 81  
 
 82  0
             String entity = m.group( 1 );
 83  
 
 84  0
             if ( entities.containsKey( entity ) ) {
 85  0
                 buff.append( entities.get( entity ) );
 86  
             } else {
 87  
                 //found an entity we no NOTHING about.  Should we warn?
 88  
                 
 89  0
                 buff.append( m.group( 0 ) );
 90  
             }
 91  
 
 92  0
             index = m.end( 0 );
 93  
 
 94  0
         }
 95  
 
 96  0
         buff.append( content.substring( index, content.length() ) );
 97  
 
 98  0
         return buff.toString();
 99  
         
 100  
     }
 101  
 
 102  
     public static void main( String[] args ) throws Exception {
 103  
 
 104  0
         System.out.println( decode( "&amp;" ) );
 105  0
         System.out.println( decode( "asdf&amp;asdf" ) );
 106  
 
 107  0
         System.out.println( decode( "asdf&amp;" ) );
 108  
 
 109  0
         System.out.println( decode( "&amp;asdf" ) );
 110  
 
 111  0
     }
 112  
 
 113  
 }