Coverage Report -
Classes in this File Line Coverage Branch Coverage Complexity
  * Copyright 1999,2004 The Apache Software Foundation.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  * Given an XML document, pull out its declared encoding, or the default
  * (UTF-8) if none is specified.
  * @author <a href="">Kevin A. Burton</a>
 27  0
 public class XMLEncodingParser {
     public static final String ENCODING = "encoding=\"";
     public static String parse( byte[] content ) throws Exception {
         //this isn't really pretty but it is fast.
         //just use the first 100 bytes
         String str;
 43  0
         if ( content.length > 100 ) {
 44  0
             str = new String( content, 0, 100 );
         } else {
 46  0
             str = new String( content );
 49  0
         String result = getEncodingFromBOM( content );
 51  0
         if ( result != null )
 52  0
             return result;
 54  0
         int end = str.indexOf( ">" );
 56  0
         if ( end == -1 )
 57  0
             return "UTF-8";
 59  0
         String decl = str.substring( 0, end );
 61  0
         int index = decl.indexOf( ENCODING );
 63  0
         if ( index != -1 ) {
 65  0
             String encoding = decl.substring( index + ENCODING.length(),
                                               decl.length() );
 68  0
             end = encoding.indexOf( "\"" );
 70  0
             if ( end == -1 )
 71  0
                 return "UTF-8";
 73  0
             encoding = encoding.substring( 0, end);
 74  0
             encoding = encoding.toUpperCase();
 76  0
             if ( "UTF8".equals( encoding ) )
 77  0
                 encoding = "UTF-8";
 79  0
             return encoding;
 83  0
         return "UTF-8";
     private static String getEncodingFromBOM( byte[] content ) {
         // Technically speaking if we see a BOM is specified we're supposed to
         // return UTF-16 or UTF-32 but because we only care about anything UTF
         // returning UTF-8 is incorrect but acceptable.
 95  0
         if ( content.length > 2 ) {
             //perform UTF-16 tests
 98  0
             if ( content[0] == -1 &&
                  content[1] == -2 ) 
 100  0
                 return "UTF-16";
 102  0
             if ( content[0] == -2 &&
                  content[1] == -1 ) 
 104  0
                 return "UTF-16";
 108  0
         if ( content.length > 4 ) {
             //perform UTF-16 tests
 111  0
             if ( content[0] == 0 &&
                  content[1] == 0 &&
                  content[2] == -2 &&
                  content[3] == -1 ) 
 115  0
                 return "UTF-32";
 117  0
             if ( content[0] == -1 &&
                  content[1] == -2 &&
                  content[2] == 0 &&
                  content[3] == 0 ) 
 121  0
                 return "UTF-32";
 125  0
         return null;
     public static void main( String[] args ) throws Exception {
 131  0
         System.out.println( parse( "<?xml encoding=\"utf-8\"?>".getBytes() ) );
 132  0
         System.out.println( parse( "<?xml encoding=\"UTF-8\"?>".getBytes() ) );
 133  0
         System.out.println( parse( "<?xml encoding=\"utf8\"?>".getBytes() ) );
 135  0