1 package org.apache.maven.shared.utils; 2 3 /* 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 */ 21 22 import java.io.ByteArrayInputStream; 23 import java.io.IOException; 24 import java.io.InputStream; 25 import java.io.SequenceInputStream; 26 27 28 import junit.framework.ComparisonFailure; 29 import junit.framework.TestCase; 30 import org.apache.maven.shared.utils.io.IOUtil; 31 import org.apache.maven.shared.utils.xml.XmlStreamReader; 32 33 /** 34 * 35 * @author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a> 36 */ 37 public class XmlStreamReaderTest 38 extends TestCase 39 { 40 /** french */ 41 private static final String TEXT_LATIN1 = "eacute: \u00E9"; 42 /** greek */ 43 private static final String TEXT_LATIN7 = "alpha: \u03B1"; 44 /** euro support */ 45 private static final String TEXT_LATIN15 = "euro: \u20AC"; 46 /** japanese */ 47 private static final String TEXT_EUC_JP = "hiragana A: \u3042"; 48 /** Unicode: support everything */ 49 private static final String TEXT_UNICODE = 50 TEXT_LATIN1 + ", " + 51 TEXT_LATIN7 + ", " + 52 TEXT_LATIN15 + ", " + 53 TEXT_EUC_JP; 54 /** see http://unicode.org/faq/utf_bom.html#BOM */ 55 private static final byte[] BOM_UTF8 = { (byte)0xEF, (byte)0xBB, (byte)0xBF }; 56 private static final byte[] BOM_UTF16BE = { (byte)0xFE, (byte)0xFF }; 57 private static final byte[] BOM_UTF16LE = { (byte)0xFF, (byte)0xFE }; 58 private static final byte[] BOM_UTF32BE = { (byte)0x00, (byte)0x00, (byte)0xFF, (byte)0xFE }; 59 private static final byte[] BOM_UTF32LE = { (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00 }; 60 61 private static String createXmlContent( String text, String encoding ) 62 { 63 String xmlDecl = "<?xml version=\"1.0\"?>"; 64 if ( encoding != null ) 65 { 66 xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>"; 67 } 68 return xmlDecl + "\n<text>" + text + "</text>"; 69 } 70 71 private static void checkXmlContent( String xml, String encoding ) 72 throws IOException 73 { 74 checkXmlContent( xml, encoding, null ); 75 } 76 77 private static void checkXmlContent( String xml, String encoding, byte[] bom ) 78 throws IOException 79 { 80 byte[] xmlContent = xml.getBytes( encoding ); 81 InputStream in = new ByteArrayInputStream( xmlContent ); 82 83 if ( bom != null ) 84 { 85 in = new SequenceInputStream( new ByteArrayInputStream( bom ), in ); 86 } 87 88 XmlStreamReader reader = new XmlStreamReader( in ); 89 assertEquals( encoding, reader.getEncoding() ); 90 String result = IOUtil.toString( reader ); 91 assertEquals( xml, result ); 92 } 93 94 private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding ) 95 throws IOException 96 { 97 checkXmlStreamReader( text, encoding, effectiveEncoding, null ); 98 } 99 100 private static void checkXmlStreamReader( String text, String encoding ) 101 throws IOException 102 { 103 checkXmlStreamReader( text, encoding, encoding, null ); 104 } 105 106 private static void checkXmlStreamReader( String text, String encoding, byte[] bom ) 107 throws IOException 108 { 109 checkXmlStreamReader( text, encoding, encoding, bom ); 110 } 111 112 private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding, byte[] bom ) 113 throws IOException 114 { 115 String xml = createXmlContent( text, encoding ); 116 checkXmlContent( xml, effectiveEncoding, bom ); 117 } 118 119 public void testNoXmlHeader() 120 throws IOException 121 { 122 String xml = "<text>text with no XML header</text>"; 123 checkXmlContent( xml, "UTF-8" ); 124 checkXmlContent( xml, "UTF-8", BOM_UTF8 ); 125 } 126 127 public void testDefaultEncoding() 128 throws IOException 129 { 130 checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8" ); 131 checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8", BOM_UTF8 ); 132 } 133 134 public void testUTF8Encoding() 135 throws IOException 136 { 137 checkXmlStreamReader( TEXT_UNICODE, "UTF-8" ); 138 checkXmlStreamReader( TEXT_UNICODE, "UTF-8", BOM_UTF8 ); 139 } 140 141 public void testUTF16Encoding() 142 throws IOException 143 { 144 checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", null ); 145 checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE ); 146 checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE ); 147 } 148 149 public void testUTF16BEEncoding() 150 throws IOException 151 { 152 checkXmlStreamReader( TEXT_UNICODE, "UTF-16BE" ); 153 } 154 155 public void testUTF16LEEncoding() 156 throws IOException 157 { 158 checkXmlStreamReader( TEXT_UNICODE, "UTF-16LE" ); 159 } 160 161 public void testLatin1Encoding() 162 throws IOException 163 { 164 checkXmlStreamReader( TEXT_LATIN1, "ISO-8859-1" ); 165 } 166 167 public void testLatin7Encoding() 168 throws IOException 169 { 170 checkXmlStreamReader( TEXT_LATIN7, "ISO-8859-7" ); 171 } 172 173 public void testLatin15Encoding() 174 throws IOException 175 { 176 checkXmlStreamReader( TEXT_LATIN15, "ISO-8859-15" ); 177 } 178 179 public void testEUC_JPEncoding() 180 throws IOException 181 { 182 checkXmlStreamReader( TEXT_EUC_JP, "EUC-JP" ); 183 } 184 185 public void testEBCDICEncoding() 186 throws IOException 187 { 188 checkXmlStreamReader( "simple text in EBCDIC", "CP1047" ); 189 } 190 191 public void testInappropriateEncoding() 192 throws IOException 193 { 194 try 195 { 196 checkXmlStreamReader( TEXT_UNICODE, "ISO-8859-2" ); 197 fail( "Check should have failed, since some characters are not available in the specified encoding" ); 198 } 199 catch ( ComparisonFailure cf ) 200 { 201 // expected failure, since the encoding does not contain some characters 202 } 203 } 204 205 public void testEncodingAttribute() 206 throws IOException 207 { 208 String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>"; 209 checkXmlContent( xml, "US-ASCII" ); 210 211 xml = "<?xml version='1.0' encoding = 'US-ASCII' ?><element encoding='attribute value'/>"; 212 checkXmlContent( xml, "US-ASCII" ); 213 214 xml = "<?xml version='1.0'?><element encoding='attribute value'/>"; 215 checkXmlContent( xml, "UTF-8" ); 216 217 xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>"; 218 checkXmlContent( xml, "US-ASCII" ); 219 220 xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>"; 221 checkXmlContent( xml, "UTF-8" ); 222 223 xml = "<element encoding='attribute value'/>"; 224 checkXmlContent( xml, "UTF-8" ); 225 } 226 }