View Javadoc
1   package org.apache.maven.shared.utils;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.SequenceInputStream;
26  
27  
28  import junit.framework.ComparisonFailure;
29  import junit.framework.TestCase;
30  import org.apache.maven.shared.utils.io.IOUtil;
31  import org.apache.maven.shared.utils.xml.XmlStreamReader;
32  
33  /**
34   * 
35   * @author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a>
36   */
37  public class XmlStreamReaderTest
38      extends TestCase
39  {
40      /** french */
41      private static final String TEXT_LATIN1 = "eacute: \u00E9";
42      /** greek */
43      private static final String TEXT_LATIN7 = "alpha: \u03B1";
44      /** euro support */
45      private static final String TEXT_LATIN15 = "euro: \u20AC";
46      /** japanese */
47      private static final String TEXT_EUC_JP = "hiragana A: \u3042";
48      /** Unicode: support everything */
49      private static final String TEXT_UNICODE =
50          TEXT_LATIN1 + ", " +
51          TEXT_LATIN7 + ", " +
52          TEXT_LATIN15 + ", " +
53          TEXT_EUC_JP;
54      /** see http://unicode.org/faq/utf_bom.html#BOM */
55      private static final byte[] BOM_UTF8 = { (byte)0xEF, (byte)0xBB, (byte)0xBF };
56      private static final byte[] BOM_UTF16BE = { (byte)0xFE, (byte)0xFF };
57      private static final byte[] BOM_UTF16LE = { (byte)0xFF, (byte)0xFE };
58      private static final byte[] BOM_UTF32BE = { (byte)0x00, (byte)0x00, (byte)0xFF, (byte)0xFE };
59      private static final byte[] BOM_UTF32LE = { (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00 };
60  
61      private static String createXmlContent( String text, String encoding )
62      {
63          String xmlDecl = "<?xml version=\"1.0\"?>";
64          if ( encoding != null )
65          {
66              xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
67          }
68          return xmlDecl + "\n<text>" + text + "</text>";
69      }
70  
71      private static void checkXmlContent( String xml, String encoding )
72      throws IOException
73      {
74          checkXmlContent( xml, encoding, null );
75      }
76  
77      private static void checkXmlContent( String xml, String encoding, byte[] bom )
78      throws IOException
79      {
80          byte[] xmlContent = xml.getBytes( encoding );
81          InputStream in = new ByteArrayInputStream( xmlContent );
82  
83          if ( bom != null )
84          {
85              in = new SequenceInputStream( new ByteArrayInputStream( bom ), in );
86          }
87  
88          XmlStreamReader reader = new XmlStreamReader( in );
89          assertEquals( encoding, reader.getEncoding() );
90          String result = IOUtil.toString( reader );
91          assertEquals( xml, result );
92      }
93  
94      private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding )
95      throws IOException
96      {
97          checkXmlStreamReader( text, encoding, effectiveEncoding, null );
98      }
99  
100     private static void checkXmlStreamReader( String text, String encoding )
101     throws IOException
102     {
103         checkXmlStreamReader( text, encoding, encoding, null );
104     }
105 
106     private static void checkXmlStreamReader( String text, String encoding, byte[] bom )
107     throws IOException
108     {
109         checkXmlStreamReader( text, encoding, encoding, bom );
110     }
111 
112     private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding, byte[] bom )
113     throws IOException
114     {
115         String xml = createXmlContent( text, encoding );
116         checkXmlContent( xml, effectiveEncoding, bom );
117     }
118 
119     public void testNoXmlHeader()
120     throws IOException
121     {
122         String xml = "<text>text with no XML header</text>";
123         checkXmlContent( xml, "UTF-8" );
124         checkXmlContent( xml, "UTF-8", BOM_UTF8 );
125     }
126 
127     public void testDefaultEncoding()
128     throws IOException
129     {
130         checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8" );
131         checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8", BOM_UTF8 );
132     }
133 
134     public void testUTF8Encoding()
135     throws IOException
136     {
137         checkXmlStreamReader( TEXT_UNICODE, "UTF-8" );
138         checkXmlStreamReader( TEXT_UNICODE, "UTF-8", BOM_UTF8 );
139     }
140 
141     public void testUTF16Encoding()
142     throws IOException
143     {
144         checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", null );
145         checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE );
146         checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE );
147     }
148 
149     public void testUTF16BEEncoding()
150     throws IOException
151     {
152         checkXmlStreamReader( TEXT_UNICODE, "UTF-16BE" );
153     }
154 
155     public void testUTF16LEEncoding()
156     throws IOException
157     {
158         checkXmlStreamReader( TEXT_UNICODE, "UTF-16LE" );
159     }
160 
161     public void testLatin1Encoding()
162     throws IOException
163     {
164         checkXmlStreamReader( TEXT_LATIN1, "ISO-8859-1" );
165     }
166 
167     public void testLatin7Encoding()
168     throws IOException
169     {
170         checkXmlStreamReader( TEXT_LATIN7, "ISO-8859-7" );
171     }
172 
173     public void testLatin15Encoding()
174     throws IOException
175     {
176         checkXmlStreamReader( TEXT_LATIN15, "ISO-8859-15" );
177     }
178 
179     public void testEUC_JPEncoding()
180     throws IOException
181     {
182         checkXmlStreamReader( TEXT_EUC_JP, "EUC-JP" );
183     }
184 
185     public void testEBCDICEncoding()
186     throws IOException
187     {
188         checkXmlStreamReader( "simple text in EBCDIC", "CP1047" );
189     }
190 
191     public void testInappropriateEncoding()
192     throws IOException
193     {
194         try
195         {
196             checkXmlStreamReader( TEXT_UNICODE, "ISO-8859-2" );
197             fail( "Check should have failed, since some characters are not available in the specified encoding" );
198         }
199         catch ( ComparisonFailure cf )
200         {
201             // expected failure, since the encoding does not contain some characters
202         }
203     }
204 
205     public void testEncodingAttribute()
206     throws IOException
207     {
208         String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>";
209         checkXmlContent( xml, "US-ASCII" );
210 
211         xml = "<?xml version='1.0' encoding  =  'US-ASCII'  ?><element encoding='attribute value'/>";
212         checkXmlContent( xml, "US-ASCII" );
213 
214         xml = "<?xml version='1.0'?><element encoding='attribute value'/>";
215         checkXmlContent( xml, "UTF-8" );
216 
217         xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>";
218         checkXmlContent( xml, "US-ASCII" );
219 
220         xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>";
221         checkXmlContent( xml, "UTF-8" );
222 
223         xml = "<element encoding='attribute value'/>";
224         checkXmlContent( xml, "UTF-8" );
225     }
226 }