1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertThrows;
21 import static org.junit.jupiter.api.Assertions.assertTrue;
22 import static org.junit.jupiter.api.Assertions.fail;
23
24 import java.io.ByteArrayInputStream;
25 import java.io.ByteArrayOutputStream;
26 import java.io.File;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.OutputStreamWriter;
30 import java.io.Writer;
31 import java.net.URL;
32 import java.net.URLConnection;
33 import java.nio.charset.Charset;
34 import java.nio.charset.StandardCharsets;
35 import java.nio.file.Files;
36 import java.nio.file.Path;
37 import java.nio.file.Paths;
38 import java.nio.file.StandardOpenOption;
39 import java.text.MessageFormat;
40 import java.util.HashMap;
41 import java.util.Locale;
42 import java.util.Map;
43
44 import org.apache.commons.io.CharsetsTest;
45 import org.apache.commons.io.IOUtils;
46 import org.apache.commons.io.function.IOFunction;
47 import org.junit.jupiter.api.Test;
48 import org.junit.jupiter.params.ParameterizedTest;
49 import org.junit.jupiter.params.provider.MethodSource;
50 import org.junitpioneer.jupiter.DefaultLocale;
51
52
53
54
55 public class XmlStreamReaderTest {
56
57 private static final String ISO_8859_1 = StandardCharsets.ISO_8859_1.name();
58 private static final String US_ASCII = StandardCharsets.US_ASCII.name();
59 private static final String UTF_16 = StandardCharsets.UTF_16.name();
60 private static final String UTF_16LE = StandardCharsets.UTF_16LE.name();
61 private static final String UTF_16BE = StandardCharsets.UTF_16BE.name();
62 private static final String UTF_32 = "UTF-32";
63 private static final String UTF_32LE = "UTF-32LE";
64 private static final String UTF_32BE = "UTF-32BE";
65 private static final String UTF_8 = StandardCharsets.UTF_8.name();
66
67 private static final String XML7 = "xml-prolog-encoding-no-version";
68 private static final String XML6 = "xml-prolog-encoding-new-line";
69 private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
70 private static final String XML4 = "xml-prolog-encoding-single-quotes";
71 private static final String XML3 = "xml-prolog-encoding-double-quotes";
72 private static final String XML2 = "xml-prolog";
73 private static final String XML1 = "xml";
74
75 private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n"
76 + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
77 + "\n"
78 + " <atom:entry>\n"
79 + " <atom:title encoding='base64'><![CDATA\n"
80 + "aW5nTGluZSIgLz4";
81
// Byte-order-mark sequences, stored as ints so each value can be passed
// directly to OutputStream.write(int).
private static final int[] NO_BOM_BYTES = {};
private static final int[] UTF_8_BOM_BYTES = {0xEF, 0xBB, 0xBF};
private static final int[] UTF_16BE_BOM_BYTES = {0xFE, 0xFF};
private static final int[] UTF_16LE_BOM_BYTES = {0xFF, 0xFE};
private static final int[] UTF_32BE_BOM_BYTES = {0x00, 0x00, 0xFE, 0xFF};
private static final int[] UTF_32LE_BOM_BYTES = {0xFF, 0xFE, 0x00, 0x00};

/** Maps a BOM type name (for example {@code "UTF-8-bom"}) to the bytes written before the document. */
private static final Map<String, int[]> BOMs = new HashMap<>();

static {
    BOMs.put("no-bom", NO_BOM_BYTES);
    BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
    BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
    BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
    BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES);
    BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES);
    // Deliberately mapped to no bytes.
    // NOTE(review): presumably because the "UTF-16" encoder used by the writer emits its own BOM - confirm.
    BOMs.put("UTF-16-bom", NO_BOM_BYTES);
}
105
// Document templates. Argument {1} is the encoding written into the prolog
// and {2} is free text placed inside the root element. Single quotes are
// doubled because MessageFormat treats a lone quote as an escape character.

/** Root element only, no prolog. */
private static final MessageFormat XML = new MessageFormat("<root>{2}</root>");

/** Prolog with a version but no encoding. */
private static final MessageFormat XML_WITH_PROLOG = new MessageFormat("<?xml version=\"1.0\"?>\n<root>{2}</root>");

/** Prolog whose tokens are each separated by a newline. */
private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_NEW_LINES =
    new MessageFormat("<?xml\nversion\n=\n\"1.0\"\nencoding\n=\n\"{1}\"\n?>\n<root>{2}</root>");

/** Newline-separated prolog that declares an encoding but no version. */
private static final MessageFormat XML_EXTERNAL_PARSED_ENTITY_NO_VERSION =
    new MessageFormat("<?xml\nencoding\n=\n\"{1}\"\n?>\n<root>{2}</root>");

/** Prolog with the encoding in double quotes. */
private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES =
    new MessageFormat("<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");

/** Prolog with the encoding in single quotes. */
private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES =
    new MessageFormat("<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");

/** Prolog with mixed whitespace around the equals sign and a single-quoted encoding. */
private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES =
    new MessageFormat("<?xml version=\"1.0\" encoding = \t \n \r''{1}''?>\n<root>{2}</root>");

/** Descriptive text placed inside the root element of every generated document. */
private static final MessageFormat INFO = new MessageFormat("\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");

/** Maps a template key ({@code XML1}..{@code XML7}) to its document template. */
private static final Map<String, MessageFormat> XMLs = new HashMap<>();

static {
    XMLs.put(XML1, XML);
    XMLs.put(XML2, XML_WITH_PROLOG);
    XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
    XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
    XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
    XMLs.put(XML6, XML_WITH_PROLOG_AND_ENCODING_NEW_LINES);
    XMLs.put(XML7, XML_EXTERNAL_PARSED_ENTITY_NO_VERSION);
}
141
142
143
144
145 private String getXML(final String bomType, final String xmlType,
146 final String streamEnc, final String prologEnc) {
147 final MessageFormat xml = XMLs.get(xmlType);
148 final String info = INFO.format(new Object[]{bomType, xmlType, prologEnc});
149 return xml.format(new Object[]{streamEnc, prologEnc, info});
150 }
151
152
153
154
155
156
157
158
159
160 protected InputStream getXmlInputStream(final String bomType, final String xmlType,
161 final String streamEnc, final String prologEnc) throws IOException {
162 final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
163 int[] bom = BOMs.get(bomType);
164 if (bom == null) {
165 bom = new int[0];
166 }
167 for (final int element : bom) {
168 baos.write(element);
169 }
170 try (Writer writer = new OutputStreamWriter(baos, streamEnc)) {
171 final String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc);
172 writer.write(xmlDoc);
173
174
175 writer.write("<da>\n");
176 for (int i = 0; i < 10000; i++) {
177 writer.write("<do/>\n");
178 }
179 writer.write("</da>\n");
180
181 }
182 return new ByteArrayInputStream(baos.toByteArray());
183 }
184
185 private void parseCharset(final String hdr, final String enc, final IOFunction<InputStream, XmlStreamReader> factory) throws Exception {
186 try (final InputStream stream = new ByteArrayInputStream(hdr.getBytes(StandardCharsets.UTF_8))) {
187 try (final XmlStreamReader xml = factory.apply(stream)) {
188 assertEquals(enc.toUpperCase(Locale.ROOT), xml.getEncoding(), enc);
189 }
190 }
191 }
192
193 public void testAlternateDefaultEncoding(final String contentType, final String bomEnc, final String streamEnc, final String prologEnc,
194 final String alternateEnc) throws Exception {
195 try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
196 XmlStreamReader xmlReader = new XmlStreamReader(is, contentType, false, alternateEnc)) {
197 testAlternateDefaultEncoding(streamEnc, alternateEnc, xmlReader);
198 }
199 try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
200
201 XmlStreamReader xmlReader = XmlStreamReader.builder()
202 .setInputStream(is)
203 .setHttpContentType(contentType)
204 .setLenient(false)
205 .setCharset(alternateEnc)
206 .get()) {
207
208 testAlternateDefaultEncoding(streamEnc, alternateEnc, xmlReader);
209 }
210 }
211
212 private void testAlternateDefaultEncoding(final String streamEnc, final String alternateEnc, final XmlStreamReader xmlReader) {
213 assertEquals(xmlReader.getDefaultEncoding(), alternateEnc);
214 if (!streamEnc.equals(UTF_16)) {
215
216
217
218 final String enc = alternateEnc != null ? alternateEnc : streamEnc;
219 assertEquals(xmlReader.getEncoding(), enc);
220 } else {
221
222 assertEquals(xmlReader.getEncoding().substring(0, streamEnc.length()), streamEnc);
223 }
224 }
225
226 @Test
227 protected void testConstructorFileInput() throws IOException {
228 try (XmlStreamReader reader = new XmlStreamReader(new File("pom.xml"))) {
229
230 }
231 try (XmlStreamReader reader = XmlStreamReader.builder().setFile("pom.xml").get()) {
232
233 }
234 }
235
236 @Test
237 protected void testConstructorFileInputNull() {
238 assertThrows(NullPointerException.class, () -> new XmlStreamReader((File) null));
239 }
240
241 @Test
242 protected void testConstructorFileInputOpenOptions() throws IOException {
243 try (XmlStreamReader reader = new XmlStreamReader(new File("pom.xml"))) {
244
245 }
246 try (XmlStreamReader reader = XmlStreamReader.builder().setFile("pom.xml").setOpenOptions(StandardOpenOption.READ).get()) {
247
248 }
249 }
250
251 @Test
252 protected void testConstructorInputStreamInput() throws IOException {
253 final Path path = Paths.get("pom.xml");
254 try (XmlStreamReader reader = new XmlStreamReader(Files.newInputStream(path))) {
255
256 }
257 try (@SuppressWarnings("resource")
258 XmlStreamReader reader = XmlStreamReader.builder().setInputStream(Files.newInputStream(path)).get()) {
259
260 }
261 }
262
263 @Test
264 protected void testConstructorInputStreamInputNull() {
265 assertThrows(NullPointerException.class, () -> new XmlStreamReader((InputStream) null));
266 }
267
268 @Test
269 protected void testConstructorPathInput() throws IOException {
270 try (XmlStreamReader reader = new XmlStreamReader(Paths.get("pom.xml"))) {
271
272 }
273 try (XmlStreamReader reader = XmlStreamReader.builder().setPath("pom.xml").get()) {
274
275 }
276 }
277
278 @Test
279 protected void testConstructorPathInputNull() {
280 assertThrows(NullPointerException.class, () -> new XmlStreamReader((Path) null));
281 }
282
283 @Test
284 protected void testConstructorURLConnectionInput() throws IOException {
285 try (XmlStreamReader reader = new XmlStreamReader(new URL("https://www.apache.org/").openConnection(), UTF_8)) {
286
287 }
288 }
289
290 @Test
291 protected void testConstructorURLConnectionInputNull() {
292 assertThrows(NullPointerException.class, () -> new XmlStreamReader((URLConnection) null, US_ASCII));
293 }
294
295 @Test
296 protected void testConstructorURLInput() throws IOException {
297 try (XmlStreamReader reader = new XmlStreamReader(new URL("https://www.apache.org/"))) {
298
299 }
300 }
301
302 @Test
303 protected void testConstructorURLInputNull() {
304 assertThrows(NullPointerException.class, () -> new XmlStreamReader((URL) null));
305 }
306
307
308
309 @Test
310 public void testEncodingAttributeXML() throws Exception {
311 try (InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML.getBytes(StandardCharsets.UTF_8));
312 XmlStreamReader xmlReader = new XmlStreamReader(is, "", true)) {
313 assertEquals(xmlReader.getEncoding(), UTF_8);
314 }
315 try (InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML.getBytes(StandardCharsets.UTF_8));
316
317 XmlStreamReader xmlReader = XmlStreamReader.builder()
318 .setInputStream(is)
319 .setHttpContentType("")
320 .setLenient(true)
321 .get()) {
322
323 assertEquals(xmlReader.getEncoding(), UTF_8);
324 }
325 }
326
327 @Test
328 public void testHttp() throws Exception {
329
330
331
332
333 testHttpValid("application/xml", "UTF-8-bom", UTF_8, null);
334 testHttpValid("application/xml", "UTF-8-bom", UTF_8, UTF_8);
335 testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null);
336 testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", UTF_8, null);
337 testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", UTF_8, null);
338 testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8);
339 testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null);
340 testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16);
341 testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
342
343 testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null);
344 testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16);
345 testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
346
347 testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null);
348 testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32);
349 testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
350
351 testHttpInvalid("application/xml", "UTF-8-bom", US_ASCII, US_ASCII);
352 testHttpInvalid("application/xml;charset=UTF-16", UTF_16LE, UTF_8, UTF_8);
353 testHttpInvalid("application/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE);
354 testHttpInvalid("application/xml;charset=UTF-32", UTF_32LE, UTF_8, UTF_8);
355 testHttpInvalid("application/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE);
356
357 testHttpValid("text/xml", "no-bom", US_ASCII, null);
358 testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8);
359 testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null);
360 testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null);
361 testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16);
362 testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
363 testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, null);
364 testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32);
365 testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
366 testHttpValid("text/xml", "UTF-8-bom", US_ASCII, null);
367
368 testAlternateDefaultEncoding("application/xml", "UTF-8-bom", UTF_8, null, null);
369 testAlternateDefaultEncoding("application/xml", "no-bom", US_ASCII, null, US_ASCII);
370 testAlternateDefaultEncoding("application/xml", "UTF-8-bom", UTF_8, null, UTF_8);
371 testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, null);
372 testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, US_ASCII);
373 testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, UTF_8);
374
375 testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null);
376 testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16);
377 testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
378 testHttpInvalid("text/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE);
379 testHttpInvalid("text/xml;charset=UTF-16", "no-bom", UTF_16BE, null);
380
381 testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null);
382 testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32);
383 testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
384 testHttpInvalid("text/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE);
385 testHttpInvalid("text/xml;charset=UTF-32", "no-bom", UTF_32BE, null);
386
387 testHttpLenient("text/xml", "no-bom", US_ASCII, null, US_ASCII);
388 testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8, UTF_8);
389 testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null, UTF_8);
390 testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null, UTF_16BE);
391 testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16, UTF_16);
392 testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE, UTF_16BE);
393 testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, null, UTF_32BE);
394 testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32, UTF_32);
395 testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32BE, UTF_32BE);
396 testHttpLenient("text/xml", "UTF-8-bom", US_ASCII, null, US_ASCII);
397
398 testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null, UTF_16BE);
399 testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16, UTF_16);
400 testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE, UTF_16BE);
401 testHttpLenient("text/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE, UTF_16BE);
402 testHttpLenient("text/xml;charset=UTF-16", "no-bom", UTF_16BE, null, UTF_16);
403
404 testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null, UTF_32BE);
405 testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32, UTF_32);
406 testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE, UTF_32BE);
407 testHttpLenient("text/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE, UTF_32BE);
408 testHttpLenient("text/xml;charset=UTF-32", "no-bom", UTF_32BE, null, UTF_32);
409
410 testHttpLenient("text/html", "no-bom", US_ASCII, US_ASCII, US_ASCII);
411 testHttpLenient("text/html", "no-bom", US_ASCII, null, US_ASCII);
412 testHttpLenient("text/html;charset=UTF-8", "no-bom", US_ASCII, UTF_8, UTF_8);
413 testHttpLenient("text/html;charset=UTF-16BE", "no-bom", US_ASCII, UTF_8, UTF_8);
414 testHttpLenient("text/html;charset=UTF-32BE", "no-bom", US_ASCII, UTF_8, UTF_8);
415 }
416
417 @Test
418 public void testHttpContent() throws Exception {
419 final String encoding = UTF_8;
420 final String xml = getXML("no-bom", XML3, encoding, encoding);
421 try (XmlStreamReader xmlReader = new XmlStreamReader(CharSequenceInputStream.builder().setCharSequence(xml).setCharset(encoding).get())) {
422 assertEquals(xmlReader.getEncoding(), encoding, "Check encoding");
423 assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
424 }
425 }
426
427 protected void testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
428 final String prologEnc) throws Exception {
429 try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc)) {
430 try {
431 new XmlStreamReader(is, cT, false).close();
432 fail("It should have failed for HTTP Content-type " + cT + ", BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc);
433 } catch (final IOException ex) {
434 assertTrue(ex.getMessage().contains("Illegal encoding,"));
435 }
436 }
437 }
438
439 protected void testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
440 final String prologEnc, final String shouldBe) throws Exception {
441 try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
442 XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true)) {
443 assertEquals(xmlReader.getEncoding(), shouldBe);
444 }
445 }
446
447 public void testHttpValid(final String cT, final String bomEnc, final String streamEnc,
448 final String prologEnc) throws Exception {
449 try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
450 XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false)) {
451 if (!streamEnc.equals(UTF_16)) {
452
453
454
455 assertEquals(xmlReader.getEncoding(), streamEnc);
456 } else {
457 assertEquals(xmlReader.getEncoding().substring(0, streamEnc.length()), streamEnc);
458 }
459 }
460 }
461
462 @ParameterizedTest(name = "{0}")
463 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
464 public void testIO_815(final String csName) throws Exception {
465 final MessageFormat fmt = new MessageFormat("<?xml version=\"1.0\" encoding=''{0}''?>\n<root>text</root>");
466 final IOFunction<InputStream, XmlStreamReader> factoryCtor = XmlStreamReader::new;
467 final IOFunction<InputStream, XmlStreamReader> factoryBuilder = stream -> XmlStreamReader.builder().setInputStream(stream).get();
468 parseCharset(fmt.format(new Object[] { csName }), csName, factoryCtor);
469 parseCharset(fmt.format(new Object[] { csName }), csName, factoryBuilder);
470 for (final String alias : Charset.forName(csName).aliases()) {
471 parseCharset(fmt.format(new Object[] { alias }), alias, factoryCtor);
472 parseCharset(fmt.format(new Object[] { alias }), alias, factoryBuilder);
473 }
474 }
475
476
477 @Test
478 @DefaultLocale(language = "tr")
479 public void testLowerCaseEncodingWithTurkishLocale_IO_557() throws Exception {
480 final String[] encodings = { "iso8859-1", "us-ascii", "utf-8" };
481 for (final String encoding : encodings) {
482 final String xml = getXML("no-bom", XML3, encoding, encoding);
483 try (ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
484 XmlStreamReader xmlReader = new XmlStreamReader(is)) {
485 assertTrue(encoding.equalsIgnoreCase(xmlReader.getEncoding()), "Check encoding : " + encoding);
486 assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
487 }
488 }
489 }
490
491 @SuppressWarnings("resource")
492 protected void testRawBomInvalid(final String bomEnc, final String streamEnc,
493 final String prologEnc) throws Exception {
494 final InputStream is = getXmlInputStream(bomEnc, XML3, streamEnc, prologEnc);
495 XmlStreamReader xmlReader = null;
496 try {
497 xmlReader = XmlStreamReader.builder().setInputStream(is).setLenient(false).get();
498 final String foundEnc = xmlReader.getEncoding();
499 fail("Expected IOException for BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc
500 + ": found " + foundEnc);
501 } catch (final IOException ex) {
502 assertTrue(ex.getMessage().contains("Illegal encoding,"));
503 }
504 if (xmlReader != null) {
505 xmlReader.close();
506 }
507 }
508
509 @Test
510 public void testRawBomUtf16() throws Exception {
511 testRawBomValid(UTF_16BE);
512 testRawBomValid(UTF_16LE);
513 testRawBomValid(UTF_16);
514
515 testRawBomInvalid("UTF-16BE-bom", UTF_16BE, UTF_16LE);
516 testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_16BE);
517 testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_8);
518 }
519
520 @Test
521 public void testRawBomUtf32() throws Exception {
522 testRawBomValid(UTF_32BE);
523 testRawBomValid(UTF_32LE);
524 testRawBomValid(UTF_32);
525
526 testRawBomInvalid("UTF-32BE-bom", UTF_32BE, UTF_32LE);
527 testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_32BE);
528 testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_8);
529 }
530
531 @Test
532 public void testRawBomUtf8() throws Exception {
533 testRawBomValid(UTF_8);
534 testRawBomInvalid("UTF-8-bom", US_ASCII, US_ASCII);
535 testRawBomInvalid("UTF-8-bom", ISO_8859_1, ISO_8859_1);
536 testRawBomInvalid("UTF-8-bom", UTF_8, UTF_16);
537 testRawBomInvalid("UTF-8-bom", UTF_8, UTF_16BE);
538 testRawBomInvalid("UTF-8-bom", UTF_8, UTF_16LE);
539 testRawBomInvalid("UTF-16BE-bom", UTF_16BE, UTF_16LE);
540 testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_16BE);
541 testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_8);
542 testRawBomInvalid("UTF-32BE-bom", UTF_32BE, UTF_32LE);
543 testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_32BE);
544 testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_8);
545 }
546
547 protected void testRawBomValid(final String encoding) throws Exception {
548 try (InputStream is = getXmlInputStream(encoding + "-bom", XML3, encoding, encoding);
549 XmlStreamReader xmlReader = new XmlStreamReader(is, false)) {
550 if (!encoding.equals(UTF_16) && !encoding.equals(UTF_32)) {
551 assertEquals(xmlReader.getEncoding(), encoding);
552 } else {
553 assertEquals(xmlReader.getEncoding().substring(0, encoding.length()), encoding);
554 }
555 }
556 }
557
558 @Test
559 public void testRawContent() throws Exception {
560 final String encoding = UTF_8;
561 final String xml = getXML("no-bom", XML3, encoding, encoding);
562 try (XmlStreamReader xmlReader = new XmlStreamReader(CharSequenceInputStream.builder().setCharSequence(xml).setCharset(encoding).get())) {
563 assertEquals(xmlReader.getEncoding(), encoding, "Check encoding");
564 assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
565 }
566 }
567
568 @Test
569 public void testRawNoBomCp1047() throws Exception {
570 testRawNoBomValid("CP1047");
571 }
572
573 protected void testRawNoBomInvalid(final String encoding) throws Exception {
574 try (final InputStream is = getXmlInputStream("no-bom", XML3, encoding, encoding)) {
575 final XmlStreamReader xmlStreamReader = new XmlStreamReader(is, false);
576 final IOException ex = assertThrows(IOException.class, xmlStreamReader::close);
577 assertTrue(ex.getMessage().contains("Invalid encoding,"));
578 }
579 }
580
581 @Test
582 public void testRawNoBomIso8859_1() throws Exception {
583 testRawNoBomValid(ISO_8859_1);
584 }
585
586 @Test
587 public void testRawNoBomUsAscii() throws Exception {
588 testRawNoBomValid(US_ASCII);
589 }
590
591 @Test
592 public void testRawNoBomUtf16BE() throws Exception {
593 testRawNoBomValid(UTF_16BE);
594 }
595
596 @Test
597 public void testRawNoBomUtf16LE() throws Exception {
598 testRawNoBomValid(UTF_16LE);
599 }
600
601 @Test
602 public void testRawNoBomUtf32BE() throws Exception {
603 testRawNoBomValid(UTF_32BE);
604 }
605
606 @Test
607 public void testRawNoBomUtf32LE() throws Exception {
608 testRawNoBomValid(UTF_32LE);
609 }
610
611 @Test
612 public void testRawNoBomUtf8() throws Exception {
613 testRawNoBomValid(UTF_8);
614 }
615
616 protected void testRawNoBomValid(final String encoding) throws Exception {
617 InputStream is = getXmlInputStream("no-bom", XML1, encoding, encoding);
618 XmlStreamReader xmlReader = new XmlStreamReader(is, false);
619 assertEquals(xmlReader.getEncoding(), UTF_8);
620 xmlReader.close();
621
622 is = getXmlInputStream("no-bom", XML2, encoding, encoding);
623 xmlReader = new XmlStreamReader(is);
624 assertEquals(xmlReader.getEncoding(), UTF_8);
625 xmlReader.close();
626
627 is = getXmlInputStream("no-bom", XML3, encoding, encoding);
628 xmlReader = new XmlStreamReader(is);
629 assertEquals(xmlReader.getEncoding(), encoding);
630 xmlReader.close();
631
632 is = getXmlInputStream("no-bom", XML4, encoding, encoding);
633 xmlReader = new XmlStreamReader(is);
634 assertEquals(xmlReader.getEncoding(), encoding);
635 xmlReader.close();
636
637 is = getXmlInputStream("no-bom", XML5, encoding, encoding);
638 xmlReader = new XmlStreamReader(is);
639 assertEquals(xmlReader.getEncoding(), encoding);
640 xmlReader.close();
641
642 is = getXmlInputStream("no-bom", XML6, encoding, encoding);
643 xmlReader = new XmlStreamReader(is);
644 assertEquals(xmlReader.getEncoding(), encoding);
645 xmlReader.close();
646
647 is = getXmlInputStream("no-bom", XML7, encoding, encoding);
648 xmlReader = new XmlStreamReader(is);
649 assertEquals(xmlReader.getEncoding(), encoding);
650 xmlReader.close();
651 }
652 }