1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertTrue;
25 import static org.junit.jupiter.api.Assumptions.assumeFalse;
26 import static org.junit.jupiter.api.Assumptions.assumeTrue;
27
28 import java.io.ByteArrayInputStream;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.Reader;
32 import java.nio.charset.Charset;
33 import java.nio.charset.StandardCharsets;
34
35 import javax.xml.parsers.DocumentBuilder;
36 import javax.xml.parsers.DocumentBuilderFactory;
37 import javax.xml.parsers.ParserConfigurationException;
38
39 import org.apache.commons.io.ByteOrderMark;
40 import org.junit.jupiter.api.Test;
41 import org.w3c.dom.Document;
42 import org.xml.sax.InputSource;
43 import org.xml.sax.SAXException;
44 import org.xml.sax.SAXParseException;
45
46
47
48
49 @SuppressWarnings("ResultOfMethodCallIgnored")
50 public class BOMInputStreamTest {
51
52
53
54
55 private static final class ExpectCloseInputStream extends InputStream {
56 private boolean _closeCalled;
57
58 public void assertCloseCalled() {
59 assertTrue(_closeCalled);
60 }
61
62 @Override
63 public void close() throws IOException {
64 _closeCalled = true;
65 }
66
67 @Override
68 public int read() throws IOException {
69 return -1;
70 }
71 }
72
73 private void assertData(final byte[] expected, final byte[] actual, final int len) {
74 assertEquals(expected.length, len, "length");
75 for (int ii = 0; ii < expected.length; ii++) {
76 assertEquals(expected[ii], actual[ii], "byte " + ii);
77 }
78 }
79
80
81
82
83 private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
84 byte[] data = baseData;
85 if (addBOM) {
86 data = new byte[baseData.length + 2];
87 data[0] = (byte) 0xFE;
88 data[1] = (byte) 0xFF;
89 System.arraycopy(baseData, 0, data, 2, baseData.length);
90 }
91 return new ByteArrayInputStream(data);
92 }
93
94
95
96
97 private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
98 byte[] data = baseData;
99 if (addBOM) {
100 data = new byte[baseData.length + 2];
101 data[0] = (byte) 0xFF;
102 data[1] = (byte) 0xFE;
103 System.arraycopy(baseData, 0, data, 2, baseData.length);
104 }
105 return new ByteArrayInputStream(data);
106 }
107
108
109
110
111 private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
112 byte[] data = baseData;
113 if (addBOM) {
114 data = new byte[baseData.length + 4];
115 data[0] = 0;
116 data[1] = 0;
117 data[2] = (byte) 0xFE;
118 data[3] = (byte) 0xFF;
119 System.arraycopy(baseData, 0, data, 4, baseData.length);
120 }
121 return new ByteArrayInputStream(data);
122 }
123
124
125
126
127 private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
128 byte[] data = baseData;
129 if (addBOM) {
130 data = new byte[baseData.length + 4];
131 data[0] = (byte) 0xFF;
132 data[1] = (byte) 0xFE;
133 data[2] = 0;
134 data[3] = 0;
135 System.arraycopy(baseData, 0, data, 4, baseData.length);
136 }
137 return new ByteArrayInputStream(data);
138 }
139
140
141
142
143 private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
144 byte[] data = baseData;
145 if (addBOM) {
146 data = new byte[baseData.length + 3];
147 data[0] = (byte) 0xEF;
148 data[1] = (byte) 0xBB;
149 data[2] = (byte) 0xBF;
150 System.arraycopy(baseData, 0, data, 3, baseData.length);
151 }
152 return new ByteArrayInputStream(data);
153 }
154
155 private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
156 final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
157 try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
158 .setCharset(charsetName).get()) {
159 final InputSource is = new InputSource(byteStream);
160 is.setEncoding(charsetName);
161 documentBuilder.parse(is);
162 } catch (final SAXParseException e) {
163 if (e.getMessage().contains(charsetName)) {
164 return false;
165 }
166 }
167 return true;
168 }
169
170 private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
171 return Charset.isSupported(charSetName) && doesSaxSupportCharacterSet(charSetName);
172 }
173
174 private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
175 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
176 assertNotNull(doc);
177 assertEquals("X", doc.getFirstChild().getNodeName());
178 }
179
180 private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
181 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
182 assertNotNull(doc);
183 assertEquals("X", doc.getFirstChild().getNodeName());
184 }
185
186 private void readBOMInputStreamTwice(final String resource) throws Exception {
187 try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
188 assertNotNull(inputStream);
189 try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
190 bomInputStream.mark(1_000_000);
191
192 this.readFile(bomInputStream);
193 bomInputStream.reset();
194 this.readFile(bomInputStream);
195 inputStream.close();
196 }
197 }
198 }
199
200 private void readFile(final BOMInputStream bomInputStream) throws Exception {
201 int bytes;
202 final byte[] bytesFromStream = new byte[100];
203 do {
204 bytes = bomInputStream.read(bytesFromStream);
205 } while (bytes > 0);
206 }
207
208 @Test
209 public void testAvailableWithBOM() throws Exception {
210 final byte[] data = { 'A', 'B', 'C', 'D' };
211 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
212 assertEquals(7, in.available());
213 }
214 }
215
216 @Test
217 public void testAvailableWithoutBOM() throws Exception {
218 final byte[] data = { 'A', 'B', 'C', 'D' };
219 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
220 assertEquals(4, in.available());
221 }
222 }
223
224 @Test
225 public void testBuilderGet() {
226
227 assertThrows(IllegalStateException.class, () -> BOMInputStream.builder().get());
228 }
229
230 @Test
231
232 public void testClose() throws Exception {
233 try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
234 try (InputStream in = new BOMInputStream(del)) {
235
236 }
237 del.assertCloseCalled();
238 }
239 }
240
241 @Test
242 public void testEmptyBufferWithBOM() throws Exception {
243 final byte[] data = {};
244 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
245 final byte[] buf = new byte[1024];
246 assertEquals(-1, in.read(buf));
247 }
248 }
249
250 @Test
251 public void testEmptyBufferWithoutBOM() throws Exception {
252 final byte[] data = {};
253 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
254 final byte[] buf = new byte[1024];
255 assertEquals(-1, in.read(buf));
256 }
257 }
258
259 @Test
260 public void testGetBOMFirstThenRead() throws Exception {
261 final byte[] data = { 'A', 'B', 'C' };
262 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
263 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
264 assertTrue(in.hasBOM(), "hasBOM()");
265 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
266 assertEquals('A', in.read());
267 assertEquals('B', in.read());
268 assertEquals('C', in.read());
269 assertEquals(-1, in.read());
270 }
271 }
272
273 @Test
274 public void testGetBOMFirstThenReadInclude() throws Exception {
275 final byte[] data = { 'A', 'B', 'C' };
276 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
277 assertTrue(in.hasBOM(), "hasBOM()");
278 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
279 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
280 assertEquals(0xEF, in.read());
281 assertEquals(0xBB, in.read());
282 assertEquals(0xBF, in.read());
283 assertEquals('A', in.read());
284 assertEquals('B', in.read());
285 assertEquals('C', in.read());
286 assertEquals(-1, in.read());
287 }
288 }
289
290 @Test
291 public void testLargeBufferWithBOM() throws Exception {
292 final byte[] data = { 'A', 'B', 'C' };
293 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
294 final byte[] buf = new byte[1024];
295 assertData(data, buf, in.read(buf));
296 }
297 }
298
299 @Test
300 public void testLargeBufferWithoutBOM() throws Exception {
301 final byte[] data = { 'A', 'B', 'C' };
302 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
303 final byte[] buf = new byte[1024];
304 assertData(data, buf, in.read(buf));
305 }
306 }
307
308 @Test
309 public void testLeadingNonBOMBufferedRead() throws Exception {
310 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
311 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
312 final byte[] buf = new byte[1024];
313 assertData(data, buf, in.read(buf));
314 }
315 }
316
317 @Test
318 public void testLeadingNonBOMSingleRead() throws Exception {
319 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
320 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
321 assertEquals(0xEF, in.read());
322 assertEquals(0xAB, in.read());
323 assertEquals(0xCD, in.read());
324 assertEquals(-1, in.read());
325 }
326 }
327
328 @Test
329 public void testMarkResetAfterReadWithBOM() throws Exception {
330 final byte[] data = { 'A', 'B', 'C', 'D' };
331 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
332 assertTrue(in.markSupported());
333
334 in.read();
335 in.mark(10);
336
337 in.read();
338 in.read();
339 in.reset();
340 assertEquals('B', in.read());
341 }
342 }
343
344 @Test
345 public void testMarkResetAfterReadWithoutBOM() throws Exception {
346 final byte[] data = { 'A', 'B', 'C', 'D' };
347 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
348 assertTrue(in.markSupported());
349
350 in.read();
351 in.mark(10);
352
353 in.read();
354 in.read();
355 in.reset();
356 assertEquals('B', in.read());
357 }
358 }
359
360 @Test
361 public void testMarkResetBeforeReadWithBOM() throws Exception {
362 final byte[] data = { 'A', 'B', 'C', 'D' };
363 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
364 assertTrue(in.markSupported());
365
366 in.mark(10);
367
368 in.read();
369 in.read();
370 in.reset();
371 assertEquals('A', in.read());
372 }
373 }
374
375 @Test
376 public void testMarkResetBeforeReadWithoutBOM() throws Exception {
377 final byte[] data = { 'A', 'B', 'C', 'D' };
378 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
379 assertTrue(in.markSupported());
380
381 in.mark(10);
382
383 in.read();
384 in.read();
385 in.reset();
386 assertEquals('A', in.read());
387 }
388 }
389
390 @Test
391 public void testNoBoms() throws Exception {
392 final byte[] data = { 'A', 'B', 'C' };
393 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
394 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
395
396 try (final BOMInputStream bomInputStream = BOMInputStream.builder()
397 .setInputStream(createUtf8Input(data, true))
398 .setInclude(true)
399 .setByteOrderMarks((ByteOrderMark[]) null)
400 .get()) {
401 assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
402 }
403 assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
404 .setInputStream(createUtf8Input(data, true))
405 .setInclude(true)
406 .setByteOrderMarks()
407 .get()
408 .close());
409 }
410
411 @Test
412 public void testReadEmpty() throws Exception {
413 final byte[] data = {};
414 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
415 assertEquals(-1, in.read());
416 assertFalse(in.hasBOM(), "hasBOM()");
417 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
418 assertNull(in.getBOM(), "getBOM");
419 }
420 }
421
422 @Test
423 public void testReadSmall() throws Exception {
424 final byte[] data = { 'A', 'B' };
425 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
426 assertEquals('A', in.read());
427 assertEquals('B', in.read());
428 assertEquals(-1, in.read());
429 assertFalse(in.hasBOM(), "hasBOM()");
430 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
431 assertNull(in.getBOM(), "getBOM");
432 }
433 }
434
435 @Test
436 public void testReadTwiceWithBOM() throws Exception {
437 this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
438 }
439
440 @Test
441 public void testReadTwiceWithoutBOM() throws Exception {
442 this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
443 }
444
445 @Test
446 public void testReadWithBOMInclude() throws Exception {
447 final byte[] data = { 'A', 'B', 'C' };
448 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
449 assertEquals(0xEF, in.read());
450 assertEquals(0xBB, in.read());
451 assertEquals(0xBF, in.read());
452 assertEquals('A', in.read());
453 assertEquals('B', in.read());
454 assertEquals('C', in.read());
455 assertEquals(-1, in.read());
456 assertTrue(in.hasBOM(), "hasBOM()");
457 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
458 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
459 }
460 }
461
462 @Test
463 public void testReadWithBOMUtf16Be() throws Exception {
464 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
465 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
466 assertEquals(0, in.read());
467 assertEquals('A', in.read());
468 assertEquals(0, in.read());
469 assertEquals('B', in.read());
470 assertEquals(0, in.read());
471 assertEquals('C', in.read());
472 assertEquals(-1, in.read());
473 assertTrue(in.hasBOM(), "hasBOM()");
474 assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
475 assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
476 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
477 }
478 }
479
480 @Test
481 public void testReadWithBOMUtf16Le() throws Exception {
482 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
483 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
484 assertEquals('A', in.read());
485 assertEquals(0, in.read());
486 assertEquals('B', in.read());
487 assertEquals(0, in.read());
488 assertEquals('C', in.read());
489 assertEquals(0, in.read());
490 assertEquals(-1, in.read());
491 assertTrue(in.hasBOM(), "hasBOM()");
492 assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
493 assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
494 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
495 }
496 }
497
498 @Test
499 public void testReadWithBOMUtf32Be() throws Exception {
500 assumeTrue(Charset.isSupported("UTF_32BE"));
501 final byte[] data = "ABC".getBytes("UTF_32BE");
502 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
503 ByteOrderMark.UTF_32BE)) {
504 assertEquals(0, in.read());
505 assertEquals(0, in.read());
506 assertEquals(0, in.read());
507 assertEquals('A', in.read());
508 assertEquals(0, in.read());
509 assertEquals(0, in.read());
510 assertEquals(0, in.read());
511 assertEquals('B', in.read());
512 assertEquals(0, in.read());
513 assertEquals(0, in.read());
514 assertEquals(0, in.read());
515 assertEquals('C', in.read());
516 assertEquals(-1, in.read());
517 assertTrue(in.hasBOM(), "hasBOM()");
518 assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
519 assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
520 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
521 }
522 }
523
524 @Test
525 public void testReadWithBOMUtf32Le() throws Exception {
526 assumeTrue(Charset.isSupported("UTF_32LE"));
527 final byte[] data = "ABC".getBytes("UTF_32LE");
528 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
529 ByteOrderMark.UTF_32LE)) {
530 assertEquals('A', in.read());
531 assertEquals(0, in.read());
532 assertEquals(0, in.read());
533 assertEquals(0, in.read());
534 assertEquals('B', in.read());
535 assertEquals(0, in.read());
536 assertEquals(0, in.read());
537 assertEquals(0, in.read());
538 assertEquals('C', in.read());
539 assertEquals(0, in.read());
540 assertEquals(0, in.read());
541 assertEquals(0, in.read());
542 assertEquals(-1, in.read());
543 assertTrue(in.hasBOM(), "hasBOM()");
544 assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
545 assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
546 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
547 }
548 }
549
550 @Test
551 public void testReadWithBOMUtf8() throws Exception {
552 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
553 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
554 assertEquals('A', in.read());
555 assertEquals('B', in.read());
556 assertEquals('C', in.read());
557 assertEquals(-1, in.read());
558 assertTrue(in.hasBOM(), "hasBOM()");
559 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
560 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
561 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
562 }
563 }
564
565 @Test
566 public void testReadWithMultipleBOM() throws Exception {
567 final byte[] data = { 'A', 'B', 'C' };
568 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
569 ByteOrderMark.UTF_8)) {
570 assertEquals('A', in.read());
571 assertEquals('B', in.read());
572 assertEquals('C', in.read());
573 assertEquals(-1, in.read());
574 assertTrue(in.hasBOM(), "hasBOM()");
575 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
576 assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
577 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
578 }
579 }
580
581 @Test
582 public void testReadWithoutBOM() throws Exception {
583 final byte[] data = { 'A', 'B', 'C' };
584 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
585 assertEquals('A', in.read());
586 assertEquals('B', in.read());
587 assertEquals('C', in.read());
588 assertEquals(-1, in.read());
589 assertFalse(in.hasBOM(), "hasBOM()");
590 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
591 assertNull(in.getBOM(), "getBOM");
592 }
593 }
594
595 @Test
596 public void testReadXmlWithBOMUcs2() throws Exception {
597 assumeFalse(System.getProperty("java.vendor").contains("IBM"), "This test does not pass on some IBM VMs xml parsers");
598
599
600 assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
601 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
602 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
603 parseXml(in);
604 }
605 parseXml(createUtf16BeDataStream(data, true));
606 }
607
608 @Test
609 public void testReadXmlWithBOMUcs4() throws Exception {
610
611
612 assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
613 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
614
615 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
616 parseXml(in);
617
618 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
619 }
620 parseXml(createUtf32BeDataStream(data, true));
621 }
622
623 @Test
624 public void testReadXmlWithBOMUtf16Be() throws Exception {
625 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
626 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
627 parseXml(in);
628 }
629 parseXml(createUtf16BeDataStream(data, true));
630 }
631
632 @Test
633 public void testReadXmlWithBOMUtf16Le() throws Exception {
634 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
635 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
636 parseXml(in);
637 }
638 parseXml(createUtf16LeDataStream(data, true));
639 }
640
641 @Test
642 public void testReadXmlWithBOMUtf32Be() throws Exception {
643 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
644 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
645 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
646 parseXml(in);
647 }
648
649 try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
650 parseXml(in);
651 }
652 }
653
654 @Test
655 public void testReadXmlWithBOMUtf32Le() throws Exception {
656 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
657 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
658 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
659 parseXml(in);
660 }
661
662 try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
663 parseXml(in);
664 }
665 }
666
667 @Test
668 public void testReadXmlWithBOMUtf8() throws Exception {
669 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
670 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
671 parseXml(in);
672 }
673 parseXml(createUtf8Input(data, true));
674 }
675
676 @Test
677 public void testReadXmlWithoutBOMUtf32Be() throws Exception {
678 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
679 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
680 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
681 parseXml(in);
682 }
683 parseXml(createUtf32BeDataStream(data, false));
684 }
685
686 @Test
687 public void testReadXmlWithoutBOMUtf32Le() throws Exception {
688 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
689 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
690 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
691 parseXml(in);
692 }
693 parseXml(createUtf32BeDataStream(data, false));
694 }
695
696 @Test
697 public void testSkipReturnValueWithBom() throws IOException {
698 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
699 try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
700 assertEquals(2, is1.skip(2));
701 assertEquals((byte) 0x33, is1.read());
702 }
703 }
704
705 @Test
706 public void testSkipReturnValueWithoutBom() throws IOException {
707 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
708 try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
709 assertEquals(2, is2.skip(2));
710 assertEquals((byte) 0x33, is2.read());
711 }
712 }
713
714 @Test
715 public void testSkipWithBOM() throws Exception {
716 final byte[] data = { 'A', 'B', 'C', 'D' };
717 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
718 in.skip(2L);
719 assertEquals('C', in.read());
720 }
721 }
722
723 @Test
724 public void testSkipWithoutBOM() throws Exception {
725 final byte[] data = { 'A', 'B', 'C', 'D' };
726 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
727 in.skip(2L);
728 assertEquals('C', in.read());
729 }
730 }
731
732 @Test
733 public void testSmallBufferWithBOM() throws Exception {
734 final byte[] data = { 'A', 'B', 'C' };
735 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
736 final byte[] buf = new byte[1024];
737 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
738 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
739 }
740 }
741
742 @Test
743 public void testSmallBufferWithoutBOM() throws Exception {
744 final byte[] data = { 'A', 'B', 'C' };
745 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
746 final byte[] buf = new byte[1024];
747 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
748 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
749 }
750 }
751
752 @Test
753
754 public void testSupportCode() throws Exception {
755 try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
756 final byte[] buf = new byte[1024];
757 final int len = in.read(buf);
758 assertEquals(5, len);
759 assertEquals(0xEF, buf[0] & 0xFF);
760 assertEquals(0xBB, buf[1] & 0xFF);
761 assertEquals(0xBF, buf[2] & 0xFF);
762 assertEquals('A', buf[3] & 0xFF);
763 assertEquals('B', buf[4] & 0xFF);
764
765 assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
766 }
767 }
768 }