1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21 import static org.junit.jupiter.api.Assertions.assertNotEquals;
22 import static org.junit.jupiter.api.Assertions.assertNotNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertTrue;
25 import static org.junit.jupiter.api.Assertions.fail;
26
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.StringReader;
30 import java.nio.ByteBuffer;
31 import java.nio.CharBuffer;
32 import java.nio.charset.Charset;
33 import java.nio.charset.CharsetEncoder;
34 import java.nio.charset.CoderResult;
35 import java.nio.charset.CodingErrorAction;
36 import java.nio.charset.StandardCharsets;
37 import java.nio.charset.UnmappableCharacterException;
38 import java.util.Random;
39
40 import org.apache.commons.io.CharsetsTest;
41 import org.apache.commons.io.IOUtils;
42 import org.junit.jupiter.api.Test;
43 import org.junit.jupiter.params.ParameterizedTest;
44 import org.junit.jupiter.params.provider.MethodSource;
45
46 public class CharSequenceInputStreamTest {
47
/** Canonical name of the UTF-16 charset. */
private static final String UTF_16 = StandardCharsets.UTF_16.name();

/** Canonical name of the UTF-8 charset. */
private static final String UTF_8 = StandardCharsets.UTF_8.name();

/** The 26 upper-case ASCII letters. */
private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/** French sample sentence that contains three accented (non-ASCII) characters. */
private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";

/** {@code TEST_STRING} repeated 100 times; assigned by the static initializer. */
private static final String LARGE_TEST_STRING;

static {
    final StringBuilder repeated = new StringBuilder();
    for (int remaining = 100; remaining > 0; remaining--) {
        repeated.append(TEST_STRING);
    }
    LARGE_TEST_STRING = repeated.toString();
}

/** Unseeded random source; {@code testBufferedRead} uses it to pick read offsets and lengths. */
private final Random random = new Random();
64
65 private int checkAvail(final InputStream is, final int min) throws Exception {
66 final int available = is.available();
67 assertTrue(available >= min, "avail should be >= " + min + ", but was " + available);
68 return available;
69 }
70
71 private boolean isAvailabilityTestableForCharset(final String csName) {
72 return Charset.forName(csName).canEncode()
73 && !"COMPOUND_TEXT".equalsIgnoreCase(csName) && !"x-COMPOUND_TEXT".equalsIgnoreCase(csName)
74 && !isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(csName);
75 }
76
77 private boolean isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(final String csName) {
78 return "x-IBM1388".equalsIgnoreCase(csName) ||
79 "ISO-2022-CN".equalsIgnoreCase(csName) ||
80 "ISO-2022-JP".equalsIgnoreCase(csName) ||
81 "Shift_JIS".equalsIgnoreCase(csName);
82 }
83
84
85
86
87 @Test
88 public void testAvailable() throws IOException {
89 final Charset charset = Charset.forName("Big5");
90 final CharSequenceInputStream in = new CharSequenceInputStream("\uD800\uDC00", charset);
91 final int available = in.available();
92 final byte[] data = new byte[available];
93 final int bytesRead = in.read(data);
94 assertEquals(available, bytesRead);
95 }
96
97 @ParameterizedTest(name = "{0}")
98 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
99 public void testAvailable(final String csName) throws Exception {
100
101
102
103
104
105
106
107
108
109
110
111 try {
112 if (isAvailabilityTestableForCharset(csName)) {
113 testAvailableSkip(csName);
114 testAvailableRead(csName);
115 }
116 } catch (final UnsupportedOperationException e) {
117 fail("Operation not supported for " + csName);
118 }
119 }
120
121 private void testAvailableRead(final String csName) throws Exception {
122 final String input = "test";
123 try (InputStream r = new CharSequenceInputStream(input, csName)) {
124 int available = checkAvail(r, input.length());
125 assertEquals(available - 1, r.skip(available - 1));
126 available = checkAvail(r, 1);
127 final byte[] buff = new byte[available];
128 assertEquals(available, r.read(buff, 0, available));
129 }
130 }
131
132 private void testAvailableSkip(final String csName) throws Exception {
133 final String input = "test";
134 try (InputStream r = new CharSequenceInputStream(input, csName)) {
135 int available = checkAvail(r, input.length());
136 assertEquals(available - 1, r.skip(available - 1));
137 available = checkAvail(r, 1);
138 assertEquals(1, r.skip(1));
139 available = checkAvail(r, 0);
140 }
141 }
142
143 private void testBufferedRead(final String testString, final String charsetName) throws IOException {
144 final byte[] expected = testString.getBytes(charsetName);
145 try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) {
146 final byte[] buffer = new byte[128];
147 int offset = 0; while (true) {
148 int bufferOffset = random.nextInt(64);
149 final int bufferLength = random.nextInt(64);
150 int read = in.read(buffer, bufferOffset, bufferLength);
151 if (read == -1) {
152 assertEquals(expected.length, offset, "EOF: offset should equal length for charset " + charsetName);
153 break;
154 }
155 assertTrue(read <= bufferLength, "Read " + read + " <= " + bufferLength);
156 while (read > 0) {
157 assertTrue(offset < expected.length,
158 "offset for " + charsetName + " " + offset + " < " + expected.length);
159 assertEquals(expected[offset], buffer[bufferOffset], "bytes should agree for " + charsetName);
160 offset++;
161 bufferOffset++;
162 read--;
163 }
164 }
165 }
166 }
167
168
169
170
171
172
173
174
175 @ParameterizedTest(name = "{0}")
176 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
177 public void testBufferedRead_AvailableCharset(final String csName) throws IOException {
178
179 if (isAvailabilityTestableForCharset(csName)) {
180 testBufferedRead(TEST_STRING, csName);
181 }
182 }
183
184 @ParameterizedTest
185 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
186 public void testBufferedRead_RequiredCharset(final String csName) throws IOException {
187 testBufferedRead(TEST_STRING, csName);
188 }
189
190 @Test
191 public void testBufferedRead_UTF8() throws IOException {
192 testBufferedRead(TEST_STRING, UTF_8);
193 }
194
195 @Test
196 public void testCharacterCodingException() throws IOException {
197 final Charset charset = StandardCharsets.US_ASCII;
198 final CharSequenceInputStream in = CharSequenceInputStream.builder()
199 .setCharsetEncoder(charset.newEncoder().onUnmappableCharacter(CodingErrorAction.REPORT))
200 .setCharSequence("\u0080")
201 .get();
202 assertEquals(0, in.available());
203 assertThrows(UnmappableCharacterException.class, in::read);
204 }
205
206 private void testCharsetMismatchInfiniteLoop(final String csName) throws IOException {
207
208 final char[] inputChars = { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
209 final Charset charset = Charset.forName(csName);
210 try (InputStream stream = new CharSequenceInputStream(new String(inputChars), charset, 512)) {
211 IOUtils.toCharArray(stream, charset);
212 }
213 try (InputStream stream = CharSequenceInputStream.builder().setCharSequence(new String(inputChars)).setCharset(charset).setBufferSize(512).get()) {
214 IOUtils.toCharArray(stream, charset);
215 }
216 }
217
218 @ParameterizedTest
219 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
220 public void testCharsetMismatchInfiniteLoop_RequiredCharsets(final String csName) throws IOException {
221 testCharsetMismatchInfiniteLoop(csName);
222 }
223
224
225
226
227 private void testIO_356(final int bufferSize, final int dataSize, final int readFirst, final String csName) throws Exception {
228 final byte[] data1;
229 final byte[] data2;
230 try (CharSequenceInputStream is = new CharSequenceInputStream(ALPHABET, csName, bufferSize)) {
231 for (int i = 0; i < readFirst; i++) {
232 final int ch = is.read();
233 assertNotEquals(-1, ch);
234 }
235
236 is.mark(dataSize);
237
238 data1 = new byte[dataSize];
239 final int readCount1 = is.read(data1);
240 assertEquals(dataSize, readCount1);
241
242 is.reset();
243
244 data2 = new byte[dataSize];
245 final int readCount2 = is.read(data2);
246 assertEquals(dataSize, readCount2);
247 }
248
249
250 assertArrayEquals(data1, data2, "bufferSize=" + bufferSize + " dataSize=" + dataSize);
251 }
252
253 @Test
254 public void testIO_356_B10_D10_S0_UTF16() throws Exception {
255 testIO_356(10, 10, 0, UTF_16);
256 }
257
258 @Test
259 public void testIO_356_B10_D10_S0_UTF8() throws Exception {
260 testIO_356(10, 10, 0, UTF_8);
261 }
262
263 @Test
264 public void testIO_356_B10_D10_S1_UTF8() throws Exception {
265 testIO_356(10, 10, 1, UTF_8);
266 }
267
268 @Test
269 public void testIO_356_B10_D10_S2_UTF8() throws Exception {
270 testIO_356(10, 10, 2, UTF_8);
271 }
272
273 @Test
274 public void testIO_356_B10_D13_S0_UTF8() throws Exception {
275 testIO_356(10, 13, 0, UTF_8);
276 }
277
278 @Test
279 public void testIO_356_B10_D13_S1_UTF8() throws Exception {
280 testIO_356(10, 13, 1, UTF_8);
281 }
282
283 @Test
284 public void testIO_356_B10_D20_S0_UTF8() throws Exception {
285 testIO_356(10, 20, 0, UTF_8);
286 }
287
288 private void testIO_356_Loop(final String csName, final int maxBytesPerChar) throws Exception {
289 for (int bufferSize = maxBytesPerChar; bufferSize <= 10; bufferSize++) {
290 for (int dataSize = 1; dataSize <= 20; dataSize++) {
291 testIO_356(bufferSize, dataSize, 0, csName);
292 }
293 }
294 }
295
296 @Test
297 public void testIO_356_Loop_UTF16() throws Exception {
298 final Charset charset = StandardCharsets.UTF_16;
299 testIO_356_Loop(charset.displayName(), (int) ReaderInputStream.minBufferSize(charset.newEncoder()));
300 }
301
302 @Test
303 public void testIO_356_Loop_UTF8() throws Exception {
304 final Charset charset = StandardCharsets.UTF_8;
305 testIO_356_Loop(charset.displayName(), (int) ReaderInputStream.minBufferSize(charset.newEncoder()));
306 }
307
308 @ParameterizedTest
309 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
310 public void testLargeBufferedRead_RequiredCharsets(final String csName) throws IOException {
311 testBufferedRead(LARGE_TEST_STRING, csName);
312 }
313
314 @Test
315 public void testLargeBufferedRead_UTF8() throws IOException {
316 testBufferedRead(LARGE_TEST_STRING, UTF_8);
317 }
318
319 @ParameterizedTest
320 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
321 public void testLargeSingleByteRead_RequiredCharsets(final String csName) throws IOException {
322 testSingleByteRead(LARGE_TEST_STRING, csName);
323 }
324
325 @Test
326 public void testLargeSingleByteRead_UTF8() throws IOException {
327 testSingleByteRead(LARGE_TEST_STRING, UTF_8);
328 }
329
330
331
332 private void testMarkReset(final String csName) throws Exception {
333 try (InputStream r = new CharSequenceInputStream("test", csName)) {
334 assertEquals(2, r.skip(2));
335 r.mark(0);
336 assertEquals('s', r.read(), csName);
337 assertEquals('t', r.read(), csName);
338 assertEquals(-1, r.read(), csName);
339 r.reset();
340 assertEquals('s', r.read(), csName);
341 assertEquals('t', r.read(), csName);
342 assertEquals(-1, r.read(), csName);
343 r.reset();
344 r.reset();
345 }
346 }
347
348 @ParameterizedTest
349 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
350 public void testMarkReset_RequiredCharsets(final String csName) throws Exception {
351 testMarkResetMultiByteChars(csName);
352 }
353
354 @Test
355 public void testMarkReset_USASCII() throws Exception {
356 testMarkReset(StandardCharsets.US_ASCII.name());
357 }
358
359 @Test
360 public void testMarkReset_UTF8() throws Exception {
361 testMarkReset(UTF_8);
362 }
363
364 private void testMarkResetMultiByteChars(final String csName) throws IOException {
365
366 final String sequenceEnglish = "Test Sequence";
367 final String sequenceCJK = "\u4e01\u4f23\u5045\u5167\u5289\u53ab";
368 final String[] sequences = {sequenceEnglish, sequenceCJK};
369 for (final String testSequence : sequences) {
370 final CharsetEncoder charsetEncoder = Charset.forName(csName).newEncoder();
371 final ByteBuffer byteBuffer = ByteBuffer.allocate(testSequence.length() * 3);
372 final CharBuffer charBuffer = CharBuffer.wrap(testSequence);
373 final CoderResult result = charsetEncoder.encode(charBuffer, byteBuffer, true);
374 if (result.isUnmappable()) {
375 continue;
376 }
377 final byte[] expectedBytes = byteBuffer.array();
378
379 final int bLength = byteBuffer.position();
380 final int skip = bLength - 4;
381 try (InputStream r = new CharSequenceInputStream(testSequence, csName)) {
382 assertEquals(skip, r.skip(skip));
383 r.mark(0);
384 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
385 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
386 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
387 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
388 assertEquals(-1, (byte) r.read(), csName);
389 r.reset();
390 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
391 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
392 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
393 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
394 assertEquals(-1, (byte) r.read(), csName);
395 r.reset();
396 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
397 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
398 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
399 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
400 assertEquals(-1, (byte) r.read(), csName);
401 }
402 }
403 }
404
405 @Test
406 public void testMarkSupported() throws Exception {
407 try (@SuppressWarnings("deprecation")
408 InputStream r = new CharSequenceInputStream("test", UTF_8)) {
409 assertTrue(r.markSupported());
410 }
411 try (InputStream r = CharSequenceInputStream.builder().setCharSequence("test").setCharset(UTF_8).get()) {
412 assertTrue(r.markSupported());
413 }
414 }
415
416 @Test
417 public void testNullCharset() throws IOException {
418 try (CharSequenceInputStream in = new CharSequenceInputStream("A", (Charset) null)) {
419 IOUtils.toByteArray(in);
420 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
421 }
422 try (CharSequenceInputStream in = CharSequenceInputStream.builder().setCharSequence("test").setCharset((Charset) null).get()) {
423 IOUtils.toByteArray(in);
424 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
425 }
426 }
427
428 @Test
429 public void testNullCharsetName() throws IOException {
430 try (CharSequenceInputStream in = new CharSequenceInputStream("A", (String) null)) {
431 IOUtils.toByteArray(in);
432 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
433 }
434 try (CharSequenceInputStream in = CharSequenceInputStream.builder().setCharSequence("test").setCharset((String) null).get()) {
435 IOUtils.toByteArray(in);
436 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
437 }
438 }
439
440 private void testReadZero(final String csName) throws Exception {
441 try (InputStream r = new CharSequenceInputStream("test", csName)) {
442 final byte[] bytes = new byte[30];
443 assertEquals(0, r.read(bytes, 0, 0));
444 }
445 }
446
447 @Test
448 public void testReadZero_EmptyString() throws Exception {
449 try (InputStream r = new CharSequenceInputStream("", UTF_8)) {
450 final byte[] bytes = new byte[30];
451 assertEquals(0, r.read(bytes, 0, 0));
452 }
453 }
454
455 @ParameterizedTest
456 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
457 public void testReadZero_RequiredCharsets(final String csName) throws Exception {
458 testReadZero(csName);
459 }
460
461 private void testResetBeforeEnd(final CharSequenceInputStream inputStream) throws IOException {
462 inputStream.mark(1);
463 assertEquals('1', inputStream.read());
464 inputStream.reset();
465 assertEquals('1', inputStream.read());
466 assertEquals('2', inputStream.read());
467 inputStream.reset();
468 assertEquals('1', inputStream.read());
469 assertEquals('2', inputStream.read());
470 assertEquals('3', inputStream.read());
471 inputStream.reset();
472 assertEquals('1', inputStream.read());
473 assertEquals('2', inputStream.read());
474 assertEquals('3', inputStream.read());
475 assertEquals('4', inputStream.read());
476 inputStream.reset();
477 assertEquals('1', inputStream.read());
478 }
479
480 @Test
481 public void testResetBeforeEndSetCharSequence() throws IOException {
482 try (final CharSequenceInputStream inputStream = CharSequenceInputStream.builder().setCharSequence("1234").get()) {
483 testResetBeforeEnd(inputStream);
484 }
485 }
486
487 @Test
488 public void testResetCharset() {
489 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharset((Charset) null).getCharset());
490 }
491
492 @Test
493 public void testResetCharsetEncoder() {
494 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(null).getCharsetEncoder());
495 }
496
497 @Test
498 public void testResetCharsetName() {
499 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharset((String) null).getCharset());
500 }
501
502 private void testSingleByteRead(final String testString, final String charsetName) throws IOException {
503 final byte[] bytes = testString.getBytes(charsetName);
504 try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) {
505 for (final byte b : bytes) {
506 final int read = in.read();
507 assertTrue(read >= 0, "read " + read + " >=0 ");
508 assertTrue(read <= 255, "read " + read + " <= 255");
509 assertEquals(b, (byte) read, "Should agree with input");
510 }
511 assertEquals(-1, in.read());
512 }
513 }
514
515 @ParameterizedTest
516 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
517 public void testSingleByteRead_RequiredCharsets(final String csName) throws IOException {
518 testSingleByteRead(TEST_STRING, csName);
519 }
520
521 @Test
522 public void testSingleByteRead_UTF16() throws IOException {
523 testSingleByteRead(TEST_STRING, UTF_16);
524 }
525
526 @Test
527 public void testSingleByteRead_UTF8() throws IOException {
528 testSingleByteRead(TEST_STRING, UTF_8);
529 }
530
531 @ParameterizedTest
532 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
533 public void testSkip_RequiredCharsets(final String csName) throws Exception {
534 try (InputStream r = new CharSequenceInputStream("test", csName)) {
535 assertEquals(1, r.skip(1));
536 assertEquals(2, r.skip(2));
537 r.skip(100);
538 assertEquals(-1, r.read(), csName);
539 }
540 }
541 }