View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz4;
20  
21  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
22  import static org.junit.jupiter.api.Assertions.assertEquals;
23  import static org.junit.jupiter.api.Assertions.assertFalse;
24  import static org.junit.jupiter.api.Assertions.assertTrue;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.IOException;
28  import java.util.Arrays;
29  
30  import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
31  import org.apache.commons.lang3.ArrayFill;
32  import org.junit.jupiter.api.Disabled;
33  import org.junit.jupiter.api.Test;
34  
35  public class BlockLZ4CompressorOutputStreamTest {
36  
37      @Test
38      @Disabled("would pass if the algorithm used for rewriting the final pairs was smarter")
39      public void canWriteBackReferenceFollowedByShortLiteralIfLengthIsBigEnough() {
40          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
41          p.setBackReference(new LZ77Compressor.BackReference(1, 10));
42          assertTrue(p.canBeWritten(5));
43      }
44  
45      @Test
46      @Disabled("would pass if the algorithm used for rewriting the final pairs was smarter")
47      public void canWriteBackReferenceFollowedByShortLiteralIfOffsetIsBigEnough() {
48          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
49          p.setBackReference(new LZ77Compressor.BackReference(10, 4));
50          assertTrue(p.canBeWritten(5));
51      }
52  
53      private byte[] compress(final byte[] input, final int... lengthOfTrailers) throws IOException {
54          try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
55                  BlockLZ4CompressorOutputStream lo = new BlockLZ4CompressorOutputStream(baos)) {
56              lo.write(input);
57              for (int i = 0; i < lengthOfTrailers.length; i++) {
58                  final int lengthOfTrailer = lengthOfTrailers[i];
59                  for (int j = 0; j < lengthOfTrailer; j++) {
60                      lo.write(i + 1);
61                  }
62              }
63              lo.close();
64              return baos.toByteArray();
65          }
66      }
67  
68      private byte[] compress(final int length) throws IOException {
69          return compress(length, 0);
70      }
71  
72      private byte[] compress(final int lengthBeforeTrailer, final int... lengthOfTrailers) throws IOException {
73          final byte[] b = prepareExpected(lengthBeforeTrailer);
74          return compress(b, lengthOfTrailers);
75      }
76  
77      private byte[] prepareExpected(final int length) {
78          return ArrayFill.fill(new byte[length], (byte) -1);
79      }
80  
81      @Test
82      public void testCantWriteBackReferenceFollowedByLiteralThatIsTooShort() {
83          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
84          p.setBackReference(new LZ77Compressor.BackReference(10, 14));
85          assertFalse(p.canBeWritten(4));
86      }
87  
88      @Test
89      public void testCantWriteBackReferenceIfAccumulatedOffsetIsTooShort() {
90          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
91          p.setBackReference(new LZ77Compressor.BackReference(1, 4));
92          assertFalse(p.canBeWritten(5));
93      }
94  
95      @Test
96      public void testCanWriteBackReferenceFollowedByLongLiteral() {
97          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
98          p.setBackReference(new LZ77Compressor.BackReference(1, 4));
99          // a length of 11 would be enough according to the spec, but
100         // the algorithm we use for rewriting the last block requires
101         // 16 bytes
102         assertTrue(p.canBeWritten(16));
103     }
104 
105     @Test
106     public void testCanWritePairWithoutBackReference() throws IOException {
107         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
108         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
109         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
110         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
111         p.writeTo(bos);
112         assertArrayEquals(new byte[] { 4 << 4, 2, 3, 4, 5 }, bos.toByteArray());
113     }
114 
115     @Test
116     public void testCanWritePairWithoutLiterals() throws IOException {
117         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
118         p.setBackReference(new LZ77Compressor.BackReference(1, 4));
119         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
120         p.writeTo(bos);
121         assertArrayEquals(new byte[] { 0, 1, 0 }, bos.toByteArray());
122     }
123 
124     @Test
125     public void testPairAccumulatesLengths() {
126         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
127         p.setBackReference(new LZ77Compressor.BackReference(1, 4));
128         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
129         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
130         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 2, 5));
131         assertEquals(13, p.length());
132     }
133 
134     @Test
135     public void testPairSeesBackReferenceWhenSet() {
136         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
137         assertFalse(p.hasBackReference());
138         p.setBackReference(new LZ77Compressor.BackReference(1, 4));
139         assertTrue(p.hasBackReference());
140     }
141 
142     @Test
143     public void testRewritingOfFinalBlockWithoutTrailingLZ77Literals() throws IOException {
144         for (int i = 1; i < 13; i++) {
145             // according to the spec these are all too short be compressed
146             // LZ77Compressor will create a single byte literal
147             // followed by a back-reference starting with i = 5,
148             // though. (4 is the minimum length for a back-reference
149             // in LZ4
150             final byte[] compressed = compress(i);
151             final byte[] expected = prepareExpected(i + 1);
152             expected[0] = (byte) (i << 4);
153             assertArrayEquals(expected, compressed, "input length is " + i);
154         }
155 
156         for (int i = 13; i < 17; i++) {
157             // LZ77Compressor will still create a single byte literal
158             // followed by a back-reference
159             // according to the spec the back-reference could be split
160             // as we can cut out a five byte literal and the offset
161             // would be big enough, but our algorithm insists on a
162             // twelve byte literal trailer and the back-reference
163             // would fall below the minimal size
164             final byte[] compressed = compress(i);
165             final byte[] expected = prepareExpected(i < 15 ? i + 1 : i + 2);
166             if (i < 15) {
167                 expected[0] = (byte) (i << 4);
168             } else {
169                 expected[0] = (byte) (15 << 4);
170                 expected[1] = (byte) (i - 15);
171             }
172             assertArrayEquals(expected, compressed, "input length is " + i);
173         }
174 
175         for (int i = 17; i < 20; i++) {
176             // LZ77Compressor will still create a single byte literal
177             // followed by a back-reference
178             // this time even our algorithm is willing to break up the
179             // back-reference
180             final byte[] compressed = compress(i);
181             final byte[] expected = prepareExpected(17);
182             expected[0] = (byte) (1 << 4 | i - 17);
183             // two-byte offset
184             expected[2] = 1;
185             expected[3] = 0;
186             expected[4] = (byte) (12 << 4);
187             assertArrayEquals(expected, compressed, "input length is " + i);
188         }
189     }
190 
191     @Test
192     public void testRewritingOfFinalBlockWithTrailingLZ77Literals() throws IOException {
193         for (int i = 1; i < 5; i++) {
194             // LZ77Compressor will create a single byte literal
195             // followed by a back-reference of length 15 followed by a
196             // literal of length i
197             // we can split the back-reference and merge it with the literal
198             final byte[] compressed = compress(16, i);
199             final byte[] expected = prepareExpected(17);
200             expected[0] = (byte) (1 << 4 | i - 1);
201             // two-byte offset
202             expected[2] = 1;
203             expected[3] = 0;
204             expected[4] = (byte) (12 << 4);
205             for (int j = 0; j < i; j++) {
206                 expected[expected.length - 1 - j] = 1;
207             }
208             assertArrayEquals(expected, compressed, "trailer length is " + i);
209         }
210         for (int i = 5; i < 12; i++) {
211             // LZ77Compressor will create a single byte literal
212             // followed by a back-reference of length 15 followed by
213             // another single byte literal and another back-reference
214             // of length i-1
215             // according to the spec we could completely satisfy the
216             // requirements by just rewriting the last Pair, but our
217             // algorithm will chip off a few bytes from the first Pair
218             final byte[] compressed = compress(16, i);
219             final byte[] expected = prepareExpected(17);
220             expected[0] = (byte) (1 << 4 | i - 1);
221             // two-byte offset
222             expected[2] = 1;
223             expected[3] = 0;
224             expected[4] = (byte) (12 << 4);
225             for (int j = 0; j < i; j++) {
226                 expected[expected.length - 1 - j] = 1;
227             }
228             assertArrayEquals(expected, compressed, "trailer length is " + i);
229         }
230         for (int i = 12; i < 15; i++) {
231             // LZ77Compressor will create a single byte literal
232             // followed by a back-reference of length 15 followed by
233             // another single byte literal and another back-reference
234             // of length i-1
235             // this shouldn't affect the first pair at all as
236             // rewriting the second one is sufficient
237             final byte[] compressed = compress(16, i);
238             final byte[] expected = prepareExpected(i + 5);
239             expected[0] = (byte) (1 << 4 | 11);
240             // two-byte offset
241             expected[2] = 1;
242             expected[3] = 0;
243             expected[4] = (byte) (i << 4);
244             for (int j = 0; j < i; j++) {
245                 expected[expected.length - 1 - j] = 1;
246             }
247             assertArrayEquals(expected, compressed, "trailer length is " + i);
248         }
249     }
250 
251     @Test
252     public void testRewritingOfFourPairs() throws IOException {
253         // LZ77Compressor creates three times a literal block followed
254         // by a back-reference (once 5 bytes long and twice four bytes
255         // long and a final literal block of length 1
256         // in the result the three last pairs are merged into a single
257         // literal and one byte is chopped off of the first pair's
258         // back-reference
259         final byte[] compressed = compress(6, 5, 5, 1);
260         final byte[] expected = prepareExpected(17);
261         expected[0] = (byte) (1 << 4);
262         // two-byte offset
263         expected[2] = 1;
264         expected[3] = 0;
265         expected[4] = (byte) (12 << 4);
266         for (int i = 6; i < 11; i++) {
267             expected[i] = 1;
268         }
269         for (int i = 11; i < 16; i++) {
270             expected[i] = 2;
271         }
272         expected[16] = 3;
273         assertArrayEquals(expected, compressed);
274     }
275 
276     @Test
277     public void testRewritingWithFinalBackreferenceAndOffsetBiggerThan1() throws IOException {
278         // this caused trouble when expandFromList() fell into the "offsetRemaining is negative" self-copy case as the
279         // calculation of copyOffset was wrong
280         final byte[] toCompress = prepareExpected(25);
281         for (int i = 0; i < toCompress.length; i += 4) {
282             toCompress[i] = 1;
283         }
284         // LZ77Compressor creates a four byte literal and a back-reference with offset 4 and length 21
285         // we'll need to split the back-reference and chop off the last 12 bytes
286         final byte[] compressed = compress(toCompress);
287         final byte[] expected = prepareExpected(1 + 4 + 2 + 1 + 12);
288         expected[0] = (byte) (4 << 4 | 5);
289         expected[1] = 1;
290         expected[5] = 4;
291         expected[6] = 0;
292         expected[7] = (byte) (12 << 4);
293         for (int i = 11; i < expected.length; i += 4) {
294             expected[i] = 1;
295         }
296         assertArrayEquals(expected, compressed);
297     }
298 
299     @Test
300     public void testWritesCompletePair() throws IOException {
301         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
302         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
303         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
304         b[2] = 19;
305         p.setBackReference(new LZ77Compressor.BackReference(1, 5));
306         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
307         p.writeTo(bos);
308         assertArrayEquals(new byte[] { (4 << 4) + 1, 2, 3, 4, 5, 1, 0 }, bos.toByteArray());
309     }
310 
311     @Test
312     public void testWritesCorrectSizeFor15ByteLengthLiteral() throws IOException {
313         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
314         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
315         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9));
316         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 6));
317         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
318         p.writeTo(bos);
319         assertArrayEquals(new byte[] { (byte) (15 << 4), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6 }, bos.toByteArray());
320     }
321 
322     @Test
323     public void testWritesCorrectSizeFor19ByteLengthBackReference() throws IOException {
324         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
325         p.setBackReference(new LZ77Compressor.BackReference(1, 19));
326         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
327         p.writeTo(bos);
328         assertArrayEquals(new byte[] { 15, 1, 0, 0 }, bos.toByteArray());
329     }
330 
331     @Test
332     public void testWritesCorrectSizeFor269ByteLengthLiteral() throws IOException {
333         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
334         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
335         for (int i = 0; i < 26; i++) {
336             p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10));
337         }
338         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9));
339         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
340         p.writeTo(bos);
341         assertArrayEquals(new byte[] { (byte) (15 << 4), (byte) 254, 1 }, Arrays.copyOfRange(bos.toByteArray(), 0, 3));
342     }
343 
344     @Test
345     public void testWritesCorrectSizeFor270ByteLengthLiteral() throws IOException {
346         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
347         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
348         for (int i = 0; i < 27; i++) {
349             p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10));
350         }
351         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
352         p.writeTo(bos);
353         assertArrayEquals(new byte[] { (byte) (15 << 4), (byte) 255, 0, 1 }, Arrays.copyOfRange(bos.toByteArray(), 0, 4));
354     }
355 
356     @Test
357     public void testWritesCorrectSizeFor273ByteLengthBackReference() throws IOException {
358         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
359         p.setBackReference(new LZ77Compressor.BackReference(1, 273));
360         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
361         p.writeTo(bos);
362         assertArrayEquals(new byte[] { 15, 1, 0, (byte) 254 }, bos.toByteArray());
363     }
364 
365     @Test
366     public void testWritesCorrectSizeFor274ByteLengthBackReference() throws IOException {
367         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
368         p.setBackReference(new LZ77Compressor.BackReference(1, 274));
369         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
370         p.writeTo(bos);
371         assertArrayEquals(new byte[] { 15, 1, 0, (byte) 255, 0 }, bos.toByteArray());
372     }
373 }