View Javadoc
1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.commons.codec.digest;
19  
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.PrintStream;
23  import java.lang.reflect.Constructor;
24  import java.util.ArrayList;
25  import java.util.List;
26  import java.util.Properties;
27  import java.util.Random;
28  import java.util.zip.CRC32;
29  import java.util.zip.Checksum;
30  
31  import org.junit.Assert;
32  import org.junit.Test;
33  
34  /**
35   * Unit test to verify that the pure-Java CRC32 algorithm gives
36   * the same results as the built-in implementation.
37   *
38   * Copied from Hadoop 2.6.3 (Renamed TestPureJavaCrc32 to PureJavaCrc32Test).
39   * @since 1.11
40   */
41  public class PureJavaCrc32Test {
42    private final CRC32 theirs = new CRC32();
43    private final PureJavaCrc32 ours = new PureJavaCrc32();
44  
45    @Test
46    public void testCorrectness() throws Exception {
47      checkSame();
48  
49      theirs.update(104);
50      ours.update(104);
51      checkSame();
52  
53      checkOnBytes(new byte[] {40, 60, 97, -70}, false);
54  
55      checkOnBytes("hello world!".getBytes("UTF-8"), false);
56  
57      final Random random1 = new Random();
58      final Random random2 = new Random();
59      for (int i = 0; i < 10000; i++) {
60        final byte randomBytes[] = new byte[random1.nextInt(2048)];
61        random2.nextBytes(randomBytes);
62        checkOnBytes(randomBytes, false);
63      }
64  
65    }
66  
67    private void checkOnBytes(final byte[] bytes, final boolean print) {
68      theirs.reset();
69      ours.reset();
70      checkSame();
71  
72      for (final byte b : bytes) {
73        ours.update(b);
74        theirs.update(b);
75        checkSame();
76      }
77  
78      if (print) {
79        System.out.println("theirs:\t" + Long.toHexString(theirs.getValue())
80                           + "\nours:\t" + Long.toHexString(ours.getValue()));
81      }
82  
83      theirs.reset();
84      ours.reset();
85  
86      ours.update(bytes, 0, bytes.length);
87      theirs.update(bytes, 0, bytes.length);
88      if (print) {
89        System.out.println("theirs:\t" + Long.toHexString(theirs.getValue())
90                           + "\nours:\t" + Long.toHexString(ours.getValue()));
91      }
92  
93      checkSame();
94  
95      if (bytes.length >= 10) {
96        ours.update(bytes, 5, 5);
97        theirs.update(bytes, 5, 5);
98        checkSame();
99      }
100   }
101 
102   private void checkSame() {
103     Assert.assertEquals(theirs.getValue(), ours.getValue());
104   }
105 
106   /**
107    * Generate a table to perform checksums based on the same CRC-32 polynomial
108    * that java.util.zip.CRC32 uses.
109    */
110   public static class Table {
111     private final int[][] tables;
112 
113     private Table(final int nBits, final int nTables,
114         final long polynomial) {
115       tables = new int[nTables][];
116       final int size = 1 << nBits;
117       for(int i = 0; i < tables.length; i++) {
118         tables[i] = new int[size];
119       }
120 
121       //compute the first table
122       final int[] first = tables[0];
123       for (int i = 0; i < first.length; i++) {
124         int crc = i;
125         for (int j = 0; j < nBits; j++) {
126           if ((crc & 1) == 1) {
127             crc >>>= 1;
128             crc ^= polynomial;
129           } else {
130             crc >>>= 1;
131           }
132         }
133         first[i] = crc;
134       }
135 
136       //compute the remaining tables
137       final int mask = first.length - 1;
138       for(int j = 1; j < tables.length; j++) {
139         final int[] previous = tables[j-1];
140         final int[] current = tables[j];
141         for (int i = 0; i < current.length; i++) {
142           current[i] = (previous[i] >>> nBits) ^ first[previous[i] & mask];
143         }
144       }
145     }
146 
147     String[] toStrings(final String nameformat) {
148       final String[] s = new String[tables.length];
149       for (int j = 0; j < tables.length; j++) {
150         final int[] t = tables[j];
151         final StringBuilder b = new StringBuilder();
152         b.append(String.format("    /* "+ nameformat +" */", j));
153         for (int i = 0; i < t.length;) {
154           b.append("\n    ");
155           for(int k = 0; k < 4; k++) {
156             b.append(String.format("0x%08X, ", t[i++]));
157           }
158         }
159         s[j] = b.toString();
160       }
161       return s;
162     }
163 
164     @Override
165     public String toString() {
166       final StringBuilder b = new StringBuilder();
167 
168       final String tableFormat = String.format("T%d_",
169         Integer.numberOfTrailingZeros(tables[0].length)) + "%d";
170       final String startFormat = "  private static final int "+tableFormat+"_start = %d*256;";
171 
172       for (int j = 0; j < tables.length; j++) {
173         b.append(String.format(startFormat, j, j));
174         b.append("\n");
175       }
176 
177       b.append("  private static final int[] T = new int[] {");
178       for(final String s : toStrings(tableFormat)) {
179         b.append("\n");
180         b.append(s);
181       }
182       b.setCharAt(b.length() - 2, '\n');
183       b.append(" };\n");
184       return b.toString();
185     }
186 
187     /** Generate CRC-32 lookup tables */
188     public static void main(final String[] args) throws FileNotFoundException {
189       if (args.length != 1) {
190         System.err.println("Usage: " + Table.class.getName() +
191             " <polynomial>");
192         System.exit(1);
193       }
194       final long polynomial = Long.parseLong(args[0], 16);
195 
196       final int i = 8;
197       final Table t = new Table(i, 16, polynomial);
198       final String s = t.toString();
199       System.out.println(s);
200 
201       //print to a file
202       final PrintStream out = new PrintStream(
203           new FileOutputStream("table" + i + ".txt"), true);
204       try {
205         out.println(s);
206       } finally {
207         out.close();
208       }
209     }
210   }
211 
212   /**
213    * Performance tests to compare performance of the Pure Java implementation
214    * to the built-in java.util.zip implementation. This can be run from the
215    * command line with:
216    *
217    *   java -cp path/to/test/classes:path/to/common/classes \
218    *      'org.apache.hadoop.util.TestPureJavaCrc32$PerformanceTest'
219    *
220    * The output is in JIRA table format.
221    */
222   public static class PerformanceTest {
223     public static final int MAX_LEN = 32*1024*1024; // up to 32MB chunks
224     public static final int BYTES_PER_SIZE = MAX_LEN * 4;
225 
226     static final Class<? extends Checksum> zip = CRC32.class;
227     static final List<Class<? extends Checksum>> CRCS = new ArrayList<Class<? extends Checksum>>();
228     static {
229       CRCS.add(zip);
230       CRCS.add(PureJavaCrc32.class);
231     }
232 
233 
234     public static void main(final String args[]) throws Exception {
235       printSystemProperties(System.out);
236       doBench(CRCS, System.out);
237     }
238 
239     private static void printCell(final String s, final int width, final PrintStream out) {
240       final int w = s.length() > width? s.length(): width;
241       out.printf(" %" + w + "s |", s);
242     }
243 
244     private static void doBench(final List<Class<? extends Checksum>> crcs,
245         final PrintStream out) throws Exception {
246       final byte[] bytes = new byte[MAX_LEN];
247       new Random().nextBytes(bytes);
248 
249       // Print header
250       out.printf("\nPerformance Table (The unit is MB/sec; #T = #Theads)\n");
251 
252       // Warm up implementations to get jit going.
253       for (final Class<? extends Checksum> c : crcs) {
254         doBench(c, 1, bytes, 2);
255         doBench(c, 1, bytes, 2101);
256       }
257 
258       // Test on a variety of sizes with different number of threads
259       for (int size = 32; size <= MAX_LEN; size <<= 1) {
260         doBench(crcs, bytes, size, out);
261       }
262     }
263 
264     private static void doBench(final List<Class<? extends Checksum>> crcs,
265         final byte[] bytes, final int size, final PrintStream out) throws Exception {
266       final String numBytesStr = " #Bytes ";
267       final String numThreadsStr = "#T";
268       final String diffStr = "% diff";
269 
270       out.print('|');
271       printCell(numBytesStr, 0, out);
272       printCell(numThreadsStr, 0, out);
273       for (int i = 0; i < crcs.size(); i++) {
274         final Class<? extends Checksum> c = crcs.get(i);
275         out.print('|');
276         printCell(c.getSimpleName(), 8, out);
277         for(int j = 0; j < i; j++) {
278           printCell(diffStr, diffStr.length(), out);
279         }
280       }
281       out.printf("\n");
282 
283       for(int numThreads = 1; numThreads <= 16; numThreads <<= 1) {
284         out.printf("|");
285         printCell(String.valueOf(size), numBytesStr.length(), out);
286         printCell(String.valueOf(numThreads), numThreadsStr.length(), out);
287 
288         BenchResult expected = null;
289         final List<BenchResult> previous = new ArrayList<BenchResult>();
290         for(final Class<? extends Checksum> c : crcs) {
291           System.gc();
292 
293           final BenchResult result = doBench(c, numThreads, bytes, size);
294           printCell(String.format("%9.1f", result.mbps),
295               c.getSimpleName().length()+1, out);
296 
297           //check result
298           if(c == zip) {
299             expected = result;
300           } else if (expected == null) {
301             throw new RuntimeException("The first class is "
302                 + c.getName() + " but not " + zip.getName());
303           } else if (result.value != expected.value) {
304             throw new RuntimeException(c + " has bugs!");
305           }
306 
307           //compare result with previous
308           for(final BenchResult p : previous) {
309             final double diff = (result.mbps - p.mbps) / p.mbps * 100;
310             printCell(String.format("%5.1f%%", diff), diffStr.length(), out);
311           }
312           previous.add(result);
313         }
314         out.printf("\n");
315       }
316     }
317 
318     private static BenchResult doBench(final Class<? extends Checksum> clazz,
319         final int numThreads, final byte[] bytes, final int size)
320             throws Exception {
321 
322       final Thread[] threads = new Thread[numThreads];
323       final BenchResult[] results = new BenchResult[threads.length];
324 
325       {
326         final int trials = BYTES_PER_SIZE / size;
327         final double mbProcessed = trials * size / 1024.0 / 1024.0;
328         final Constructor<? extends Checksum> ctor = clazz.getConstructor();
329 
330         for(int i = 0; i < threads.length; i++) {
331           final int index = i;
332           threads[i] = new Thread() {
333             final Checksum crc = ctor.newInstance();
334 
335             @Override
336             public void run() {
337               final long st = System.nanoTime();
338               crc.reset();
339               for (int i = 0; i < trials; i++) {
340                 crc.update(bytes, 0, size);
341               }
342               final long et = System.nanoTime();
343               final double secsElapsed = (et - st) / 1000000000.0d;
344               results[index] = new BenchResult(crc.getValue(), mbProcessed/secsElapsed);
345             }
346           };
347         }
348       }
349 
350       for (final Thread thread : threads) {
351         thread.start();
352       }
353       for (final Thread thread : threads) {
354         thread.join();
355       }
356 
357       final long expected = results[0].value;
358       double sum = results[0].mbps;
359       for(int i = 1; i < results.length; i++) {
360         if (results[i].value != expected) {
361           throw new AssertionError(clazz.getSimpleName() + " results not matched.");
362         }
363         sum += results[i].mbps;
364       }
365       return new BenchResult(expected, sum/results.length);
366     }
367 
368     private static class BenchResult {
369       /** CRC value */
370       final long value;
371       /** Speed (MB per second) */
372       final double mbps;
373 
374       BenchResult(final long value, final double mbps) {
375         this.value = value;
376         this.mbps = mbps;
377       }
378     }
379 
380     private static void printSystemProperties(final PrintStream out) {
381       final String[] names = {
382           "java.version",
383           "java.runtime.name",
384           "java.runtime.version",
385           "java.vm.version",
386           "java.vm.vendor",
387           "java.vm.name",
388           "java.vm.specification.version",
389           "java.specification.version",
390           "os.arch",
391           "os.name",
392           "os.version"
393       };
394       final Properties p = System.getProperties();
395       for(final String n : names) {
396         out.println(n + " = " + p.getProperty(n));
397       }
398     }
399   }
400 }