/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;

import java.io.IOException;
import junit.framework.TestCase;
import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.ChunkImpl;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
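
/**
 * Microbenchmark for HadoopLogProcessor: feeds it a batch of synthetic
 * DataNode log lines and prints the parse time per line and per kilobyte.
 */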
public class TestHadoopLogProcessor extends TestCase {

  long serializedSize = 0;
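
  // A sink collector: discards each record but accumulates its serialized
  // length, so the benchmark measures parse cost without real output I/O.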
  OutputCollector<ChukwaRecordKey, ChukwaRecord> nullcollector =
      new OutputCollector<ChukwaRecordKey, ChukwaRecord>() {
    public void collect(ChukwaRecordKey arg0, ChukwaRecord arg1)
        throws IOException {
      serializedSize += arg1.toString().length();
    }
  };

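  /**
   * Parses 50,000 synthetic log chunks through HadoopLogProcessor and
   * reports parse time per line, per kilobyte of input, and the total
   * serialized length of the emitted records.
   */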
  public void testHLPParseTimes() {
    HadoopLogProcessor hlp = new HadoopLogProcessor();

    int LINES = 50000;
    long bytes = 0;
    long ts_start = System.currentTimeMillis();
    for (int i = 0; i < LINES; ++i) {
      Chunk c = getNewChunk();
      bytes += c.getData().length;
      hlp.process(null, c, nullcollector, Reporter.NULL);
    }
    long time = (System.currentTimeMillis() - ts_start);
    System.out.println("parse took " + time + " milliseconds");
    System.out.println("aka " + time * 1.0 / LINES + " ms per line or " + time
        * 1000.0 / bytes + " ms per kilobyte of log data");
    System.out.println("output records had total length of " + serializedSize);
  }

  java.util.Random r = new java.util.Random();

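  /**
   * Builds a one-line chunk formatted like a DataNode log entry; the
   * millisecond field and trailing number are randomized so that
   * successive lines differ.
   */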
  public Chunk getNewChunk() {
    int ms = r.nextInt(1000);
    String line = "2008-05-29 10:42:22," + ms
        + " INFO org.apache.hadoop.dfs.DataNode: Some text goes here"
        + r.nextInt() + "\n";
    ChunkImpl c = new ChunkImpl("HadoopLogProcessor", "test",
        line.length(), line.getBytes(), null);

    return c;
  }

}