1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;
19
20
21 import junit.framework.TestCase;
22 import org.apache.hadoop.chukwa.Chunk;
23 import org.apache.hadoop.chukwa.ChunkBuilder;
24 import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
25 import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
26 import org.apache.hadoop.chukwa.extraction.demux.Demux;
27 import org.apache.hadoop.mapred.Reporter;
28 import org.apache.hadoop.mapred.JobConf;
29
30 import java.util.Map;
31 import java.util.Date;
32 import java.util.Calendar;
33 import java.text.SimpleDateFormat;
34
35 public class TestTsProcessor extends TestCase {
36
37 private static String DATA_TYPE = "testDataType";
38 private static String DATA_SOURCE = "testDataSource";
39
40 JobConf jobConf = null;
41
42 Date date = null;
43 Date dateWithoutMillis = null;
44
45 protected void setUp() throws Exception {
46 jobConf = new JobConf();
47 Demux.jobConf = jobConf;
48 date = new Date();
49
50
51
52
53 Calendar calendar = Calendar.getInstance();
54 calendar.setTime(date);
55 calendar.set(Calendar.MILLISECOND, 0);
56 dateWithoutMillis = calendar.getTime();
57 }
58
59 public void testDefaultFormat() {
60 String record = buildSampleSimpleRecord(date, "yyyy-MM-dd HH:mm:ss,SSS");
61 doTest(date, record);
62 }
63
64 public void testCustomDefaultFormat() {
65 jobConf.set("TsProcessor.default.time.format", "yyyy--MM--dd HH::mm::ss SSS");
66
67 String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
68 doTest(date, record);
69 }
70
71 public void testCustomDefaultFormat2() {
72
73
74 jobConf.set("TsProcessor.default.time.format", "yyyy--MM--dd HH::mm::ss SSS,z");
75
76 String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS,z");
77 doTest(date, record);
78 }
79
80 public void testCustomDataTypeFormat() {
81 jobConf.set("TsProcessor.time.format." + DATA_TYPE, "yyyy--MM--dd HH::mm::ss SSS");
82
83 String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
84 doTest(date, record);
85 }
86
87 public void testCustomDefaultFormatWithCustomDataTypeFormat() {
88 jobConf.set("TsProcessor.default.time.format", "yyyy/MM/dd HH:mm:ss SSS");
89 jobConf.set("TsProcessor.time.format." + DATA_TYPE, "yyyy--MM--dd HH::mm::ss SSS");
90
91 String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
92 doTest(date, record);
93 }
94
95 public void testCustomApacheDefaultFormat() {
96 jobConf.set("TsProcessor.default.time.format", "dd/MMM/yyyy:HH:mm:ss Z");
97 jobConf.set("TsProcessor.default.time.regex",
98 "^(?:[\\d.]+) \\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [-+]\\d{4})\\] .*");
99
100
101 String record = buildSampleApacheRecord(dateWithoutMillis, "dd/MMM/yyyy:HH:mm:ss Z");
102 doTest(dateWithoutMillis, record);
103 }
104
105 public void testCustomApacheDataTypeFormat() {
106 jobConf.set("TsProcessor.time.format." + DATA_TYPE, "dd/MMM/yyyy:HH:mm:ss Z");
107 jobConf.set("TsProcessor.time.regex." + DATA_TYPE,
108 "^(?:[\\d.]+) \\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [-+]\\d{4})\\] .*");
109
110
111 String record = buildSampleApacheRecord(dateWithoutMillis, "dd/MMM/yyyy:HH:mm:ss Z");
112 doTest(dateWithoutMillis, record);
113 }
114
115 private static String buildSampleSimpleRecord(Date date, String dateFormat) {
116 SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
117 return "" + sdf.format(date) + " some sample record data";
118 }
119
120 private static String buildSampleApacheRecord(Date date, String dateFormat) {
121 SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
122 return "10.10.182.49 [" + sdf.format(date) +
123 "] \"\" 200 \"-\" \"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3\" \"some.site.com:8076\"";
124 }
125
126 public void doTest(Date date, String recordData) {
127 ChunkBuilder cb = new ChunkBuilder();
128 cb.addRecord(recordData.getBytes());
129
130 Chunk chunk = cb.getChunk();
131 chunk.setDataType(DATA_TYPE);
132 chunk.setSource(DATA_SOURCE);
133
134 ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord> output =
135 new ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord>();
136
137 TsProcessor p = new TsProcessor();
138 p.reset(chunk);
139 p.process(null, chunk, output, Reporter.NULL);
140
141 ChukwaRecordKey key = buildKey(date, DATA_SOURCE, DATA_TYPE);
142 Map<ChukwaRecordKey, ChukwaRecord> outputData = output.data;
143
144 assertNotNull("No output data found.", outputData);
145 assertEquals("Output data size not correct.", 1, outputData.size());
146
147 ChukwaRecord record = outputData.get(key);
148 assertNotNull("Output record not found.", record);
149 assertEquals("Output record time not correct.", date.getTime(), record.getTime());
150 assertEquals("Output record body not correct.", recordData,
151 new String(record.getMapFields().get("body").get()));
152 }
153
154 private static ChukwaRecordKey buildKey(Date date, String dataSource, String dataType) {
155 Calendar calendar = Calendar.getInstance();
156 calendar.setTime(date);
157 calendar.set(Calendar.MINUTE, 0);
158 calendar.set(Calendar.SECOND, 0);
159 calendar.set(Calendar.MILLISECOND, 0);
160
161 ChukwaRecordKey key = new ChukwaRecordKey();
162 key.setKey("" + calendar.getTimeInMillis() + "/" + dataSource + "/" + date.getTime());
163 key.setReduceType(dataType);
164
165 return key;
166 }
167
168 }