/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18  package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;
19  
20  
21  import junit.framework.TestCase;
22  import org.apache.hadoop.chukwa.Chunk;
23  import org.apache.hadoop.chukwa.ChunkBuilder;
24  import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
25  import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
26  import org.apache.hadoop.chukwa.extraction.demux.Demux;
27  import org.apache.hadoop.mapred.Reporter;
28  import org.apache.hadoop.mapred.JobConf;
29  
30  import java.util.Map;
31  import java.util.Date;
32  import java.util.Calendar;
33  import java.text.SimpleDateFormat;
34  
35  public class TestTsProcessor extends TestCase {
36  
37    private static String DATA_TYPE = "testDataType";
38    private static String DATA_SOURCE = "testDataSource";
39  
40    JobConf jobConf = null;
41  
42    Date date = null;
43    Date dateWithoutMillis = null;
44  
45    protected void setUp() throws Exception {
46      jobConf = new JobConf();
47      Demux.jobConf = jobConf;
48      date = new Date();
49  
50      //if our format doesn't contain millis, then our final record date won't
51      //have them either. let's create a sample date without millis for those tests
52      //so our assertions will pass
53      Calendar calendar = Calendar.getInstance();
54      calendar.setTime(date);
55      calendar.set(Calendar.MILLISECOND, 0);
56      dateWithoutMillis = calendar.getTime();
57    }
58  
59    public void testDefaultFormat() {
60      String record = buildSampleSimpleRecord(date, "yyyy-MM-dd HH:mm:ss,SSS");
61      doTest(date, record);
62    }
63  
64    public void testCustomDefaultFormat() {
65      jobConf.set("TsProcessor.default.time.format", "yyyy--MM--dd HH::mm::ss SSS");
66  
67      String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
68      doTest(date, record);
69    }
70  
71    public void testCustomDefaultFormat2() {
72      // this date format produces a date that longer than the format, since z
73      // expands to something like PDT
74      jobConf.set("TsProcessor.default.time.format", "yyyy--MM--dd HH::mm::ss SSS,z");
75  
76      String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS,z");
77      doTest(date, record);
78    }
79  
80    public void testCustomDataTypeFormat() {
81      jobConf.set("TsProcessor.time.format." + DATA_TYPE, "yyyy--MM--dd HH::mm::ss SSS");
82  
83      String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
84      doTest(date, record);
85    }
86  
87    public void testCustomDefaultFormatWithCustomDataTypeFormat() {
88      jobConf.set("TsProcessor.default.time.format", "yyyy/MM/dd HH:mm:ss SSS");
89      jobConf.set("TsProcessor.time.format." + DATA_TYPE, "yyyy--MM--dd HH::mm::ss SSS");
90  
91      String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
92      doTest(date, record);
93    }
94  
95    public void testCustomApacheDefaultFormat() {
96      jobConf.set("TsProcessor.default.time.format", "dd/MMM/yyyy:HH:mm:ss Z");
97      jobConf.set("TsProcessor.default.time.regex",
98              "^(?:[\\d.]+) \\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [-+]\\d{4})\\] .*");
99  
100 
101     String record = buildSampleApacheRecord(dateWithoutMillis, "dd/MMM/yyyy:HH:mm:ss Z");
102     doTest(dateWithoutMillis, record);
103   }
104 
105   public void testCustomApacheDataTypeFormat() {
106     jobConf.set("TsProcessor.time.format." + DATA_TYPE, "dd/MMM/yyyy:HH:mm:ss Z");
107     jobConf.set("TsProcessor.time.regex." + DATA_TYPE,
108             "^(?:[\\d.]+) \\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [-+]\\d{4})\\] .*");
109 
110 
111     String record = buildSampleApacheRecord(dateWithoutMillis, "dd/MMM/yyyy:HH:mm:ss Z");
112     doTest(dateWithoutMillis, record);
113   }
114 
115   private static String buildSampleSimpleRecord(Date date, String dateFormat) {
116     SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
117     return "" + sdf.format(date) + " some sample record data";
118   }
119 
120   private static String buildSampleApacheRecord(Date date, String dateFormat) {
121     SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
122     return "10.10.182.49 [" + sdf.format(date) +
123             "] \"\" 200 \"-\" \"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3\" \"some.site.com:8076\"";
124   }
125 
126   public void doTest(Date date, String recordData) {
127     ChunkBuilder cb = new ChunkBuilder();
128     cb.addRecord(recordData.getBytes());
129 
130     Chunk chunk = cb.getChunk();
131     chunk.setDataType(DATA_TYPE);
132     chunk.setSource(DATA_SOURCE);
133 
134     ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord> output =
135             new ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord>();
136 
137     TsProcessor p = new TsProcessor();
138     p.reset(chunk);
139     p.process(null, chunk, output, Reporter.NULL);
140 
141     ChukwaRecordKey key = buildKey(date, DATA_SOURCE, DATA_TYPE);
142     Map<ChukwaRecordKey, ChukwaRecord> outputData = output.data;
143 
144     assertNotNull("No output data found.", outputData);
145     assertEquals("Output data size not correct.", 1, outputData.size());
146 
147     ChukwaRecord record = outputData.get(key);
148     assertNotNull("Output record not found.", record);
149     assertEquals("Output record time not correct.", date.getTime(), record.getTime());
150     assertEquals("Output record body not correct.", recordData,
151             new String(record.getMapFields().get("body").get()));
152   }
153 
154   private static ChukwaRecordKey buildKey(Date date, String dataSource, String dataType) {
155     Calendar calendar = Calendar.getInstance();
156     calendar.setTime(date);
157     calendar.set(Calendar.MINUTE, 0);
158     calendar.set(Calendar.SECOND, 0);
159     calendar.set(Calendar.MILLISECOND, 0);
160 
161     ChukwaRecordKey key = new ChukwaRecordKey();
162     key.setKey("" + calendar.getTimeInMillis() + "/" + dataSource + "/" + date.getTime());
163     key.setReduceType(dataType);
164 
165     return key;
166   }
167 
168 }