1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.chukwa.util;
19
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.extraction.engine.RecordUtil;
27
28
29 public class Filter {
30
31 private static final String[] SEARCH_TARGS =
32 {"datatype", "name", "host", "cluster", "content"};
33 static final String SEPARATOR="&";
34
35 private static class SearchRule {
36 Pattern p;
37 String targ;
38
39 SearchRule(Pattern p, String t) {
40 this.p = p;
41 this.targ = t;
42 }
43
44 boolean matches(Chunk chunk) {
45 if(targ.equals("datatype")) {
46 return p.matcher(chunk.getDataType()).matches();
47 } else if(targ.equals("name")) {
48 return p.matcher(chunk.getStreamName()).matches();
49 } else if(targ.equals("host")) {
50 return p.matcher(chunk.getSource()).matches();
51 } else if(targ.equals("cluster")) {
52 String cluster = RecordUtil.getClusterName(chunk);
53 return p.matcher(cluster).matches();
54 } else if(targ.equals("content")) {
55 String content = new String(chunk.getData());
56 return p.matcher(content).matches();
57 } else if(targ.startsWith("tags.")) {
58 String tagName = targ.substring("tags.".length());
59 String tagVal = chunk.getTag(tagName);
60 if(tagVal == null)
61 return false;
62 return p.matcher(tagVal).matches();
63 } else {
64 assert false: "unknown target: " +targ;
65 return false;
66 }
67 }
68
69 public String toString() {
70 return targ + "=" +p.toString();
71 }
72 }
73
74 List<SearchRule> compiledPatterns;
75
76 public Filter(String listOfPatterns) throws PatternSyntaxException{
77 compiledPatterns = new ArrayList<SearchRule>();
78
79 String[] patterns = listOfPatterns.split(SEPARATOR);
80 for(String p: patterns) {
81 int equalsPos = p.indexOf('=');
82
83 if(equalsPos < 0 || equalsPos > (p.length() -2)) {
84 throw new PatternSyntaxException(
85 "pattern must be of form targ=pattern", p, -1);
86 }
87
88 String targ = p.substring(0, equalsPos);
89 if(!targ.startsWith("tags.") && !ArrayUtils.contains(SEARCH_TARGS, targ)) {
90 throw new PatternSyntaxException(
91 "pattern doesn't start with recognized search target", p, -1);
92 }
93
94 Pattern pat = Pattern.compile(p.substring(equalsPos+1), Pattern.DOTALL);
95 compiledPatterns.add(new SearchRule(pat, targ));
96 }
97 }
98
99 public boolean matches(Chunk chunk) {
100 for(SearchRule r: compiledPatterns) {
101 if(!r.matches(chunk))
102 return false;
103 }
104 return true;
105 }
106
107 int size() {
108 return compiledPatterns.size();
109 }
110
111 public String toString() {
112 StringBuilder sb = new StringBuilder();
113 sb.append(compiledPatterns.get(0));
114 for(int i=1; i < compiledPatterns.size(); ++i) {
115 sb.append(" & ");
116 sb.append(compiledPatterns.get(i));
117 }
118 return sb.toString();
119 }
120
121 private static final class MatchAll extends Filter {
122 public MatchAll() {
123 super("datatype=.*");
124 }
125
126 public boolean matches(Chunk c) {
127 return true;
128 }
129
130 public String toString() {
131 return "ALL";
132 }
133 }
134 public static final Filter ALL = new MatchAll();
135
136 }