1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.extractor.html; |
19 | |
|
20 | |
import org.w3c.dom.Node; |
21 | |
|
22 | |
import java.util.ArrayList; |
23 | |
import java.util.Arrays; |
24 | |
import java.util.Collection; |
25 | |
import java.util.Collections; |
26 | |
import java.util.HashMap; |
27 | |
import java.util.List; |
28 | |
import java.util.Map; |
29 | |
|
30 | |
import static org.apache.any23.extractor.html.HTMLDocument.TextField; |
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | 0 | public class HCardName { |
39 | |
|
40 | |
public static final String GIVEN_NAME = "given-name"; |
41 | |
public static final String FAMILY_NAME = "family-name"; |
42 | |
public static final String ADDITIONAL_NAME = "additional-name"; |
43 | |
public static final String NICKNAME = "nickname"; |
44 | |
public static final String HONORIFIC_PREFIX = "honorific-prefix"; |
45 | |
public static final String HONORIFIC_SUFFIX = "honorific-suffix"; |
46 | |
|
47 | 0 | public static final String[] FIELDS = { |
48 | |
GIVEN_NAME, |
49 | |
FAMILY_NAME, |
50 | |
ADDITIONAL_NAME, |
51 | |
NICKNAME, |
52 | |
HONORIFIC_PREFIX, |
53 | |
HONORIFIC_SUFFIX |
54 | |
}; |
55 | |
|
56 | 0 | private static final String[] NAME_COMPONENTS = { |
57 | |
HONORIFIC_PREFIX, |
58 | |
GIVEN_NAME, |
59 | |
ADDITIONAL_NAME, |
60 | |
FAMILY_NAME, |
61 | |
HONORIFIC_SUFFIX |
62 | |
}; |
63 | |
|
64 | 0 | private Map<String, FieldValue> fields = new HashMap<String, FieldValue>(); |
65 | 0 | private TextField[] fullName = null; |
66 | 0 | private TextField organization = null; |
67 | 0 | private TextField unit = null; |
68 | |
|
69 | |
private static TextField join(TextField[] sarray, String delimiter) { |
70 | 0 | StringBuilder builder = new StringBuilder(); |
71 | 0 | final int sarrayLengthMin2 = sarray.length - 1; |
72 | 0 | for(int i = 0; i < sarray.length; i++) { |
73 | 0 | builder.append(sarray[i].value()); |
74 | 0 | if( i < sarrayLengthMin2) { |
75 | 0 | builder.append(delimiter); |
76 | |
} |
77 | |
} |
78 | 0 | return new TextField( builder.toString(), sarray[0].source() ) ; |
79 | |
} |
80 | |
|
81 | |
|
82 | |
|
83 | |
|
84 | |
public void reset() { |
85 | 0 | fields.clear(); |
86 | 0 | fullName = null; |
87 | 0 | organization = null; |
88 | 0 | unit = null; |
89 | 0 | } |
90 | |
|
91 | |
public void setField(String fieldName, TextField nd) { |
92 | 0 | final String value = fixWhiteSpace( nd.value() ); |
93 | 0 | if (value == null) return; |
94 | 0 | FieldValue fieldValue = fields.get(fieldName); |
95 | 0 | if(fieldValue == null) { |
96 | 0 | fieldValue = new FieldValue(); |
97 | 0 | fields.put(fieldName, fieldValue); |
98 | |
} |
99 | 0 | fieldValue.addValue( new TextField(value, nd.source()) ); |
100 | 0 | } |
101 | |
|
102 | |
public void setFullName(TextField nd) { |
103 | 0 | final String value = fixWhiteSpace( nd.value() ); |
104 | 0 | if (value == null) return; |
105 | 0 | String[] split = value.split("\\s+"); |
106 | |
|
107 | 0 | final String split0 = split[0]; |
108 | 0 | final int split0Length = split0.length(); |
109 | 0 | if(split.length > 1 && split0.charAt(split0Length -1) == ',') { |
110 | 0 | String swap = split[1]; |
111 | 0 | split[1] = split0.substring(0, split0Length -1); |
112 | 0 | split[0] = swap; |
113 | |
} |
114 | 0 | TextField[] splitFields = new TextField[split.length]; |
115 | 0 | for(int i = 0; i < split.length; i++) { |
116 | 0 | splitFields[i] = new TextField(split[i], nd.source()); |
117 | |
} |
118 | 0 | this.fullName = splitFields; |
119 | 0 | } |
120 | |
|
121 | |
public void setOrganization(TextField nd) { |
122 | 0 | final String value = fixWhiteSpace( nd.value() ); |
123 | 0 | if (value == null) return; |
124 | 0 | this.organization = new TextField(value, nd.source()); |
125 | 0 | } |
126 | |
|
127 | |
public boolean isMultiField(String fieldName) { |
128 | 0 | FieldValue fieldValue = fields.get(fieldName); |
129 | 0 | return fieldValue != null && fieldValue.isMultiField(); |
130 | |
} |
131 | |
|
132 | |
public boolean containsField(String fieldName) { |
133 | 0 | return GIVEN_NAME.equals(fieldName) || FAMILY_NAME.equals(fieldName) || fields.containsKey(fieldName); |
134 | |
} |
135 | |
|
136 | |
public TextField getField(String fieldName) { |
137 | 0 | if (GIVEN_NAME.equals(fieldName)) { |
138 | 0 | return getFullNamePart(GIVEN_NAME, 0); |
139 | |
} |
140 | 0 | if (FAMILY_NAME.equals(fieldName)) { |
141 | 0 | return getFullNamePart(FAMILY_NAME, Integer.MAX_VALUE); |
142 | |
} |
143 | 0 | FieldValue v = fields.get(fieldName); |
144 | 0 | return v == null ? null : v.getValue(); |
145 | |
} |
146 | |
|
147 | |
public Collection<TextField> getFields(String fieldName) { |
148 | 0 | FieldValue v = fields.get(fieldName); |
149 | 0 | return v == null ? Collections.<TextField>emptyList() : v.getValues(); |
150 | |
} |
151 | |
|
152 | |
private TextField getFullNamePart(String fieldName, int index) { |
153 | 0 | if (fields.containsKey(fieldName)) { |
154 | 0 | return fields.get(fieldName).getValue(); |
155 | |
} |
156 | 0 | if (fullName == null) return null; |
157 | |
|
158 | 0 | if (organization != null && fullName[0].value().equals(organization.value())) { |
159 | 0 | return null; |
160 | |
} |
161 | 0 | if (index != Integer.MAX_VALUE && fullName.length <= index) return null; |
162 | 0 | return fullName[ index == Integer.MAX_VALUE ? fullName.length - 1 : index]; |
163 | |
} |
164 | |
|
165 | |
public boolean hasField(String fieldName) { |
166 | 0 | return getField(fieldName) != null; |
167 | |
} |
168 | |
|
169 | |
public boolean hasAnyField() { |
170 | 0 | for (String fieldName : FIELDS) { |
171 | 0 | if (hasField(fieldName)) return true; |
172 | |
} |
173 | 0 | return false; |
174 | |
} |
175 | |
|
176 | |
public TextField getFullName() { |
177 | 0 | if (fullName != null) return join(fullName, " "); |
178 | 0 | StringBuffer s = new StringBuffer(); |
179 | 0 | boolean empty = true; |
180 | 0 | Node first = null; |
181 | |
TextField current; |
182 | 0 | for (String fieldName : NAME_COMPONENTS) { |
183 | 0 | if (!hasField(fieldName)) continue; |
184 | 0 | if (!empty) { |
185 | 0 | s.append(' '); |
186 | |
} |
187 | 0 | current = getField(fieldName); |
188 | 0 | if(first == null) { first = current.source(); } |
189 | 0 | s.append( current.value() ); |
190 | 0 | empty = false; |
191 | |
} |
192 | 0 | if (empty) return null; |
193 | 0 | return new TextField( s.toString(), first); |
194 | |
} |
195 | |
|
196 | |
public TextField getOrganization() { |
197 | 0 | return organization; |
198 | |
} |
199 | |
|
200 | |
public void setOrganizationUnit(TextField nd) { |
201 | 0 | final String value = fixWhiteSpace( nd.value() ); |
202 | 0 | if (value == null) return; |
203 | 0 | this.unit = new TextField(value, nd.source() ); |
204 | 0 | } |
205 | |
|
206 | |
public TextField getOrganizationUnit() { |
207 | 0 | return unit; |
208 | |
} |
209 | |
|
210 | |
private String fixWhiteSpace(String s) { |
211 | 0 | if (s == null) return null; |
212 | 0 | s = s.trim().replaceAll("\\s+", " "); |
213 | 0 | if ("".equals(s)) return null; |
214 | 0 | return s; |
215 | |
} |
216 | |
|
217 | |
|
218 | |
|
219 | |
|
220 | 0 | private class FieldValue { |
221 | |
|
222 | |
private TextField value; |
223 | 0 | private List<TextField> multiValue = new ArrayList<TextField>(); |
224 | |
|
225 | 0 | FieldValue() {} |
226 | |
|
227 | |
void addValue(TextField v) { |
228 | 0 | if(value == null && multiValue == null) { |
229 | 0 | value = v; |
230 | 0 | } else if(multiValue == null) { |
231 | 0 | multiValue = new ArrayList<TextField>(); |
232 | 0 | multiValue.add(value); |
233 | 0 | value = null; |
234 | 0 | multiValue.add(v); |
235 | |
} else { |
236 | 0 | multiValue.add(v); |
237 | |
} |
238 | 0 | } |
239 | |
|
240 | |
boolean isMultiField() { |
241 | 0 | return value == null; |
242 | |
} |
243 | |
|
244 | |
TextField getValue() { |
245 | 0 | return value != null ? value : multiValue.get(0); |
246 | |
} |
247 | |
|
248 | |
Collection<TextField> getValues() { |
249 | 0 | return value != null ? Arrays.asList(value) : multiValue; |
250 | |
} |
251 | |
} |
252 | |
|
253 | |
} |