1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.commons.codec.language; |
19 | |
|
20 | |
import org.apache.commons.codec.EncoderException; |
21 | |
import org.apache.commons.codec.StringEncoder; |
22 | |
|
23 | |
|
24 | |
|
25 | |
|
26 | |
|
27 | |
|
28 | |
|
29 | |
|
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
public class DoubleMetaphone implements StringEncoder { |
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
private static final String VOWELS = "AEIOUY"; |
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | 1 | private static final String[] SILENT_START = |
48 | |
{ "GN", "KN", "PN", "WR", "PS" }; |
49 | 1 | private static final String[] L_R_N_M_B_H_F_V_W_SPACE = |
50 | |
{ "L", "R", "N", "M", "B", "H", "F", "V", "W", " " }; |
51 | 1 | private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = |
52 | |
{ "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" }; |
53 | 1 | private static final String[] L_T_K_S_N_M_B_Z = |
54 | |
{ "L", "T", "K", "S", "N", "M", "B", "Z" }; |
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | 24 | private int maxCodeLen = 4; |
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
public DoubleMetaphone() { |
65 | 24 | super(); |
66 | 24 | } |
67 | |
|
68 | |
|
69 | |
|
70 | |
|
71 | |
|
72 | |
|
73 | |
|
74 | |
public String doubleMetaphone(String value) { |
75 | 82 | return doubleMetaphone(value, false); |
76 | |
} |
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
|
84 | |
|
85 | |
public String doubleMetaphone(String value, boolean alternate) { |
86 | 6504 | value = cleanInput(value); |
87 | 6504 | if (value == null) { |
88 | 12 | return null; |
89 | |
} |
90 | |
|
91 | 6492 | boolean slavoGermanic = isSlavoGermanic(value); |
92 | 6492 | int index = isSilentStart(value) ? 1 : 0; |
93 | |
|
94 | 6492 | DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen()); |
95 | |
|
96 | 42088 | while (!result.isComplete() && index <= value.length() - 1) { |
97 | 35596 | switch (value.charAt(index)) { |
98 | |
case 'A': |
99 | |
case 'E': |
100 | |
case 'I': |
101 | |
case 'O': |
102 | |
case 'U': |
103 | |
case 'Y': |
104 | 13681 | index = handleAEIOUY(result, index); |
105 | 13681 | break; |
106 | |
case 'B': |
107 | 884 | result.append('P'); |
108 | 884 | index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1; |
109 | 884 | break; |
110 | |
case '\u00C7': |
111 | |
|
112 | 1 | result.append('S'); |
113 | 1 | index++; |
114 | 1 | break; |
115 | |
case 'C': |
116 | 1680 | index = handleC(value, result, index); |
117 | 1680 | break; |
118 | |
case 'D': |
119 | 1238 | index = handleD(value, result, index); |
120 | 1238 | break; |
121 | |
case 'F': |
122 | 646 | result.append('F'); |
123 | 646 | index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1; |
124 | 646 | break; |
125 | |
case 'G': |
126 | 801 | index = handleG(value, result, index, slavoGermanic); |
127 | 801 | break; |
128 | |
case 'H': |
129 | 521 | index = handleH(value, result, index); |
130 | 521 | break; |
131 | |
case 'J': |
132 | 87 | index = handleJ(value, result, index, slavoGermanic); |
133 | 87 | break; |
134 | |
case 'K': |
135 | 324 | result.append('K'); |
136 | 324 | index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1; |
137 | 324 | break; |
138 | |
case 'L': |
139 | 1797 | index = handleL(value, result, index); |
140 | 1797 | break; |
141 | |
case 'M': |
142 | 1241 | result.append('M'); |
143 | 1241 | index = conditionM0(value, index) ? index + 2 : index + 1; |
144 | 1241 | break; |
145 | |
case 'N': |
146 | 2777 | result.append('N'); |
147 | 2777 | index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1; |
148 | 2777 | break; |
149 | |
case '\u00D1': |
150 | |
|
151 | 1 | result.append('N'); |
152 | 1 | index++; |
153 | 1 | break; |
154 | |
case 'P': |
155 | 1144 | index = handleP(value, result, index); |
156 | 1144 | break; |
157 | |
case 'Q': |
158 | 80 | result.append('K'); |
159 | 80 | index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1; |
160 | 80 | break; |
161 | |
case 'R': |
162 | 2737 | index = handleR(value, result, index, slavoGermanic); |
163 | 2737 | break; |
164 | |
case 'S': |
165 | 2151 | index = handleS(value, result, index, slavoGermanic); |
166 | 2151 | break; |
167 | |
case 'T': |
168 | 2224 | index = handleT(value, result, index); |
169 | 2224 | break; |
170 | |
case 'V': |
171 | 406 | result.append('F'); |
172 | 406 | index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1; |
173 | 406 | break; |
174 | |
case 'W': |
175 | 519 | index = handleW(value, result, index); |
176 | 519 | break; |
177 | |
case 'X': |
178 | 152 | index = handleX(value, result, index); |
179 | 152 | break; |
180 | |
case 'Z': |
181 | 97 | index = handleZ(value, result, index, slavoGermanic); |
182 | 97 | break; |
183 | |
default: |
184 | 407 | index++; |
185 | 407 | break; |
186 | |
} |
187 | |
} |
188 | |
|
189 | 6492 | return alternate ? result.getAlternate() : result.getPrimary(); |
190 | |
} |
191 | |
|
192 | |
|
193 | |
|
194 | |
|
195 | |
|
196 | |
|
197 | |
|
198 | |
|
199 | |
|
200 | |
@Override |
201 | |
public Object encode(Object obj) throws EncoderException { |
202 | 35 | if (!(obj instanceof String)) { |
203 | 3 | throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); |
204 | |
} |
205 | 32 | return doubleMetaphone((String) obj); |
206 | |
} |
207 | |
|
208 | |
|
209 | |
|
210 | |
|
211 | |
|
212 | |
|
213 | |
|
214 | |
@Override |
215 | |
public String encode(String value) { |
216 | 30 | return doubleMetaphone(value); |
217 | |
} |
218 | |
|
219 | |
|
220 | |
|
221 | |
|
222 | |
|
223 | |
|
224 | |
|
225 | |
|
226 | |
|
227 | |
|
228 | |
|
229 | |
public boolean isDoubleMetaphoneEqual(String value1, String value2) { |
230 | 22 | return isDoubleMetaphoneEqual(value1, value2, false); |
231 | |
} |
232 | |
|
233 | |
|
234 | |
|
235 | |
|
236 | |
|
237 | |
|
238 | |
|
239 | |
|
240 | |
|
241 | |
|
242 | |
|
243 | |
public boolean isDoubleMetaphoneEqual(String value1, String value2, boolean alternate) { |
244 | 1970 | return doubleMetaphone(value1, alternate).equals(doubleMetaphone(value2, alternate)); |
245 | |
} |
246 | |
|
247 | |
|
248 | |
|
249 | |
|
250 | |
|
251 | |
public int getMaxCodeLen() { |
252 | 19478 | return this.maxCodeLen; |
253 | |
} |
254 | |
|
255 | |
|
256 | |
|
257 | |
|
258 | |
|
259 | |
public void setMaxCodeLen(int maxCodeLen) { |
260 | 1 | this.maxCodeLen = maxCodeLen; |
261 | 1 | } |
262 | |
|
263 | |
|
264 | |
|
265 | |
|
266 | |
|
267 | |
|
268 | |
private int handleAEIOUY(DoubleMetaphoneResult result, int index) { |
269 | 13681 | if (index == 0) { |
270 | 1515 | result.append('A'); |
271 | |
} |
272 | 13681 | return index + 1; |
273 | |
} |
274 | |
|
275 | |
|
276 | |
|
277 | |
|
278 | |
private int handleC(String value, DoubleMetaphoneResult result, int index) { |
279 | 1680 | if (conditionC0(value, index)) { |
280 | 16 | result.append('K'); |
281 | 16 | index += 2; |
282 | 1664 | } else if (index == 0 && contains(value, index, 6, "CAESAR")) { |
283 | 6 | result.append('S'); |
284 | 6 | index += 2; |
285 | 1658 | } else if (contains(value, index, 2, "CH")) { |
286 | 156 | index = handleCH(value, result, index); |
287 | 1502 | } else if (contains(value, index, 2, "CZ") && |
288 | |
!contains(value, index - 2, 4, "WICZ")) { |
289 | |
|
290 | 7 | result.append('S', 'X'); |
291 | 7 | index += 2; |
292 | 1495 | } else if (contains(value, index + 1, 3, "CIA")) { |
293 | |
|
294 | 2 | result.append('X'); |
295 | 2 | index += 3; |
296 | 1493 | } else if (contains(value, index, 2, "CC") && |
297 | |
!(index == 1 && charAt(value, 0) == 'M')) { |
298 | |
|
299 | 109 | return handleCC(value, result, index); |
300 | 1384 | } else if (contains(value, index, 2, "CK", "CG", "CQ")) { |
301 | 111 | result.append('K'); |
302 | 111 | index += 2; |
303 | 1273 | } else if (contains(value, index, 2, "CI", "CE", "CY")) { |
304 | |
|
305 | 286 | if (contains(value, index, 3, "CIO", "CIE", "CIA")) { |
306 | 46 | result.append('S', 'X'); |
307 | |
} else { |
308 | 240 | result.append('S'); |
309 | |
} |
310 | 286 | index += 2; |
311 | |
} else { |
312 | 987 | result.append('K'); |
313 | 987 | if (contains(value, index + 1, 2, " C", " Q", " G")) { |
314 | |
|
315 | 4 | index += 3; |
316 | 983 | } else if (contains(value, index + 1, 1, "C", "K", "Q") && |
317 | |
!contains(value, index + 1, 2, "CE", "CI")) { |
318 | 9 | index += 2; |
319 | |
} else { |
320 | 974 | index++; |
321 | |
} |
322 | |
} |
323 | |
|
324 | 1571 | return index; |
325 | |
} |
326 | |
|
327 | |
|
328 | |
|
329 | |
|
330 | |
private int handleCC(String value, DoubleMetaphoneResult result, int index) { |
331 | 109 | if (contains(value, index + 2, 1, "I", "E", "H") && |
332 | |
!contains(value, index + 2, 2, "HU")) { |
333 | |
|
334 | 22 | if ((index == 1 && charAt(value, index - 1) == 'A') || |
335 | |
contains(value, index - 1, 5, "UCCEE", "UCCES")) { |
336 | |
|
337 | 14 | result.append("KS"); |
338 | |
} else { |
339 | |
|
340 | 8 | result.append('X'); |
341 | |
} |
342 | 22 | index += 3; |
343 | |
} else { |
344 | 87 | result.append('K'); |
345 | 87 | index += 2; |
346 | |
} |
347 | |
|
348 | 109 | return index; |
349 | |
} |
350 | |
|
351 | |
|
352 | |
|
353 | |
|
354 | |
private int handleCH(String value, DoubleMetaphoneResult result, int index) { |
355 | 156 | if (index > 0 && contains(value, index, 4, "CHAE")) { |
356 | 0 | result.append('K', 'X'); |
357 | 0 | return index + 2; |
358 | 156 | } else if (conditionCH0(value, index)) { |
359 | |
|
360 | 4 | result.append('K'); |
361 | 4 | return index + 2; |
362 | 152 | } else if (conditionCH1(value, index)) { |
363 | |
|
364 | 34 | result.append('K'); |
365 | 34 | return index + 2; |
366 | |
} else { |
367 | 118 | if (index > 0) { |
368 | 82 | if (contains(value, 0, 2, "MC")) { |
369 | 2 | result.append('K'); |
370 | |
} else { |
371 | 80 | result.append('X', 'K'); |
372 | |
} |
373 | |
} else { |
374 | 36 | result.append('X'); |
375 | |
} |
376 | 118 | return index + 2; |
377 | |
} |
378 | |
} |
379 | |
|
380 | |
|
381 | |
|
382 | |
|
383 | |
private int handleD(String value, DoubleMetaphoneResult result, int index) { |
384 | 1238 | if (contains(value, index, 2, "DG")) { |
385 | |
|
386 | 10 | if (contains(value, index + 2, 1, "I", "E", "Y")) { |
387 | 4 | result.append('J'); |
388 | 4 | index += 3; |
389 | |
|
390 | |
} else { |
391 | 6 | result.append("TK"); |
392 | 6 | index += 2; |
393 | |
} |
394 | 1228 | } else if (contains(value, index, 2, "DT", "DD")) { |
395 | 38 | result.append('T'); |
396 | 38 | index += 2; |
397 | |
} else { |
398 | 1190 | result.append('T'); |
399 | 1190 | index++; |
400 | |
} |
401 | 1238 | return index; |
402 | |
} |
403 | |
|
404 | |
|
405 | |
|
406 | |
|
407 | |
private int handleG(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) { |
408 | 801 | if (charAt(value, index + 1) == 'H') { |
409 | 106 | index = handleGH(value, result, index); |
410 | 695 | } else if (charAt(value, index + 1) == 'N') { |
411 | 26 | if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) { |
412 | 0 | result.append("KN", "N"); |
413 | 26 | } else if (!contains(value, index + 2, 2, "EY") && |
414 | |
charAt(value, index + 1) != 'Y' && !slavoGermanic) { |
415 | 22 | result.append("N", "KN"); |
416 | |
} else { |
417 | 4 | result.append("KN"); |
418 | |
} |
419 | 26 | index = index + 2; |
420 | 669 | } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) { |
421 | 4 | result.append("KL", "L"); |
422 | 4 | index += 2; |
423 | 665 | } else if (index == 0 && |
424 | |
(charAt(value, index + 1) == 'Y' || |
425 | |
contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) { |
426 | |
|
427 | 16 | result.append('K', 'J'); |
428 | 16 | index += 2; |
429 | 649 | } else if ((contains(value, index + 1, 2, "ER") || |
430 | |
charAt(value, index + 1) == 'Y') && |
431 | |
!contains(value, 0, 6, "DANGER", "RANGER", "MANGER") && |
432 | |
!contains(value, index - 1, 1, "E", "I") && |
433 | |
!contains(value, index - 1, 3, "RGY", "OGY")) { |
434 | |
|
435 | 22 | result.append('K', 'J'); |
436 | 22 | index += 2; |
437 | 627 | } else if (contains(value, index + 1, 1, "E", "I", "Y") || |
438 | |
contains(value, index - 1, 4, "AGGI", "OGGI")) { |
439 | |
|
440 | 182 | if (contains(value, 0 ,4, "VAN ", "VON ") || |
441 | |
contains(value, 0, 3, "SCH") || |
442 | |
contains(value, index + 1, 2, "ET")) { |
443 | |
|
444 | 2 | result.append('K'); |
445 | 180 | } else if (contains(value, index + 1, 3, "IER")) { |
446 | 4 | result.append('J'); |
447 | |
} else { |
448 | 176 | result.append('J', 'K'); |
449 | |
} |
450 | 182 | index += 2; |
451 | 445 | } else if (charAt(value, index + 1) == 'G') { |
452 | 34 | index += 2; |
453 | 34 | result.append('K'); |
454 | |
} else { |
455 | 411 | index++; |
456 | 411 | result.append('K'); |
457 | |
} |
458 | 801 | return index; |
459 | |
} |
460 | |
|
461 | |
|
462 | |
|
463 | |
|
464 | |
private int handleGH(String value, DoubleMetaphoneResult result, int index) { |
465 | 106 | if (index > 0 && !isVowel(charAt(value, index - 1))) { |
466 | 4 | result.append('K'); |
467 | 4 | index += 2; |
468 | 102 | } else if (index == 0) { |
469 | 8 | if (charAt(value, index + 2) == 'I') { |
470 | 4 | result.append('J'); |
471 | |
} else { |
472 | 4 | result.append('K'); |
473 | |
} |
474 | 8 | index += 2; |
475 | 94 | } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) || |
476 | |
(index > 2 && contains(value, index - 3, 1, "B", "H", "D")) || |
477 | |
(index > 3 && contains(value, index - 4, 1, "B", "H"))) { |
478 | |
|
479 | 28 | index += 2; |
480 | |
} else { |
481 | 66 | if (index > 2 && charAt(value, index - 1) == 'U' && |
482 | |
contains(value, index - 3, 1, "C", "G", "L", "R", "T")) { |
483 | |
|
484 | 22 | result.append('F'); |
485 | 44 | } else if (index > 0 && charAt(value, index - 1) != 'I') { |
486 | 8 | result.append('K'); |
487 | |
} |
488 | 66 | index += 2; |
489 | |
} |
490 | 106 | return index; |
491 | |
} |
492 | |
|
493 | |
|
494 | |
|
495 | |
|
496 | |
private int handleH(String value, DoubleMetaphoneResult result, int index) { |
497 | |
|
498 | 521 | if ((index == 0 || isVowel(charAt(value, index - 1))) && |
499 | |
isVowel(charAt(value, index + 1))) { |
500 | 387 | result.append('H'); |
501 | 387 | index += 2; |
502 | |
|
503 | |
} else { |
504 | 134 | index++; |
505 | |
} |
506 | 521 | return index; |
507 | |
} |
508 | |
|
509 | |
|
510 | |
|
511 | |
|
512 | |
private int handleJ(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) { |
513 | 87 | if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) { |
514 | |
|
515 | 11 | if ((index == 0 && (charAt(value, index + 4) == ' ') || |
516 | |
value.length() == 4) || contains(value, 0, 4, "SAN ")) { |
517 | 9 | result.append('H'); |
518 | |
} else { |
519 | 2 | result.append('J', 'H'); |
520 | |
} |
521 | 11 | index++; |
522 | |
} else { |
523 | 76 | if (index == 0 && !contains(value, index, 4, "JOSE")) { |
524 | 48 | result.append('J', 'A'); |
525 | 28 | } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && |
526 | |
(charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) { |
527 | 10 | result.append('J', 'H'); |
528 | 18 | } else if (index == value.length() - 1) { |
529 | 0 | result.append('J', ' '); |
530 | 18 | } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && |
531 | |
!contains(value, index - 1, 1, "S", "K", "L")) { |
532 | 18 | result.append('J'); |
533 | |
} |
534 | |
|
535 | 76 | if (charAt(value, index + 1) == 'J') { |
536 | 0 | index += 2; |
537 | |
} else { |
538 | 76 | index++; |
539 | |
} |
540 | |
} |
541 | 87 | return index; |
542 | |
} |
543 | |
|
544 | |
|
545 | |
|
546 | |
|
547 | |
private int handleL(String value, DoubleMetaphoneResult result, int index) { |
548 | 1797 | if (charAt(value, index + 1) == 'L') { |
549 | 353 | if (conditionL0(value, index)) { |
550 | 4 | result.appendPrimary('L'); |
551 | |
} else { |
552 | 349 | result.append('L'); |
553 | |
} |
554 | 353 | index += 2; |
555 | |
} else { |
556 | 1444 | index++; |
557 | 1444 | result.append('L'); |
558 | |
} |
559 | 1797 | return index; |
560 | |
} |
561 | |
|
562 | |
|
563 | |
|
564 | |
|
565 | |
private int handleP(String value, DoubleMetaphoneResult result, int index) { |
566 | 1144 | if (charAt(value, index + 1) == 'H') { |
567 | 82 | result.append('F'); |
568 | 82 | index += 2; |
569 | |
} else { |
570 | 1062 | result.append('P'); |
571 | 1062 | index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1; |
572 | |
} |
573 | 1144 | return index; |
574 | |
} |
575 | |
|
576 | |
|
577 | |
|
578 | |
|
579 | |
private int handleR(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) { |
580 | 2737 | if (index == value.length() - 1 && !slavoGermanic && |
581 | |
contains(value, index - 2, 2, "IE") && |
582 | |
!contains(value, index - 4, 2, "ME", "MA")) { |
583 | 12 | result.appendAlternate('R'); |
584 | |
} else { |
585 | 2725 | result.append('R'); |
586 | |
} |
587 | 2737 | return charAt(value, index + 1) == 'R' ? index + 2 : index + 1; |
588 | |
} |
589 | |
|
590 | |
|
591 | |
|
592 | |
|
593 | |
private int handleS(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) { |
594 | 2151 | if (contains(value, index - 1, 3, "ISL", "YSL")) { |
595 | |
|
596 | 12 | index++; |
597 | 2139 | } else if (index == 0 && contains(value, index, 5, "SUGAR")) { |
598 | |
|
599 | 4 | result.append('X', 'S'); |
600 | 4 | index++; |
601 | 2135 | } else if (contains(value, index, 2, "SH")) { |
602 | 78 | if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) { |
603 | |
|
604 | 6 | result.append('S'); |
605 | |
} else { |
606 | 72 | result.append('X'); |
607 | |
} |
608 | 78 | index += 2; |
609 | 2057 | } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) { |
610 | |
|
611 | 50 | if (slavoGermanic) { |
612 | 0 | result.append('S'); |
613 | |
} else { |
614 | 50 | result.append('S', 'X'); |
615 | |
} |
616 | 50 | index += 3; |
617 | 2007 | } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || |
618 | |
contains(value, index + 1, 1, "Z")) { |
619 | |
|
620 | |
|
621 | |
|
622 | |
|
623 | 48 | result.append('S', 'X'); |
624 | 48 | index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1; |
625 | 1959 | } else if (contains(value, index, 2, "SC")) { |
626 | 114 | index = handleSC(value, result, index); |
627 | |
} else { |
628 | 1845 | if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) { |
629 | |
|
630 | 4 | result.appendAlternate('S'); |
631 | |
} else { |
632 | 1841 | result.append('S'); |
633 | |
} |
634 | 1845 | index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1; |
635 | |
} |
636 | 2151 | return index; |
637 | |
} |
638 | |
|
639 | |
|
640 | |
|
641 | |
|
642 | |
private int handleSC(String value, DoubleMetaphoneResult result, int index) { |
643 | 114 | if (charAt(value, index + 2) == 'H') { |
644 | |
|
645 | 38 | if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) { |
646 | |
|
647 | 10 | if (contains(value, index + 3, 2, "ER", "EN")) { |
648 | |
|
649 | 6 | result.append("X", "SK"); |
650 | |
} else { |
651 | 4 | result.append("SK"); |
652 | |
} |
653 | |
} else { |
654 | 28 | if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') { |
655 | 10 | result.append('X', 'S'); |
656 | |
} else { |
657 | 18 | result.append('X'); |
658 | |
} |
659 | |
} |
660 | 76 | } else if (contains(value, index + 2, 1, "I", "E", "Y")) { |
661 | 12 | result.append('S'); |
662 | |
} else { |
663 | 64 | result.append("SK"); |
664 | |
} |
665 | 114 | return index + 3; |
666 | |
} |
667 | |
|
668 | |
|
669 | |
|
670 | |
|
671 | |
private int handleT(String value, DoubleMetaphoneResult result, int index) { |
672 | 2224 | if (contains(value, index, 4, "TION")) { |
673 | 52 | result.append('X'); |
674 | 52 | index += 3; |
675 | 2172 | } else if (contains(value, index, 3, "TIA", "TCH")) { |
676 | 17 | result.append('X'); |
677 | 17 | index += 3; |
678 | 2155 | } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) { |
679 | 168 | if (contains(value, index + 2, 2, "OM", "AM") || |
680 | |
|
681 | |
contains(value, 0, 4, "VAN ", "VON ") || |
682 | |
contains(value, 0, 3, "SCH")) { |
683 | 10 | result.append('T'); |
684 | |
} else { |
685 | 158 | result.append('0', 'T'); |
686 | |
} |
687 | 168 | index += 2; |
688 | |
} else { |
689 | 1987 | result.append('T'); |
690 | 1987 | index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1; |
691 | |
} |
692 | 2224 | return index; |
693 | |
} |
694 | |
|
695 | |
|
696 | |
|
697 | |
|
698 | |
private int handleW(String value, DoubleMetaphoneResult result, int index) { |
699 | 519 | if (contains(value, index, 2, "WR")) { |
700 | |
|
701 | 12 | result.append('R'); |
702 | 12 | index += 2; |
703 | |
} else { |
704 | 507 | if (index == 0 && (isVowel(charAt(value, index + 1)) || |
705 | |
contains(value, index, 2, "WH"))) { |
706 | 216 | if (isVowel(charAt(value, index + 1))) { |
707 | |
|
708 | 186 | result.append('A', 'F'); |
709 | |
} else { |
710 | |
|
711 | 30 | result.append('A'); |
712 | |
} |
713 | 216 | index++; |
714 | 291 | } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) || |
715 | |
contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") || |
716 | |
contains(value, 0, 3, "SCH")) { |
717 | |
|
718 | 46 | result.appendAlternate('F'); |
719 | 46 | index++; |
720 | 245 | } else if (contains(value, index, 4, "WICZ", "WITZ")) { |
721 | |
|
722 | 16 | result.append("TS", "FX"); |
723 | 16 | index += 4; |
724 | |
} else { |
725 | 229 | index++; |
726 | |
} |
727 | |
} |
728 | 519 | return index; |
729 | |
} |
730 | |
|
731 | |
|
732 | |
|
733 | |
|
734 | |
private int handleX(String value, DoubleMetaphoneResult result, int index) { |
735 | 152 | if (index == 0) { |
736 | 5 | result.append('S'); |
737 | 5 | index++; |
738 | |
} else { |
739 | 147 | if (!((index == value.length() - 1) && |
740 | |
(contains(value, index - 3, 3, "IAU", "EAU") || |
741 | |
contains(value, index - 2, 2, "AU", "OU")))) { |
742 | |
|
743 | 141 | result.append("KS"); |
744 | |
} |
745 | 147 | index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1; |
746 | |
} |
747 | 152 | return index; |
748 | |
} |
749 | |
|
750 | |
|
751 | |
|
752 | |
|
753 | |
private int handleZ(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) { |
754 | 97 | if (charAt(value, index + 1) == 'H') { |
755 | |
|
756 | 2 | result.append('J'); |
757 | 2 | index += 2; |
758 | |
} else { |
759 | 95 | if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || |
760 | |
(slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) { |
761 | 12 | result.append("S", "TS"); |
762 | |
} else { |
763 | 83 | result.append('S'); |
764 | |
} |
765 | 95 | index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1; |
766 | |
} |
767 | 97 | return index; |
768 | |
} |
769 | |
|
770 | |
|
771 | |
|
772 | |
|
773 | |
|
774 | |
|
775 | |
private boolean conditionC0(String value, int index) { |
776 | 1680 | if (contains(value, index, 4, "CHIA")) { |
777 | 2 | return true; |
778 | 1678 | } else if (index <= 1) { |
779 | 680 | return false; |
780 | 998 | } else if (isVowel(charAt(value, index - 2))) { |
781 | 357 | return false; |
782 | 641 | } else if (!contains(value, index - 1, 3, "ACH")) { |
783 | 621 | return false; |
784 | |
} else { |
785 | 20 | char c = charAt(value, index + 2); |
786 | 20 | return (c != 'I' && c != 'E') || |
787 | |
contains(value, index - 2, 6, "BACHER", "MACHER"); |
788 | |
} |
789 | |
} |
790 | |
|
791 | |
|
792 | |
|
793 | |
|
794 | |
private boolean conditionCH0(String value, int index) { |
795 | 156 | if (index != 0) { |
796 | 114 | return false; |
797 | 42 | } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && |
798 | |
!contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) { |
799 | 38 | return false; |
800 | 4 | } else if (contains(value, 0, 5, "CHORE")) { |
801 | 0 | return false; |
802 | |
} else { |
803 | 4 | return true; |
804 | |
} |
805 | |
} |
806 | |
|
807 | |
|
808 | |
|
809 | |
|
810 | |
private boolean conditionCH1(String value, int index) { |
811 | 152 | return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || |
812 | |
contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") || |
813 | |
contains(value, index + 2, 1, "T", "S") || |
814 | |
((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) && |
815 | |
(contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1))); |
816 | |
} |
817 | |
|
818 | |
|
819 | |
|
820 | |
|
821 | |
private boolean conditionL0(String value, int index) { |
822 | 353 | if (index == value.length() - 3 && |
823 | |
contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) { |
824 | 2 | return true; |
825 | 351 | } else if ((contains(value, value.length() - 2, 2, "AS", "OS") || |
826 | |
contains(value, value.length() - 1, 1, "A", "O")) && |
827 | |
contains(value, index - 1, 4, "ALLE")) { |
828 | 2 | return true; |
829 | |
} else { |
830 | 349 | return false; |
831 | |
} |
832 | |
} |
833 | |
|
834 | |
|
835 | |
|
836 | |
|
837 | |
private boolean conditionM0(String value, int index) { |
838 | 1241 | if (charAt(value, index + 1) == 'M') { |
839 | 100 | return true; |
840 | |
} |
841 | 1141 | return contains(value, index - 1, 3, "UMB") && |
842 | |
((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER")); |
843 | |
} |
844 | |
|
845 | |
|
846 | |
|
847 | |
|
848 | |
|
849 | |
|
850 | |
|
851 | |
private boolean isSlavoGermanic(String value) { |
852 | 6492 | return value.indexOf('W') > -1 || value.indexOf('K') > -1 || |
853 | |
value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1; |
854 | |
} |
855 | |
|
856 | |
|
857 | |
|
858 | |
|
859 | |
private boolean isVowel(char ch) { |
860 | 2201 | return VOWELS.indexOf(ch) != -1; |
861 | |
} |
862 | |
|
863 | |
|
864 | |
|
865 | |
|
866 | |
|
867 | |
|
868 | |
private boolean isSilentStart(String value) { |
869 | 6492 | boolean result = false; |
870 | 38804 | for (String element : SILENT_START) { |
871 | 32380 | if (value.startsWith(element)) { |
872 | 68 | result = true; |
873 | 68 | break; |
874 | |
} |
875 | |
} |
876 | 6492 | return result; |
877 | |
} |
878 | |
|
879 | |
|
880 | |
|
881 | |
|
882 | |
private String cleanInput(String input) { |
883 | 6504 | if (input == null) { |
884 | 3 | return null; |
885 | |
} |
886 | 6501 | input = input.trim(); |
887 | 6501 | if (input.length() == 0) { |
888 | 9 | return null; |
889 | |
} |
890 | 6492 | return input.toUpperCase(java.util.Locale.ENGLISH); |
891 | |
} |
892 | |
|
893 | |
|
894 | |
|
895 | |
|
896 | |
|
897 | |
|
898 | |
protected char charAt(String value, int index) { |
899 | 17628 | if (index < 0 || index >= value.length()) { |
900 | 1502 | return Character.MIN_VALUE; |
901 | |
} |
902 | 16126 | return value.charAt(index); |
903 | |
} |
904 | |
|
905 | |
|
906 | |
|
907 | |
|
908 | |
private static boolean contains(String value, int start, int length, String criteria) { |
909 | 30236 | return contains(value, start, length, new String[] { criteria }); |
910 | |
} |
911 | |
|
912 | |
|
913 | |
|
914 | |
|
915 | |
private static boolean contains(String value, int start, int length, |
916 | |
String criteria1, String criteria2) { |
917 | 15344 | return contains(value, start, length, new String[] { criteria1, criteria2 }); |
918 | |
} |
919 | |
|
920 | |
|
921 | |
|
922 | |
|
923 | |
private static boolean contains(String value, int start, int length, |
924 | |
String criteria1, String criteria2, String criteria3) { |
925 | 6280 | return contains(value, start, length, new String[] { criteria1, criteria2, criteria3 }); |
926 | |
} |
927 | |
|
928 | |
|
929 | |
|
930 | |
|
931 | |
private static boolean contains(String value, int start, int length, |
932 | |
String criteria1, String criteria2, |
933 | |
String criteria3, String criteria4) { |
934 | 1037 | return contains(value, start, length, |
935 | |
new String[] { criteria1, criteria2, criteria3, criteria4 }); |
936 | |
} |
937 | |
|
938 | |
|
939 | |
|
940 | |
|
941 | |
private static boolean contains(String value, int start, int length, |
942 | |
String criteria1, String criteria2, |
943 | |
String criteria3, String criteria4, |
944 | |
String criteria5) { |
945 | 28 | return contains(value, start, length, |
946 | |
new String[] { criteria1, criteria2, criteria3, |
947 | |
criteria4, criteria5 }); |
948 | |
} |
949 | |
|
950 | |
|
951 | |
|
952 | |
|
953 | |
private static boolean contains(String value, int start, int length, |
954 | |
String criteria1, String criteria2, |
955 | |
String criteria3, String criteria4, |
956 | |
String criteria5, String criteria6) { |
957 | 38 | return contains(value, start, length, |
958 | |
new String[] { criteria1, criteria2, criteria3, |
959 | |
criteria4, criteria5, criteria6 }); |
960 | |
} |
961 | |
|
962 | |
|
963 | |
|
964 | |
|
965 | |
|
966 | |
protected static boolean contains(String value, int start, int length, |
967 | |
String[] criteria) { |
968 | 53209 | boolean result = false; |
969 | 53209 | if (start >= 0 && start + length <= value.length()) { |
970 | 45038 | String target = value.substring(start, start + length); |
971 | |
|
972 | 115116 | for (String element : criteria) { |
973 | 72502 | if (target.equals(element)) { |
974 | 2424 | result = true; |
975 | 2424 | break; |
976 | |
} |
977 | |
} |
978 | |
} |
979 | 53209 | return result; |
980 | |
} |
981 | |
|
982 | |
|
983 | |
|
984 | |
|
985 | |
|
986 | |
|
987 | |
public class DoubleMetaphoneResult { |
988 | |
|
989 | 6492 | private final StringBuilder primary = new StringBuilder(getMaxCodeLen()); |
990 | 6492 | private final StringBuilder alternate = new StringBuilder(getMaxCodeLen()); |
991 | |
private final int maxLength; |
992 | |
|
993 | 6492 | public DoubleMetaphoneResult(int maxLength) { |
994 | 6492 | this.maxLength = maxLength; |
995 | 6492 | } |
996 | |
|
997 | |
public void append(char value) { |
998 | 21356 | appendPrimary(value); |
999 | 21356 | appendAlternate(value); |
1000 | 21356 | } |
1001 | |
|
1002 | |
public void append(char primary, char alternate) { |
1003 | 863 | appendPrimary(primary); |
1004 | 863 | appendAlternate(alternate); |
1005 | 863 | } |
1006 | |
|
1007 | |
public void appendPrimary(char value) { |
1008 | 22223 | if (this.primary.length() < this.maxLength) { |
1009 | 22221 | this.primary.append(value); |
1010 | |
} |
1011 | 22223 | } |
1012 | |
|
1013 | |
public void appendAlternate(char value) { |
1014 | 22281 | if (this.alternate.length() < this.maxLength) { |
1015 | 22263 | this.alternate.append(value); |
1016 | |
} |
1017 | 22281 | } |
1018 | |
|
1019 | |
public void append(String value) { |
1020 | 233 | appendPrimary(value); |
1021 | 233 | appendAlternate(value); |
1022 | 233 | } |
1023 | |
|
1024 | |
public void append(String primary, String alternate) { |
1025 | 60 | appendPrimary(primary); |
1026 | 60 | appendAlternate(alternate); |
1027 | 60 | } |
1028 | |
|
1029 | |
public void appendPrimary(String value) { |
1030 | 293 | int addChars = this.maxLength - this.primary.length(); |
1031 | 293 | if (value.length() <= addChars) { |
1032 | 249 | this.primary.append(value); |
1033 | |
} else { |
1034 | 44 | this.primary.append(value.substring(0, addChars)); |
1035 | |
} |
1036 | 293 | } |
1037 | |
|
1038 | |
public void appendAlternate(String value) { |
1039 | 293 | int addChars = this.maxLength - this.alternate.length(); |
1040 | 293 | if (value.length() <= addChars) { |
1041 | 237 | this.alternate.append(value); |
1042 | |
} else { |
1043 | 56 | this.alternate.append(value.substring(0, addChars)); |
1044 | |
} |
1045 | 293 | } |
1046 | |
|
1047 | |
public String getPrimary() { |
1048 | 3299 | return this.primary.toString(); |
1049 | |
} |
1050 | |
|
1051 | |
public String getAlternate() { |
1052 | 3193 | return this.alternate.toString(); |
1053 | |
} |
1054 | |
|
1055 | |
public boolean isComplete() { |
1056 | 42088 | return this.primary.length() >= this.maxLength && |
1057 | |
this.alternate.length() >= this.maxLength; |
1058 | |
} |
1059 | |
} |
1060 | |
} |