1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language.bm;
19
20 import static org.junit.Assert.*;
21
22 import java.util.Arrays;
23 import java.util.HashSet;
24 import java.util.Map;
25 import java.util.TreeMap;
26
27 import org.junit.Test;
28
29
30
31
32
33
34 public class PhoneticEngineRegressionTest {
35
36 @Test
37 public void testSolrGENERIC() {
38 Map<String, String> args;
39
40
41 args = new TreeMap<String, String>();
42 args.put("nameType", "GENERIC");
43 assertEquals(encode(args, true, "Angelo"), "YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo");
44 args.put("ruleType", "EXACT");
45 assertEquals(encode(args, true, "Angelo"), "anZelo|andZelo|angelo|anhelo|anjelo|anxelo");
46 assertEquals(encode(args, true, "D'Angelo"), "(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)");
47 args.put("languageSet", "italian,greek,spanish");
48 assertEquals(encode(args, true, "Angelo"), "andZelo|angelo|anxelo");
49 assertEquals(encode(args, true, "1234"), "");
50
51
52 args = new TreeMap<String, String>();
53 assertEquals(encode(args, false, "Angelo"), "YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo");
54 args.put("ruleType", "EXACT");
55 assertEquals(encode(args, false, "Angelo"), "anZelo|andZelo|angelo|anhelo|anjelo|anxelo");
56 assertEquals(encode(args, false, "D'Angelo"), "(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)");
57 args.put("languageSet", "italian,greek,spanish");
58 assertEquals(encode(args, false, "Angelo"), "andZelo|angelo|anxelo");
59 assertEquals(encode(args, false, "1234"), "");
60
61
62 args = new TreeMap<String, String>();
63 assertEquals(encode(args, true, "Angelo"), "YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo");
64 args.put("ruleType", "APPROX");
65 assertEquals(encode(args, true, "Angelo"), "YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo");
66 assertEquals(encode(args, true, "D'Angelo"), "(YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo)-(dYngYlo|dYngilo|dagilo|dangYlo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongYlo|dongilo|doniilo|donilo|donxilo|donzilo)");
67 args.put("languageSet", "italian,greek,spanish");
68 assertEquals(encode(args, true, "Angelo"), "angilo|anxilo|anzilo|ongilo|onxilo|onzilo");
69 assertEquals(encode(args, true, "1234"), "");
70
71
72 args = new TreeMap<String, String>();
73 assertEquals(encode(args, false, "Angelo"), "YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo");
74 args.put("ruleType", "APPROX");
75 assertEquals(encode(args, false, "Angelo"), "YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo");
76 assertEquals(encode(args, false, "D'Angelo"), "(YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo)-(dYngYlo|dYngilo|dagilo|dangYlo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongYlo|dongilo|doniilo|donilo|donxilo|donzilo)");
77 args.put("languageSet", "italian,greek,spanish");
78 assertEquals(encode(args, false, "Angelo"), "angilo|anxilo|anzilo|ongilo|onxilo|onzilo");
79 assertEquals(encode(args, false, "1234"), "");
80 }
81
82 @Test
83 public void testSolrASHKENAZI() {
84 Map<String, String> args;
85
86
87 args = new TreeMap<String, String>();
88 args.put("nameType", "ASHKENAZI");
89 assertEquals(encode(args, true, "Angelo"), "YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo");
90 args.put("ruleType", "EXACT");
91 assertEquals(encode(args, true, "Angelo"), "andZelo|angelo|anhelo|anxelo");
92 assertEquals(encode(args, true, "D'Angelo"), "dandZelo|dangelo|danhelo|danxelo");
93 args.put("languageSet", "italian,greek,spanish");
94 assertEquals(encode(args, true, "Angelo"), "angelo|anxelo");
95 assertEquals(encode(args, true, "1234"), "");
96
97
98 args = new TreeMap<String, String>();
99 args.put("nameType", "ASHKENAZI");
100 assertEquals(encode(args, false, "Angelo"), "YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo");
101 args.put("ruleType", "EXACT");
102 assertEquals(encode(args, false, "Angelo"), "andZelo|angelo|anhelo|anxelo");
103 assertEquals(encode(args, false, "D'Angelo"), "dandZelo|dangelo|danhelo|danxelo");
104 args.put("languageSet", "italian,greek,spanish");
105 assertEquals(encode(args, false, "Angelo"), "angelo|anxelo");
106 assertEquals(encode(args, false, "1234"), "");
107
108
109 args = new TreeMap<String, String>();
110 args.put("nameType", "ASHKENAZI");
111 assertEquals(encode(args, true, "Angelo"), "YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo");
112 args.put("ruleType", "APPROX");
113 assertEquals(encode(args, true, "Angelo"), "YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo");
114 assertEquals(encode(args, true, "D'Angelo"), "dYngYlo|dYngilo|dangYlo|dangilo|danilo|danxilo|danzilo|dongYlo|dongilo|donilo|donxilo|donzilo");
115 args.put("languageSet", "italian,greek,spanish");
116 assertEquals(encode(args, true, "Angelo"), "angilo|anxilo|ongilo|onxilo");
117 assertEquals(encode(args, true, "1234"), "");
118
119
120 args = new TreeMap<String, String>();
121 args.put("nameType", "ASHKENAZI");
122 assertEquals(encode(args, false, "Angelo"), "YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo");
123 args.put("ruleType", "APPROX");
124 assertEquals(encode(args, false, "Angelo"), "YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo");
125 assertEquals(encode(args, false, "D'Angelo"), "dYngYlo|dYngilo|dangYlo|dangilo|danilo|danxilo|danzilo|dongYlo|dongilo|donilo|donxilo|donzilo");
126 args.put("languageSet", "italian,greek,spanish");
127 assertEquals(encode(args, false, "Angelo"), "angilo|anxilo|ongilo|onxilo");
128 assertEquals(encode(args, false, "1234"), "");
129 }
130
131 @Test
132 public void testSolrSEPHARDIC() {
133 Map<String, String> args;
134
135
136 args = new TreeMap<String, String>();
137 args.put("nameType", "SEPHARDIC");
138 assertEquals(encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
139 args.put("ruleType", "EXACT");
140 assertEquals(encode(args, true, "Angelo"), "anZelo|andZelo|anxelo");
141 assertEquals(encode(args, true, "D'Angelo"), "anZelo|andZelo|anxelo");
142 args.put("languageSet", "italian,greek,spanish");
143 assertEquals(encode(args, true, "Angelo"), "andZelo|anxelo");
144 assertEquals(encode(args, true, "1234"), "");
145
146
147 args = new TreeMap<String, String>();
148 args.put("nameType", "SEPHARDIC");
149 assertEquals(encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
150 args.put("ruleType", "EXACT");
151 assertEquals(encode(args, false, "Angelo"), "anZelo|andZelo|anxelo");
152 assertEquals(encode(args, false, "D'Angelo"), "danZelo|dandZelo|danxelo");
153 args.put("languageSet", "italian,greek,spanish");
154 assertEquals(encode(args, false, "Angelo"), "andZelo|anxelo");
155 assertEquals(encode(args, false, "1234"), "");
156
157
158 args = new TreeMap<String, String>();
159 args.put("nameType", "SEPHARDIC");
160 assertEquals(encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
161 args.put("ruleType", "APPROX");
162 assertEquals(encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
163 assertEquals(encode(args, true, "D'Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
164 args.put("languageSet", "italian,greek,spanish");
165 assertEquals(encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
166 assertEquals(encode(args, true, "1234"), "");
167
168
169 args = new TreeMap<String, String>();
170 args.put("nameType", "SEPHARDIC");
171 assertEquals(encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
172 args.put("ruleType", "APPROX");
173 assertEquals(encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
174 assertEquals(encode(args, false, "D'Angelo"), "danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu");
175 args.put("languageSet", "italian,greek,spanish");
176 assertEquals(encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
177 assertEquals(encode(args, false, "1234"), "");
178 }
179
180 @Test
181 public void testCompatibilityWithOriginalVersion() {
182
183
184
185 Map<String, String> args = new TreeMap<String, String>();
186 args.put("nameType", "GENERIC");
187 args.put("ruleType", "APPROX");
188
189 assertEquals(encode(args, true, "abram"), "Ybram|Ybrom|abram|abran|abrom|abron|avram|avrom|obram|obran|obrom|obron|ovram|ovrom");
190 assertEquals(encode(args, true, "Bendzin"), "bndzn|bntsn|bnzn|vndzn|vntsn");
191
192 args.put("nameType", "ASHKENAZI");
193 args.put("ruleType", "APPROX");
194
195 assertEquals(encode(args, true, "abram"), "Ybram|Ybrom|abram|abrom|avram|avrom|imbram|imbrom|obram|obrom|ombram|ombrom|ovram|ovrom");
196 assertEquals(encode(args, true, "Halpern"), "YlpYrn|Ylpirn|alpYrn|alpirn|olpYrn|olpirn|xalpirn|xolpirn");
197
198 }
199
200
201
202
203
204
205
206
207 private static String encode(final Map<String, String> args, final boolean concat, final String input) {
208 Languages.LanguageSet languageSet;
209 PhoneticEngine engine;
210
211
212
213 final String nameTypeArg = args.get("nameType");
214 final NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg);
215
216 final String ruleTypeArg = args.get("ruleType");
217 final RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);
218
219 engine = new PhoneticEngine(nameType, ruleType, concat);
220
221
222 final String languageSetArg = args.get("languageSet");
223 if (languageSetArg == null || languageSetArg.equals("auto")) {
224 languageSet = null;
225 } else {
226 languageSet = Languages.LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(","))));
227 }
228
229
230
231
232
233
234
235
236
237
238 if (languageSet == null) {
239 return engine.encode(input);
240 } else {
241 return engine.encode(input, languageSet);
242 }
243 }
244 }