Coverage Report - org.apache.commons.codec.language.DoubleMetaphone
 
Classes in this File Line Coverage Branch Coverage Complexity
DoubleMetaphone
98%
371/378
90%
399/443
5.536
DoubleMetaphone$DoubleMetaphoneResult
100%
36/36
100%
12/12
5.536
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.commons.codec.language;
 19  
 
 20  
 import org.apache.commons.codec.EncoderException;
 21  
 import org.apache.commons.codec.StringEncoder;
 22  
 
 23  
 /**
 24  
  * Encodes a string into a double metaphone value. This implementation is based on the algorithm by <CITE>Lawrence
 25  
  * Philips</CITE>.
 26  
  * <p>
 27  
  * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable
 28  
  * (via {@link #setMaxCodeLen(int)}) but is not volatile, and accesses are not synchronized. If an instance of the class is
 29  
  * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
 30  
  * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
 31  
  *
 32  
  * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a>
 33  
  * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a>
 34  
  *
 35  
  * @version $Id$
 36  
  */
 37  
 public class DoubleMetaphone implements StringEncoder {
 38  
 
 39  
     /**
 40  
      * "Vowels" to test for
 41  
      */
 42  
     private static final String VOWELS = "AEIOUY";
 43  
 
 44  
     /**
 45  
      * Prefixes when present which are not pronounced
 46  
      */
 47  1
     private static final String[] SILENT_START =
 48  
         { "GN", "KN", "PN", "WR", "PS" };
 49  1
     private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
 50  
         { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
 51  1
     private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
 52  
         { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
 53  1
     private static final String[] L_T_K_S_N_M_B_Z =
 54  
         { "L", "T", "K", "S", "N", "M", "B", "Z" };
 55  
 
 56  
     /**
 57  
      * Maximum length of an encoding, default is 4
 58  
      */
 59  24
     private int maxCodeLen = 4;
 60  
 
 61  
     /**
 62  
      * Creates an instance of this DoubleMetaphone encoder
 63  
      */
 64  
     public DoubleMetaphone() {
 65  24
         super();
 66  24
     }
 67  
 
 68  
     /**
 69  
      * Encode a value with Double Metaphone.
 70  
      *
 71  
      * @param value String to encode
 72  
      * @return an encoded string
 73  
      */
 74  
     public String doubleMetaphone(String value) {
 75  82
         return doubleMetaphone(value, false);
 76  
     }
 77  
 
 78  
     /**
 79  
      * Encode a value with Double Metaphone, optionally using the alternate encoding.
 80  
      *
 81  
      * @param value String to encode
 82  
      * @param alternate use alternate encode
 83  
      * @return an encoded string
 84  
      */
 85  
     public String doubleMetaphone(String value, boolean alternate) {
 86  6504
         value = cleanInput(value);
 87  6504
         if (value == null) {
 88  12
             return null;
 89  
         }
 90  
 
 91  6492
         boolean slavoGermanic = isSlavoGermanic(value);
 92  6492
         int index = isSilentStart(value) ? 1 : 0;
 93  
 
 94  6492
         DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
 95  
 
 96  42088
         while (!result.isComplete() && index <= value.length() - 1) {
 97  35596
             switch (value.charAt(index)) {
 98  
             case 'A':
 99  
             case 'E':
 100  
             case 'I':
 101  
             case 'O':
 102  
             case 'U':
 103  
             case 'Y':
 104  13681
                 index = handleAEIOUY(result, index);
 105  13681
                 break;
 106  
             case 'B':
 107  884
                 result.append('P');
 108  884
                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
 109  884
                 break;
 110  
             case '\u00C7':
 111  
                 // A C with a Cedilla
 112  1
                 result.append('S');
 113  1
                 index++;
 114  1
                 break;
 115  
             case 'C':
 116  1680
                 index = handleC(value, result, index);
 117  1680
                 break;
 118  
             case 'D':
 119  1238
                 index = handleD(value, result, index);
 120  1238
                 break;
 121  
             case 'F':
 122  646
                 result.append('F');
 123  646
                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
 124  646
                 break;
 125  
             case 'G':
 126  801
                 index = handleG(value, result, index, slavoGermanic);
 127  801
                 break;
 128  
             case 'H':
 129  521
                 index = handleH(value, result, index);
 130  521
                 break;
 131  
             case 'J':
 132  87
                 index = handleJ(value, result, index, slavoGermanic);
 133  87
                 break;
 134  
             case 'K':
 135  324
                 result.append('K');
 136  324
                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
 137  324
                 break;
 138  
             case 'L':
 139  1797
                 index = handleL(value, result, index);
 140  1797
                 break;
 141  
             case 'M':
 142  1241
                 result.append('M');
 143  1241
                 index = conditionM0(value, index) ? index + 2 : index + 1;
 144  1241
                 break;
 145  
             case 'N':
 146  2777
                 result.append('N');
 147  2777
                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
 148  2777
                 break;
 149  
             case '\u00D1':
 150  
                 // N with a tilde (spanish ene)
 151  1
                 result.append('N');
 152  1
                 index++;
 153  1
                 break;
 154  
             case 'P':
 155  1144
                 index = handleP(value, result, index);
 156  1144
                 break;
 157  
             case 'Q':
 158  80
                 result.append('K');
 159  80
                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
 160  80
                 break;
 161  
             case 'R':
 162  2737
                 index = handleR(value, result, index, slavoGermanic);
 163  2737
                 break;
 164  
             case 'S':
 165  2151
                 index = handleS(value, result, index, slavoGermanic);
 166  2151
                 break;
 167  
             case 'T':
 168  2224
                 index = handleT(value, result, index);
 169  2224
                 break;
 170  
             case 'V':
 171  406
                 result.append('F');
 172  406
                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
 173  406
                 break;
 174  
             case 'W':
 175  519
                 index = handleW(value, result, index);
 176  519
                 break;
 177  
             case 'X':
 178  152
                 index = handleX(value, result, index);
 179  152
                 break;
 180  
             case 'Z':
 181  97
                 index = handleZ(value, result, index, slavoGermanic);
 182  97
                 break;
 183  
             default:
 184  407
                 index++;
 185  407
                 break;
 186  
             }
 187  
         }
 188  
 
 189  6492
         return alternate ? result.getAlternate() : result.getPrimary();
 190  
     }
 191  
 
 192  
     /**
 193  
      * Encode the value using DoubleMetaphone.  It will only work if
 194  
      * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
 195  
      *
 196  
      * @param obj Object to encode (should be of type String)
 197  
      * @return An encoded Object (will be of type String)
 198  
      * @throws EncoderException encode parameter is not of type String
 199  
      */
 200  
     @Override
 201  
     public Object encode(Object obj) throws EncoderException {
 202  35
         if (!(obj instanceof String)) {
 203  3
             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
 204  
         }
 205  32
         return doubleMetaphone((String) obj);
 206  
     }
 207  
 
 208  
     /**
 209  
      * Encode the value using DoubleMetaphone.
 210  
      *
 211  
      * @param value String to encode
 212  
      * @return An encoded String
 213  
      */
 214  
     @Override
 215  
     public String encode(String value) {
 216  30
         return doubleMetaphone(value);
 217  
     }
 218  
 
 219  
     /**
 220  
      * Check if the Double Metaphone values of two <code>String</code> values
 221  
      * are equal.
 222  
      *
 223  
      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
 224  
      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
 225  
      * @return {@code true} if the encoded <code>String</code>s are equal;
 226  
      *          {@code false} otherwise.
 227  
      * @see #isDoubleMetaphoneEqual(String,String,boolean)
 228  
      */
 229  
     public boolean isDoubleMetaphoneEqual(String value1, String value2) {
 230  22
         return isDoubleMetaphoneEqual(value1, value2, false);
 231  
     }
 232  
 
 233  
     /**
 234  
      * Check if the Double Metaphone values of two <code>String</code> values
 235  
      * are equal, optionally using the alternate value.
 236  
      *
 237  
      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
 238  
      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
 239  
      * @param alternate use the alternate value if {@code true}.
 240  
      * @return {@code true} if the encoded <code>String</code>s are equal;
 241  
      *          {@code false} otherwise.
 242  
      */
 243  
     public boolean isDoubleMetaphoneEqual(String value1, String value2, boolean alternate) {
 244  1970
         return doubleMetaphone(value1, alternate).equals(doubleMetaphone(value2, alternate));
 245  
     }
 246  
 
 247  
     /**
 248  
      * Returns the maxCodeLen.
 249  
      * @return the maximum length of an encoding
 250  
      */
 251  
     public int getMaxCodeLen() {
 252  19478
         return this.maxCodeLen;
 253  
     }
 254  
 
 255  
     /**
 256  
      * Sets the maxCodeLen.
 257  
      * @param maxCodeLen the maximum length of an encoding
 258  
      */
 259  
     public void setMaxCodeLen(int maxCodeLen) {
 260  1
         this.maxCodeLen = maxCodeLen;
 261  1
     }
 262  
 
 263  
     //-- BEGIN HANDLERS --//
 264  
 
 265  
     /**
 266  
      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
 267  
      */
 268  
     private int handleAEIOUY(DoubleMetaphoneResult result, int index) {
 269  13681
         if (index == 0) {
 270  1515
             result.append('A');
 271  
         }
 272  13681
         return index + 1;
 273  
     }
 274  
 
 275  
     /**
 276  
      * Handles 'C' cases.
 277  
      */
 278  
     private int handleC(String value, DoubleMetaphoneResult result, int index) {
 279  1680
         if (conditionC0(value, index)) {  // very confusing, moved out
 280  16
             result.append('K');
 281  16
             index += 2;
 282  1664
         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
 283  6
             result.append('S');
 284  6
             index += 2;
 285  1658
         } else if (contains(value, index, 2, "CH")) {
 286  156
             index = handleCH(value, result, index);
 287  1502
         } else if (contains(value, index, 2, "CZ") &&
 288  
                    !contains(value, index - 2, 4, "WICZ")) {
 289  
             //-- "Czerny" --//
 290  7
             result.append('S', 'X');
 291  7
             index += 2;
 292  1495
         } else if (contains(value, index + 1, 3, "CIA")) {
 293  
             //-- "focaccia" --//
 294  2
             result.append('X');
 295  2
             index += 3;
 296  1493
         } else if (contains(value, index, 2, "CC") &&
 297  
                    !(index == 1 && charAt(value, 0) == 'M')) {
 298  
             //-- double "cc" but not "McClelland" --//
 299  109
             return handleCC(value, result, index);
 300  1384
         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
 301  111
             result.append('K');
 302  111
             index += 2;
 303  1273
         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
 304  
             //-- Italian vs. English --//
 305  286
             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
 306  46
                 result.append('S', 'X');
 307  
             } else {
 308  240
                 result.append('S');
 309  
             }
 310  286
             index += 2;
 311  
         } else {
 312  987
             result.append('K');
 313  987
             if (contains(value, index + 1, 2, " C", " Q", " G")) {
 314  
                 //-- Mac Caffrey, Mac Gregor --//
 315  4
                 index += 3;
 316  983
             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
 317  
                        !contains(value, index + 1, 2, "CE", "CI")) {
 318  9
                 index += 2;
 319  
             } else {
 320  974
                 index++;
 321  
             }
 322  
         }
 323  
 
 324  1571
         return index;
 325  
     }
 326  
 
 327  
     /**
 328  
      * Handles 'CC' cases.
 329  
      */
 330  
     private int handleCC(String value, DoubleMetaphoneResult result, int index) {
 331  109
         if (contains(value, index + 2, 1, "I", "E", "H") &&
 332  
             !contains(value, index + 2, 2, "HU")) {
 333  
             //-- "bellocchio" but not "bacchus" --//
 334  22
             if ((index == 1 && charAt(value, index - 1) == 'A') ||
 335  
                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
 336  
                 //-- "accident", "accede", "succeed" --//
 337  14
                 result.append("KS");
 338  
             } else {
 339  
                 //-- "bacci", "bertucci", other Italian --//
 340  8
                 result.append('X');
 341  
             }
 342  22
             index += 3;
 343  
         } else {    // Pierce's rule
 344  87
             result.append('K');
 345  87
             index += 2;
 346  
         }
 347  
 
 348  109
         return index;
 349  
     }
 350  
 
 351  
     /**
 352  
      * Handles 'CH' cases.
 353  
      */
 354  
     private int handleCH(String value, DoubleMetaphoneResult result, int index) {
 355  156
         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
 356  0
             result.append('K', 'X');
 357  0
             return index + 2;
 358  156
         } else if (conditionCH0(value, index)) {
 359  
             //-- Greek roots ("chemistry", "chorus", etc.) --//
 360  4
             result.append('K');
 361  4
             return index + 2;
 362  152
         } else if (conditionCH1(value, index)) {
 363  
             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
 364  34
             result.append('K');
 365  34
             return index + 2;
 366  
         } else {
 367  118
             if (index > 0) {
 368  82
                 if (contains(value, 0, 2, "MC")) {
 369  2
                     result.append('K');
 370  
                 } else {
 371  80
                     result.append('X', 'K');
 372  
                 }
 373  
             } else {
 374  36
                 result.append('X');
 375  
             }
 376  118
             return index + 2;
 377  
         }
 378  
     }
 379  
 
 380  
     /**
 381  
      * Handles 'D' cases.
 382  
      */
 383  
     private int handleD(String value, DoubleMetaphoneResult result, int index) {
 384  1238
         if (contains(value, index, 2, "DG")) {
 385  
             //-- "Edge" --//
 386  10
             if (contains(value, index + 2, 1, "I", "E", "Y")) {
 387  4
                 result.append('J');
 388  4
                 index += 3;
 389  
                 //-- "Edgar" --//
 390  
             } else {
 391  6
                 result.append("TK");
 392  6
                 index += 2;
 393  
             }
 394  1228
         } else if (contains(value, index, 2, "DT", "DD")) {
 395  38
             result.append('T');
 396  38
             index += 2;
 397  
         } else {
 398  1190
             result.append('T');
 399  1190
             index++;
 400  
         }
 401  1238
         return index;
 402  
     }
 403  
 
 404  
     /**
 405  
      * Handles 'G' cases.
 406  
      */
 407  
     private int handleG(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
 408  801
         if (charAt(value, index + 1) == 'H') {
 409  106
             index = handleGH(value, result, index);
 410  695
         } else if (charAt(value, index + 1) == 'N') {
 411  26
             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
 412  0
                 result.append("KN", "N");
 413  26
             } else if (!contains(value, index + 2, 2, "EY") &&
 414  
                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
 415  22
                 result.append("N", "KN");
 416  
             } else {
 417  4
                 result.append("KN");
 418  
             }
 419  26
             index = index + 2;
 420  669
         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
 421  4
             result.append("KL", "L");
 422  4
             index += 2;
 423  665
         } else if (index == 0 &&
 424  
                    (charAt(value, index + 1) == 'Y' ||
 425  
                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
 426  
             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
 427  16
             result.append('K', 'J');
 428  16
             index += 2;
 429  649
         } else if ((contains(value, index + 1, 2, "ER") ||
 430  
                     charAt(value, index + 1) == 'Y') &&
 431  
                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
 432  
                    !contains(value, index - 1, 1, "E", "I") &&
 433  
                    !contains(value, index - 1, 3, "RGY", "OGY")) {
 434  
             //-- -ger-, -gy- --//
 435  22
             result.append('K', 'J');
 436  22
             index += 2;
 437  627
         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
 438  
                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
 439  
             //-- Italian "biaggi" --//
 440  182
             if (contains(value, 0 ,4, "VAN ", "VON ") ||
 441  
                 contains(value, 0, 3, "SCH") ||
 442  
                 contains(value, index + 1, 2, "ET")) {
 443  
                 //-- obvious germanic --//
 444  2
                 result.append('K');
 445  180
             } else if (contains(value, index + 1, 3, "IER")) {
 446  4
                 result.append('J');
 447  
             } else {
 448  176
                 result.append('J', 'K');
 449  
             }
 450  182
             index += 2;
 451  445
         } else if (charAt(value, index + 1) == 'G') {
 452  34
             index += 2;
 453  34
             result.append('K');
 454  
         } else {
 455  411
             index++;
 456  411
             result.append('K');
 457  
         }
 458  801
         return index;
 459  
     }
 460  
 
 461  
     /**
 462  
      * Handles 'GH' cases.
 463  
      */
 464  
     private int handleGH(String value, DoubleMetaphoneResult result, int index) {
 465  106
         if (index > 0 && !isVowel(charAt(value, index - 1))) {
 466  4
             result.append('K');
 467  4
             index += 2;
 468  102
         } else if (index == 0) {
 469  8
             if (charAt(value, index + 2) == 'I') {
 470  4
                 result.append('J');
 471  
             } else {
 472  4
                 result.append('K');
 473  
             }
 474  8
             index += 2;
 475  94
         } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
 476  
                    (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
 477  
                    (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
 478  
             //-- Parker's rule (with some further refinements) - "hugh"
 479  28
             index += 2;
 480  
         } else {
 481  66
             if (index > 2 && charAt(value, index - 1) == 'U' &&
 482  
                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
 483  
                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
 484  22
                 result.append('F');
 485  44
             } else if (index > 0 && charAt(value, index - 1) != 'I') {
 486  8
                 result.append('K');
 487  
             }
 488  66
             index += 2;
 489  
         }
 490  106
         return index;
 491  
     }
 492  
 
 493  
     /**
 494  
      * Handles 'H' cases.
 495  
      */
 496  
     private int handleH(String value, DoubleMetaphoneResult result, int index) {
 497  
         //-- only keep if first & before vowel or between 2 vowels --//
 498  521
         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
 499  
             isVowel(charAt(value, index + 1))) {
 500  387
             result.append('H');
 501  387
             index += 2;
 502  
             //-- also takes care of "HH" --//
 503  
         } else {
 504  134
             index++;
 505  
         }
 506  521
         return index;
 507  
     }
 508  
 
 509  
     /**
 510  
      * Handles 'J' cases.
 511  
      */
 512  
     private int handleJ(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
 513  87
         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
 514  
                 //-- obvious Spanish, "Jose", "San Jacinto" --//
 515  11
                 if ((index == 0 && (charAt(value, index + 4) == ' ') ||
 516  
                      value.length() == 4) || contains(value, 0, 4, "SAN ")) {
 517  9
                     result.append('H');
 518  
                 } else {
 519  2
                     result.append('J', 'H');
 520  
                 }
 521  11
                 index++;
 522  
             } else {
 523  76
                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
 524  48
                     result.append('J', 'A');
 525  28
                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
 526  
                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
 527  10
                     result.append('J', 'H');
 528  18
                 } else if (index == value.length() - 1) {
 529  0
                     result.append('J', ' ');
 530  18
                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
 531  
                            !contains(value, index - 1, 1, "S", "K", "L")) {
 532  18
                     result.append('J');
 533  
                 }
 534  
 
 535  76
                 if (charAt(value, index + 1) == 'J') {
 536  0
                     index += 2;
 537  
                 } else {
 538  76
                     index++;
 539  
                 }
 540  
             }
 541  87
         return index;
 542  
     }
 543  
 
 544  
     /**
 545  
      * Handles 'L' cases.
 546  
      */
 547  
     private int handleL(String value, DoubleMetaphoneResult result, int index) {
 548  1797
         if (charAt(value, index + 1) == 'L') {
 549  353
             if (conditionL0(value, index)) {
 550  4
                 result.appendPrimary('L');
 551  
             } else {
 552  349
                 result.append('L');
 553  
             }
 554  353
             index += 2;
 555  
         } else {
 556  1444
             index++;
 557  1444
             result.append('L');
 558  
         }
 559  1797
         return index;
 560  
     }
 561  
 
 562  
     /**
 563  
      * Handles 'P' cases.
 564  
      */
 565  
     private int handleP(String value, DoubleMetaphoneResult result, int index) {
 566  1144
         if (charAt(value, index + 1) == 'H') {
 567  82
             result.append('F');
 568  82
             index += 2;
 569  
         } else {
 570  1062
             result.append('P');
 571  1062
             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
 572  
         }
 573  1144
         return index;
 574  
     }
 575  
 
 576  
     /**
 577  
      * Handles 'R' cases.
 578  
      */
 579  
     private int handleR(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
 580  2737
         if (index == value.length() - 1 && !slavoGermanic &&
 581  
             contains(value, index - 2, 2, "IE") &&
 582  
             !contains(value, index - 4, 2, "ME", "MA")) {
 583  12
             result.appendAlternate('R');
 584  
         } else {
 585  2725
             result.append('R');
 586  
         }
 587  2737
         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
 588  
     }
 589  
 
 590  
     /**
 591  
      * Handles 'S' cases.
 592  
      */
 593  
     private int handleS(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
 594  2151
         if (contains(value, index - 1, 3, "ISL", "YSL")) {
 595  
             //-- special cases "island", "isle", "carlisle", "carlysle" --//
 596  12
             index++;
 597  2139
         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
 598  
             //-- special case "sugar-" --//
 599  4
             result.append('X', 'S');
 600  4
             index++;
 601  2135
         } else if (contains(value, index, 2, "SH")) {
 602  78
             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
 603  
                 //-- germanic --//
 604  6
                 result.append('S');
 605  
             } else {
 606  72
                 result.append('X');
 607  
             }
 608  78
             index += 2;
 609  2057
         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
 610  
             //-- Italian and Armenian --//
 611  50
             if (slavoGermanic) {
 612  0
                 result.append('S');
 613  
             } else {
 614  50
                 result.append('S', 'X');
 615  
             }
 616  50
             index += 3;
 617  2007
         } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
 618  
                    contains(value, index + 1, 1, "Z")) {
 619  
             //-- german & anglicisations, e.g. "smith" match "schmidt" //
 620  
             // "snider" match "schneider" --//
 621  
             //-- also, -sz- in Slavic languages, although in Hungarian it //
 622  
             //   is pronounced "s" --//
 623  48
             result.append('S', 'X');
 624  48
             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
 625  1959
         } else if (contains(value, index, 2, "SC")) {
 626  114
             index = handleSC(value, result, index);
 627  
         } else {
 628  1845
             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
 629  
                 //-- french e.g. "resnais", "artois" --//
 630  4
                 result.appendAlternate('S');
 631  
             } else {
 632  1841
                 result.append('S');
 633  
             }
 634  1845
             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
 635  
         }
 636  2151
         return index;
 637  
     }
 638  
 
 639  
     /**
 640  
      * Handles 'SC' cases.
 641  
      */
 642  
     private int handleSC(String value, DoubleMetaphoneResult result, int index) {
 643  114
         if (charAt(value, index + 2) == 'H') {
 644  
             //-- Schlesinger's rule --//
 645  38
             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
 646  
                 //-- Dutch origin, e.g. "school", "schooner" --//
 647  10
                 if (contains(value, index + 3, 2, "ER", "EN")) {
 648  
                     //-- "schermerhorn", "schenker" --//
 649  6
                     result.append("X", "SK");
 650  
                 } else {
 651  4
                     result.append("SK");
 652  
                 }
 653  
             } else {
 654  28
                 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
 655  10
                     result.append('X', 'S');
 656  
                 } else {
 657  18
                     result.append('X');
 658  
                 }
 659  
             }
 660  76
         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
 661  12
             result.append('S');
 662  
         } else {
 663  64
             result.append("SK");
 664  
         }
 665  114
         return index + 3;
 666  
     }
 667  
 
 668  
     /**
 669  
      * Handles 'T' cases.
 670  
      */
 671  
     private int handleT(String value, DoubleMetaphoneResult result, int index) {
 672  2224
         if (contains(value, index, 4, "TION")) {
 673  52
             result.append('X');
 674  52
             index += 3;
 675  2172
         } else if (contains(value, index, 3, "TIA", "TCH")) {
 676  17
             result.append('X');
 677  17
             index += 3;
 678  2155
         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
 679  168
             if (contains(value, index + 2, 2, "OM", "AM") ||
 680  
                 //-- special case "thomas", "thames" or germanic --//
 681  
                 contains(value, 0, 4, "VAN ", "VON ") ||
 682  
                 contains(value, 0, 3, "SCH")) {
 683  10
                 result.append('T');
 684  
             } else {
 685  158
                 result.append('0', 'T');
 686  
             }
 687  168
             index += 2;
 688  
         } else {
 689  1987
             result.append('T');
 690  1987
             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
 691  
         }
 692  2224
         return index;
 693  
     }
 694  
 
 695  
     /**
 696  
      * Handles 'W' cases.
 697  
      */
 698  
     private int handleW(String value, DoubleMetaphoneResult result, int index) {
 699  519
         if (contains(value, index, 2, "WR")) {
 700  
             //-- can also be in middle of word --//
 701  12
             result.append('R');
 702  12
             index += 2;
 703  
         } else {
 704  507
             if (index == 0 && (isVowel(charAt(value, index + 1)) ||
 705  
                                contains(value, index, 2, "WH"))) {
 706  216
                 if (isVowel(charAt(value, index + 1))) {
 707  
                     //-- Wasserman should match Vasserman --//
 708  186
                     result.append('A', 'F');
 709  
                 } else {
 710  
                     //-- need Uomo to match Womo --//
 711  30
                     result.append('A');
 712  
                 }
 713  216
                 index++;
 714  291
             } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
 715  
                        contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
 716  
                        contains(value, 0, 3, "SCH")) {
 717  
                 //-- Arnow should match Arnoff --//
 718  46
                 result.appendAlternate('F');
 719  46
                 index++;
 720  245
             } else if (contains(value, index, 4, "WICZ", "WITZ")) {
 721  
                 //-- Polish e.g. "filipowicz" --//
 722  16
                 result.append("TS", "FX");
 723  16
                 index += 4;
 724  
             } else {
 725  229
                 index++;
 726  
             }
 727  
         }
 728  519
         return index;
 729  
     }
 730  
 
 731  
     /**
 732  
      * Handles 'X' cases.
 733  
      */
 734  
     private int handleX(String value, DoubleMetaphoneResult result, int index) {
 735  152
         if (index == 0) {
 736  5
             result.append('S');
 737  5
             index++;
 738  
         } else {
 739  147
             if (!((index == value.length() - 1) &&
 740  
                   (contains(value, index - 3, 3, "IAU", "EAU") ||
 741  
                    contains(value, index - 2, 2, "AU", "OU")))) {
 742  
                 //-- French e.g. breaux --//
 743  141
                 result.append("KS");
 744  
             }
 745  147
             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
 746  
         }
 747  152
         return index;
 748  
     }
 749  
 
 750  
     /**
 751  
      * Handles 'Z' cases.
 752  
      */
 753  
     private int handleZ(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
 754  97
         if (charAt(value, index + 1) == 'H') {
 755  
             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
 756  2
             result.append('J');
 757  2
             index += 2;
 758  
         } else {
 759  95
             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
 760  
                 (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
 761  12
                 result.append("S", "TS");
 762  
             } else {
 763  83
                 result.append('S');
 764  
             }
 765  95
             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
 766  
         }
 767  97
         return index;
 768  
     }
 769  
 
 770  
     //-- BEGIN CONDITIONS --//
 771  
 
 772  
     /**
 773  
      * Complex condition 0 for 'C'.
 774  
      */
 775  
     private boolean conditionC0(String value, int index) {
 776  1680
         if (contains(value, index, 4, "CHIA")) {
 777  2
             return true;
 778  1678
         } else if (index <= 1) {
 779  680
             return false;
 780  998
         } else if (isVowel(charAt(value, index - 2))) {
 781  357
             return false;
 782  641
         } else if (!contains(value, index - 1, 3, "ACH")) {
 783  621
             return false;
 784  
         } else {
 785  20
             char c = charAt(value, index + 2);
 786  20
             return (c != 'I' && c != 'E') ||
 787  
                     contains(value, index - 2, 6, "BACHER", "MACHER");
 788  
         }
 789  
     }
 790  
 
 791  
     /**
 792  
      * Complex condition 0 for 'CH'.
 793  
      */
 794  
     private boolean conditionCH0(String value, int index) {
 795  156
         if (index != 0) {
 796  114
             return false;
 797  42
         } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
 798  
                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
 799  38
             return false;
 800  4
         } else if (contains(value, 0, 5, "CHORE")) {
 801  0
             return false;
 802  
         } else {
 803  4
             return true;
 804  
         }
 805  
     }
 806  
 
 807  
     /**
 808  
      * Complex condition 1 for 'CH'.
 809  
      */
 810  
     private boolean conditionCH1(String value, int index) {
 811  152
         return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) ||
 812  
                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
 813  
                 contains(value, index + 2, 1, "T", "S") ||
 814  
                 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
 815  
                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
 816  
     }
 817  
 
 818  
     /**
 819  
      * Complex condition 0 for 'L'.
 820  
      */
 821  
     private boolean conditionL0(String value, int index) {
 822  353
         if (index == value.length() - 3 &&
 823  
             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
 824  2
             return true;
 825  351
         } else if ((contains(value, value.length() - 2, 2, "AS", "OS") ||
 826  
                     contains(value, value.length() - 1, 1, "A", "O")) &&
 827  
                    contains(value, index - 1, 4, "ALLE")) {
 828  2
             return true;
 829  
         } else {
 830  349
             return false;
 831  
         }
 832  
     }
 833  
 
 834  
     /**
 835  
      * Complex condition 0 for 'M'.
 836  
      */
 837  
     private boolean conditionM0(String value, int index) {
 838  1241
         if (charAt(value, index + 1) == 'M') {
 839  100
             return true;
 840  
         }
 841  1141
         return contains(value, index - 1, 3, "UMB") &&
 842  
                ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER"));
 843  
     }
 844  
 
 845  
     //-- BEGIN HELPER FUNCTIONS --//
 846  
 
 847  
     /**
 848  
      * Determines whether or not a value is of slavo-germanic orgin. A value is
 849  
      * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
 850  
      */
 851  
     private boolean isSlavoGermanic(String value) {
 852  6492
         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
 853  
             value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
 854  
     }
 855  
 
 856  
     /**
 857  
      * Determines whether or not a character is a vowel or not
 858  
      */
 859  
     private boolean isVowel(char ch) {
 860  2201
         return VOWELS.indexOf(ch) != -1;
 861  
     }
 862  
 
 863  
     /**
 864  
      * Determines whether or not the value starts with a silent letter.  It will
 865  
      * return {@code true} if the value starts with any of 'GN', 'KN',
 866  
      * 'PN', 'WR' or 'PS'.
 867  
      */
 868  
     private boolean isSilentStart(String value) {
 869  6492
         boolean result = false;
 870  38804
         for (String element : SILENT_START) {
 871  32380
             if (value.startsWith(element)) {
 872  68
                 result = true;
 873  68
                 break;
 874  
             }
 875  
         }
 876  6492
         return result;
 877  
     }
 878  
 
 879  
     /**
 880  
      * Cleans the input.
 881  
      */
 882  
     private String cleanInput(String input) {
 883  6504
         if (input == null) {
 884  3
             return null;
 885  
         }
 886  6501
         input = input.trim();
 887  6501
         if (input.length() == 0) {
 888  9
             return null;
 889  
         }
 890  6492
         return input.toUpperCase(java.util.Locale.ENGLISH);
 891  
     }
 892  
 
 893  
     /**
 894  
      * Gets the character at index <code>index</code> if available, otherwise
 895  
      * it returns <code>Character.MIN_VALUE</code> so that there is some sort
 896  
      * of a default.
 897  
      */
 898  
     protected char charAt(String value, int index) {
 899  17628
         if (index < 0 || index >= value.length()) {
 900  1502
             return Character.MIN_VALUE;
 901  
         }
 902  16126
         return value.charAt(index);
 903  
     }
 904  
 
 905  
     /**
 906  
      * Shortcut method with 1 criteria.
 907  
      */
 908  
     private static boolean contains(String value, int start, int length, String criteria) {
 909  30236
         return contains(value, start, length, new String[] { criteria });
 910  
     }
 911  
 
 912  
     /**
 913  
      * Shortcut method with 2 criteria.
 914  
      */
 915  
     private static boolean contains(String value, int start, int length,
 916  
                                     String criteria1, String criteria2) {
 917  15344
         return contains(value, start, length, new String[] { criteria1, criteria2 });
 918  
     }
 919  
 
 920  
     /**
 921  
      * Shortcut method with 3 criteria.
 922  
      */
 923  
     private static boolean contains(String value, int start, int length,
 924  
                                     String criteria1, String criteria2, String criteria3) {
 925  6280
         return contains(value, start, length, new String[] { criteria1, criteria2, criteria3 });
 926  
     }
 927  
 
 928  
     /**
 929  
      * Shortcut method with 4 criteria.
 930  
      */
 931  
     private static boolean contains(String value, int start, int length,
 932  
                                     String criteria1, String criteria2,
 933  
                                     String criteria3, String criteria4) {
 934  1037
         return contains(value, start, length,
 935  
                         new String[] { criteria1, criteria2, criteria3, criteria4 });
 936  
     }
 937  
 
 938  
     /**
 939  
      * Shortcut method with 5 criteria.
 940  
      */
 941  
     private static boolean contains(String value, int start, int length,
 942  
                                     String criteria1, String criteria2,
 943  
                                     String criteria3, String criteria4,
 944  
                                     String criteria5) {
 945  28
         return contains(value, start, length,
 946  
                         new String[] { criteria1, criteria2, criteria3,
 947  
                                        criteria4, criteria5 });
 948  
     }
 949  
 
 950  
     /**
 951  
      * Shortcut method with 6 criteria.
 952  
      */
 953  
     private static boolean contains(String value, int start, int length,
 954  
                                     String criteria1, String criteria2,
 955  
                                     String criteria3, String criteria4,
 956  
                                     String criteria5, String criteria6) {
 957  38
         return contains(value, start, length,
 958  
                         new String[] { criteria1, criteria2, criteria3,
 959  
                                        criteria4, criteria5, criteria6 });
 960  
     }
 961  
 
 962  
     /**
 963  
      * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
 964  
      * matching up to length <code>length</code>.
 965  
      */
 966  
     protected static boolean contains(String value, int start, int length,
 967  
                                       String[] criteria) {
 968  53209
         boolean result = false;
 969  53209
         if (start >= 0 && start + length <= value.length()) {
 970  45038
             String target = value.substring(start, start + length);
 971  
 
 972  115116
             for (String element : criteria) {
 973  72502
                 if (target.equals(element)) {
 974  2424
                     result = true;
 975  2424
                     break;
 976  
                 }
 977  
             }
 978  
         }
 979  53209
         return result;
 980  
     }
 981  
 
 982  
     //-- BEGIN INNER CLASSES --//
 983  
 
 984  
     /**
 985  
      * Inner class for storing results, since there is the optional alternate encoding.
 986  
      */
 987  
     public class DoubleMetaphoneResult {
 988  
 
 989  6492
         private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
 990  6492
         private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
 991  
         private final int maxLength;
 992  
 
 993  6492
         public DoubleMetaphoneResult(int maxLength) {
 994  6492
             this.maxLength = maxLength;
 995  6492
         }
 996  
 
 997  
         public void append(char value) {
 998  21356
             appendPrimary(value);
 999  21356
             appendAlternate(value);
 1000  21356
         }
 1001  
 
 1002  
         public void append(char primary, char alternate) {
 1003  863
             appendPrimary(primary);
 1004  863
             appendAlternate(alternate);
 1005  863
         }
 1006  
 
 1007  
         public void appendPrimary(char value) {
 1008  22223
             if (this.primary.length() < this.maxLength) {
 1009  22221
                 this.primary.append(value);
 1010  
             }
 1011  22223
         }
 1012  
 
 1013  
         public void appendAlternate(char value) {
 1014  22281
             if (this.alternate.length() < this.maxLength) {
 1015  22263
                 this.alternate.append(value);
 1016  
             }
 1017  22281
         }
 1018  
 
 1019  
         public void append(String value) {
 1020  233
             appendPrimary(value);
 1021  233
             appendAlternate(value);
 1022  233
         }
 1023  
 
 1024  
         public void append(String primary, String alternate) {
 1025  60
             appendPrimary(primary);
 1026  60
             appendAlternate(alternate);
 1027  60
         }
 1028  
 
 1029  
         public void appendPrimary(String value) {
 1030  293
             int addChars = this.maxLength - this.primary.length();
 1031  293
             if (value.length() <= addChars) {
 1032  249
                 this.primary.append(value);
 1033  
             } else {
 1034  44
                 this.primary.append(value.substring(0, addChars));
 1035  
             }
 1036  293
         }
 1037  
 
 1038  
         public void appendAlternate(String value) {
 1039  293
             int addChars = this.maxLength - this.alternate.length();
 1040  293
             if (value.length() <= addChars) {
 1041  237
                 this.alternate.append(value);
 1042  
             } else {
 1043  56
                 this.alternate.append(value.substring(0, addChars));
 1044  
             }
 1045  293
         }
 1046  
 
 1047  
         public String getPrimary() {
 1048  3299
             return this.primary.toString();
 1049  
         }
 1050  
 
 1051  
         public String getAlternate() {
 1052  3193
             return this.alternate.toString();
 1053  
         }
 1054  
 
 1055  
         public boolean isComplete() {
 1056  42088
             return this.primary.length() >= this.maxLength &&
 1057  
                    this.alternate.length() >= this.maxLength;
 1058  
         }
 1059  
     }
 1060  
 }