Coverage Report - org.apache.commons.codec.language.DoubleMetaphone
 
Classes in this File Line Coverage Branch Coverage Complexity
DoubleMetaphone
98%
365/372
90%
399/443
6.08
DoubleMetaphone$DoubleMetaphoneResult
100%
36/36
100%
12/12
6.08
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.commons.codec.language;
 19  
 
 20  
 import org.apache.commons.codec.EncoderException;
 21  
 import org.apache.commons.codec.StringEncoder;
 22  
 
 23  
 /**
 24  
  * Encodes a string into a double metaphone value. This implementation is based on the algorithm by <CITE>Lawrence
 25  
  * Philips</CITE>.
 26  
  * <p>
 27  
  * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable
 28  
  * (see {@link #setMaxCodeLen(int)}) but is not volatile, and accesses are not synchronized. If an instance of the class is
 29  
  * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
 30  
  * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
 31  
  *
 32  
  * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a>
 33  
  * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a>
 34  
  *
 35  
  * @version $Id$
 36  
  */
 37  
 public class DoubleMetaphone implements StringEncoder {
 38  
 
 39  
     /**
 40  
      * "Vowels" to test for
 41  
      */
 42  
     private static final String VOWELS = "AEIOUY";
 43  
 
 44  
     /**
 45  
      * Prefixes when present which are not pronounced
 46  
      */
 47  1
     private static final String[] SILENT_START =
 48  
         { "GN", "KN", "PN", "WR", "PS" };
 49  1
     /**
      * Single-character strings, including a lone space. NOTE(review): presumably passed to
      * {@code contains} like the sibling arrays; the consumer is not visible in this part of the file - confirm.
      */
     private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
 50  
         { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
 51  1
     /**
      * Two-letter sequences that {@code handleG} looks for directly after a 'G' at the start of the value.
      */
     private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
 52  
         { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
 53  1
     /**
      * Single letters that {@code handleJ} looks for directly after a 'J'.
      */
     private static final String[] L_T_K_S_N_M_B_Z =
 54  
         { "L", "T", "K", "S", "N", "M", "B", "Z" };
 55  
 
 56  
     /**
 57  
      * Maximum length of an encoding, default is 4
 58  
      */
 59  24
     private int maxCodeLen = 4;
 60  
 
 61  
     /**
 62  
      * Creates an instance of this DoubleMetaphone encoder
 63  
      */
 64  
     public DoubleMetaphone() {
 65  24
         super();
 66  24
     }
 67  
 
 68  
     /**
 69  
      * Encode a value with Double Metaphone.
 70  
      *
 71  
      * @param value String to encode
 72  
      * @return an encoded string
 73  
      */
 74  
     public String doubleMetaphone(final String value) {
 75  82
         return doubleMetaphone(value, false);
 76  
     }
 77  
 
 78  
     /**
 79  
      * Encode a value with Double Metaphone, optionally using the alternate encoding.
 80  
      *
 81  
      * @param value String to encode
 82  
      * @param alternate use alternate encode
 83  
      * @return an encoded string
 84  
      */
 85  
     public String doubleMetaphone(String value, final boolean alternate) {
 86  6504
         value = cleanInput(value);
 87  6504
         if (value == null) {
 88  12
             return null;
 89  
         }
 90  
 
 91  6492
         final boolean slavoGermanic = isSlavoGermanic(value);
 92  6492
         int index = isSilentStart(value) ? 1 : 0;
 93  
 
 94  6492
         final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
 95  
 
 96  42088
         while (!result.isComplete() && index <= value.length() - 1) {
 97  35596
             switch (value.charAt(index)) {
 98  
             case 'A':
 99  
             case 'E':
 100  
             case 'I':
 101  
             case 'O':
 102  
             case 'U':
 103  
             case 'Y':
 104  13681
                 index = handleAEIOUY(result, index);
 105  13681
                 break;
 106  
             case 'B':
 107  884
                 result.append('P');
 108  884
                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
 109  884
                 break;
 110  
             case '\u00C7':
 111  
                 // A C with a Cedilla
 112  1
                 result.append('S');
 113  1
                 index++;
 114  1
                 break;
 115  
             case 'C':
 116  1680
                 index = handleC(value, result, index);
 117  1680
                 break;
 118  
             case 'D':
 119  1238
                 index = handleD(value, result, index);
 120  1238
                 break;
 121  
             case 'F':
 122  646
                 result.append('F');
 123  646
                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
 124  646
                 break;
 125  
             case 'G':
 126  801
                 index = handleG(value, result, index, slavoGermanic);
 127  801
                 break;
 128  
             case 'H':
 129  521
                 index = handleH(value, result, index);
 130  521
                 break;
 131  
             case 'J':
 132  87
                 index = handleJ(value, result, index, slavoGermanic);
 133  87
                 break;
 134  
             case 'K':
 135  324
                 result.append('K');
 136  324
                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
 137  324
                 break;
 138  
             case 'L':
 139  1797
                 index = handleL(value, result, index);
 140  1797
                 break;
 141  
             case 'M':
 142  1241
                 result.append('M');
 143  1241
                 index = conditionM0(value, index) ? index + 2 : index + 1;
 144  1241
                 break;
 145  
             case 'N':
 146  2777
                 result.append('N');
 147  2777
                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
 148  2777
                 break;
 149  
             case '\u00D1':
 150  
                 // N with a tilde (spanish ene)
 151  1
                 result.append('N');
 152  1
                 index++;
 153  1
                 break;
 154  
             case 'P':
 155  1144
                 index = handleP(value, result, index);
 156  1144
                 break;
 157  
             case 'Q':
 158  80
                 result.append('K');
 159  80
                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
 160  80
                 break;
 161  
             case 'R':
 162  2737
                 index = handleR(value, result, index, slavoGermanic);
 163  2737
                 break;
 164  
             case 'S':
 165  2151
                 index = handleS(value, result, index, slavoGermanic);
 166  2151
                 break;
 167  
             case 'T':
 168  2224
                 index = handleT(value, result, index);
 169  2224
                 break;
 170  
             case 'V':
 171  406
                 result.append('F');
 172  406
                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
 173  406
                 break;
 174  
             case 'W':
 175  519
                 index = handleW(value, result, index);
 176  519
                 break;
 177  
             case 'X':
 178  152
                 index = handleX(value, result, index);
 179  152
                 break;
 180  
             case 'Z':
 181  97
                 index = handleZ(value, result, index, slavoGermanic);
 182  97
                 break;
 183  
             default:
 184  407
                 index++;
 185  407
                 break;
 186  
             }
 187  
         }
 188  
 
 189  6492
         return alternate ? result.getAlternate() : result.getPrimary();
 190  
     }
 191  
 
 192  
     /**
 193  
      * Encode the value using DoubleMetaphone.  It will only work if
 194  
      * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
 195  
      *
 196  
      * @param obj Object to encode (should be of type String)
 197  
      * @return An encoded Object (will be of type String)
 198  
      * @throws EncoderException encode parameter is not of type String
 199  
      */
 200  
     @Override
 201  
     public Object encode(final Object obj) throws EncoderException {
 202  35
         if (!(obj instanceof String)) {
 203  3
             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
 204  
         }
 205  32
         return doubleMetaphone((String) obj);
 206  
     }
 207  
 
 208  
     /**
 209  
      * Encode the value using DoubleMetaphone.
 210  
      *
 211  
      * @param value String to encode
 212  
      * @return An encoded String
 213  
      */
 214  
     @Override
 215  
     public String encode(final String value) {
 216  30
         return doubleMetaphone(value);
 217  
     }
 218  
 
 219  
     /**
 220  
      * Check if the Double Metaphone values of two <code>String</code> values
 221  
      * are equal.
 222  
      *
 223  
      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
 224  
      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
 225  
      * @return {@code true} if the encoded <code>String</code>s are equal;
 226  
      *          {@code false} otherwise.
 227  
      * @see #isDoubleMetaphoneEqual(String,String,boolean)
 228  
      */
 229  
     public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
 230  22
         return isDoubleMetaphoneEqual(value1, value2, false);
 231  
     }
 232  
 
 233  
     /**
 234  
      * Check if the Double Metaphone values of two <code>String</code> values
 235  
      * are equal, optionally using the alternate value.
 236  
      *
 237  
      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
 238  
      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
 239  
      * @param alternate use the alternate value if {@code true}.
 240  
      * @return {@code true} if the encoded <code>String</code>s are equal;
 241  
      *          {@code false} otherwise.
 242  
      */
 243  
     public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
 244  1970
         return doubleMetaphone(value1, alternate).equals(doubleMetaphone(value2, alternate));
 245  
     }
 246  
 
 247  
     /**
 248  
      * Returns the maxCodeLen.
 249  
      * @return int
 250  
      */
 251  
     public int getMaxCodeLen() {
 252  19478
         return this.maxCodeLen;
 253  
     }
 254  
 
 255  
     /**
 256  
      * Sets the maxCodeLen.
 257  
      * @param maxCodeLen The maxCodeLen to set
 258  
      */
 259  
     public void setMaxCodeLen(final int maxCodeLen) {
 260  1
         this.maxCodeLen = maxCodeLen;
 261  1
     }
 262  
 
 263  
     //-- BEGIN HANDLERS --//
 264  
 
 265  
     /**
 266  
      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
 267  
      */
 268  
     private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
 269  13681
         if (index == 0) {
 270  1515
             result.append('A');
 271  
         }
 272  13681
         return index + 1;
 273  
     }
 274  
 
 275  
     /**
 276  
      * Handles 'C' cases.
 277  
      */
 278  
     private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
 279  1680
         if (conditionC0(value, index)) {  // very confusing, moved out
 280  16
             result.append('K');
 281  16
             index += 2;
 282  1664
         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
 283  6
             result.append('S');
 284  6
             index += 2;
 285  1658
         } else if (contains(value, index, 2, "CH")) {
 286  156
             index = handleCH(value, result, index);
 287  1502
         } else if (contains(value, index, 2, "CZ") &&
 288  
                    !contains(value, index - 2, 4, "WICZ")) {
 289  
             //-- "Czerny" --//
 290  7
             result.append('S', 'X');
 291  7
             index += 2;
 292  1495
         } else if (contains(value, index + 1, 3, "CIA")) {
 293  
             //-- "focaccia" --//
 294  2
             result.append('X');
 295  2
             index += 3;
 296  1493
         } else if (contains(value, index, 2, "CC") &&
 297  
                    !(index == 1 && charAt(value, 0) == 'M')) {
 298  
             //-- double "cc" but not "McClelland" --//
 299  109
             return handleCC(value, result, index);
 300  1384
         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
 301  111
             result.append('K');
 302  111
             index += 2;
 303  1273
         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
 304  
             //-- Italian vs. English --//
 305  286
             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
 306  46
                 result.append('S', 'X');
 307  
             } else {
 308  240
                 result.append('S');
 309  
             }
 310  286
             index += 2;
 311  
         } else {
 312  987
             result.append('K');
 313  987
             if (contains(value, index + 1, 2, " C", " Q", " G")) {
 314  
                 //-- Mac Caffrey, Mac Gregor --//
 315  4
                 index += 3;
 316  983
             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
 317  
                        !contains(value, index + 1, 2, "CE", "CI")) {
 318  9
                 index += 2;
 319  
             } else {
 320  974
                 index++;
 321  
             }
 322  
         }
 323  
 
 324  1571
         return index;
 325  
     }
 326  
 
 327  
     /**
 328  
      * Handles 'CC' cases.
 329  
      */
 330  
     private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
 331  109
         if (contains(value, index + 2, 1, "I", "E", "H") &&
 332  
             !contains(value, index + 2, 2, "HU")) {
 333  
             //-- "bellocchio" but not "bacchus" --//
 334  22
             if ((index == 1 && charAt(value, index - 1) == 'A') ||
 335  
                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
 336  
                 //-- "accident", "accede", "succeed" --//
 337  14
                 result.append("KS");
 338  
             } else {
 339  
                 //-- "bacci", "bertucci", other Italian --//
 340  8
                 result.append('X');
 341  
             }
 342  22
             index += 3;
 343  
         } else {    // Pierce's rule
 344  87
             result.append('K');
 345  87
             index += 2;
 346  
         }
 347  
 
 348  109
         return index;
 349  
     }
 350  
 
 351  
     /**
 352  
      * Handles 'CH' cases.
 353  
      */
 354  
     private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
 355  156
         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
 356  0
             result.append('K', 'X');
 357  0
             return index + 2;
 358  156
         } else if (conditionCH0(value, index)) {
 359  
             //-- Greek roots ("chemistry", "chorus", etc.) --//
 360  4
             result.append('K');
 361  4
             return index + 2;
 362  152
         } else if (conditionCH1(value, index)) {
 363  
             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
 364  34
             result.append('K');
 365  34
             return index + 2;
 366  
         } else {
 367  118
             if (index > 0) {
 368  82
                 if (contains(value, 0, 2, "MC")) {
 369  2
                     result.append('K');
 370  
                 } else {
 371  80
                     result.append('X', 'K');
 372  
                 }
 373  
             } else {
 374  36
                 result.append('X');
 375  
             }
 376  118
             return index + 2;
 377  
         }
 378  
     }
 379  
 
 380  
     /**
 381  
      * Handles 'D' cases.
 382  
      */
 383  
     private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
 384  1238
         if (contains(value, index, 2, "DG")) {
 385  
             //-- "Edge" --//
 386  10
             if (contains(value, index + 2, 1, "I", "E", "Y")) {
 387  4
                 result.append('J');
 388  4
                 index += 3;
 389  
                 //-- "Edgar" --//
 390  
             } else {
 391  6
                 result.append("TK");
 392  6
                 index += 2;
 393  
             }
 394  1228
         } else if (contains(value, index, 2, "DT", "DD")) {
 395  38
             result.append('T');
 396  38
             index += 2;
 397  
         } else {
 398  1190
             result.append('T');
 399  1190
             index++;
 400  
         }
 401  1238
         return index;
 402  
     }
 403  
 
 404  
     /**
 405  
      * Handles 'G' cases.
 406  
      */
 407  
     private int handleG(final String value, final DoubleMetaphoneResult result, int index,
 408  
                         final boolean slavoGermanic) {
 409  801
         if (charAt(value, index + 1) == 'H') {
 410  106
             index = handleGH(value, result, index);
 411  695
         } else if (charAt(value, index + 1) == 'N') {
 412  26
             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
 413  0
                 result.append("KN", "N");
 414  26
             } else if (!contains(value, index + 2, 2, "EY") &&
 415  
                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
 416  22
                 result.append("N", "KN");
 417  
             } else {
 418  4
                 result.append("KN");
 419  
             }
 420  26
             index = index + 2;
 421  669
         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
 422  4
             result.append("KL", "L");
 423  4
             index += 2;
 424  665
         } else if (index == 0 &&
 425  
                    (charAt(value, index + 1) == 'Y' ||
 426  
                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
 427  
             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
 428  16
             result.append('K', 'J');
 429  16
             index += 2;
 430  649
         } else if ((contains(value, index + 1, 2, "ER") ||
 431  
                     charAt(value, index + 1) == 'Y') &&
 432  
                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
 433  
                    !contains(value, index - 1, 1, "E", "I") &&
 434  
                    !contains(value, index - 1, 3, "RGY", "OGY")) {
 435  
             //-- -ger-, -gy- --//
 436  22
             result.append('K', 'J');
 437  22
             index += 2;
 438  627
         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
 439  
                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
 440  
             //-- Italian "biaggi" --//
 441  182
             if (contains(value, 0 ,4, "VAN ", "VON ") ||
 442  
                 contains(value, 0, 3, "SCH") ||
 443  
                 contains(value, index + 1, 2, "ET")) {
 444  
                 //-- obvious germanic --//
 445  2
                 result.append('K');
 446  180
             } else if (contains(value, index + 1, 3, "IER")) {
 447  4
                 result.append('J');
 448  
             } else {
 449  176
                 result.append('J', 'K');
 450  
             }
 451  182
             index += 2;
 452  445
         } else if (charAt(value, index + 1) == 'G') {
 453  34
             index += 2;
 454  34
             result.append('K');
 455  
         } else {
 456  411
             index++;
 457  411
             result.append('K');
 458  
         }
 459  801
         return index;
 460  
     }
 461  
 
 462  
     /**
 463  
      * Handles 'GH' cases.
 464  
      */
 465  
     private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
 466  106
         if (index > 0 && !isVowel(charAt(value, index - 1))) {
 467  4
             result.append('K');
 468  4
             index += 2;
 469  102
         } else if (index == 0) {
 470  8
             if (charAt(value, index + 2) == 'I') {
 471  4
                 result.append('J');
 472  
             } else {
 473  4
                 result.append('K');
 474  
             }
 475  8
             index += 2;
 476  94
         } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
 477  
                    (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
 478  
                    (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
 479  
             //-- Parker's rule (with some further refinements) - "hugh"
 480  28
             index += 2;
 481  
         } else {
 482  66
             if (index > 2 && charAt(value, index - 1) == 'U' &&
 483  
                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
 484  
                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
 485  22
                 result.append('F');
 486  44
             } else if (index > 0 && charAt(value, index - 1) != 'I') {
 487  8
                 result.append('K');
 488  
             }
 489  66
             index += 2;
 490  
         }
 491  106
         return index;
 492  
     }
 493  
 
 494  
     /**
 495  
      * Handles 'H' cases.
 496  
      */
 497  
     private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
 498  
         //-- only keep if first & before vowel or between 2 vowels --//
 499  521
         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
 500  
             isVowel(charAt(value, index + 1))) {
 501  387
             result.append('H');
 502  387
             index += 2;
 503  
             //-- also takes car of "HH" --//
 504  
         } else {
 505  134
             index++;
 506  
         }
 507  521
         return index;
 508  
     }
 509  
 
 510  
     /**
 511  
      * Handles 'J' cases.
 512  
      */
 513  
     private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
 514  
                         final boolean slavoGermanic) {
 515  87
         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
 516  
                 //-- obvious Spanish, "Jose", "San Jacinto" --//
 517  11
                 if ((index == 0 && (charAt(value, index + 4) == ' ') ||
 518  
                      value.length() == 4) || contains(value, 0, 4, "SAN ")) {
 519  9
                     result.append('H');
 520  
                 } else {
 521  2
                     result.append('J', 'H');
 522  
                 }
 523  11
                 index++;
 524  
             } else {
 525  76
                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
 526  48
                     result.append('J', 'A');
 527  28
                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
 528  
                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
 529  10
                     result.append('J', 'H');
 530  18
                 } else if (index == value.length() - 1) {
 531  0
                     result.append('J', ' ');
 532  18
                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
 533  
                            !contains(value, index - 1, 1, "S", "K", "L")) {
 534  18
                     result.append('J');
 535  
                 }
 536  
 
 537  76
                 if (charAt(value, index + 1) == 'J') {
 538  0
                     index += 2;
 539  
                 } else {
 540  76
                     index++;
 541  
                 }
 542  
             }
 543  87
         return index;
 544  
     }
 545  
 
 546  
     /**
 547  
      * Handles 'L' cases.
 548  
      */
 549  
     private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
 550  1797
         if (charAt(value, index + 1) == 'L') {
 551  353
             if (conditionL0(value, index)) {
 552  4
                 result.appendPrimary('L');
 553  
             } else {
 554  349
                 result.append('L');
 555  
             }
 556  353
             index += 2;
 557  
         } else {
 558  1444
             index++;
 559  1444
             result.append('L');
 560  
         }
 561  1797
         return index;
 562  
     }
 563  
 
 564  
     /**
 565  
      * Handles 'P' cases.
 566  
      */
 567  
     private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
 568  1144
         if (charAt(value, index + 1) == 'H') {
 569  82
             result.append('F');
 570  82
             index += 2;
 571  
         } else {
 572  1062
             result.append('P');
 573  1062
             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
 574  
         }
 575  1144
         return index;
 576  
     }
 577  
 
 578  
     /**
 579  
      * Handles 'R' cases.
 580  
      */
 581  
     private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
 582  
                         final boolean slavoGermanic) {
 583  2737
         if (index == value.length() - 1 && !slavoGermanic &&
 584  
             contains(value, index - 2, 2, "IE") &&
 585  
             !contains(value, index - 4, 2, "ME", "MA")) {
 586  12
             result.appendAlternate('R');
 587  
         } else {
 588  2725
             result.append('R');
 589  
         }
 590  2737
         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
 591  
     }
 592  
 
 593  
     /**
 594  
      * Handles 'S' cases.
 595  
      */
 596  
     private int handleS(final String value, final DoubleMetaphoneResult result, int index,
 597  
                         final boolean slavoGermanic) {
 598  2151
         if (contains(value, index - 1, 3, "ISL", "YSL")) {
 599  
             //-- special cases "island", "isle", "carlisle", "carlysle" --//
 600  12
             index++;
 601  2139
         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
 602  
             //-- special case "sugar-" --//
 603  4
             result.append('X', 'S');
 604  4
             index++;
 605  2135
         } else if (contains(value, index, 2, "SH")) {
 606  78
             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
 607  
                 //-- germanic --//
 608  6
                 result.append('S');
 609  
             } else {
 610  72
                 result.append('X');
 611  
             }
 612  78
             index += 2;
 613  2057
         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
 614  
             //-- Italian and Armenian --//
 615  50
             if (slavoGermanic) {
 616  0
                 result.append('S');
 617  
             } else {
 618  50
                 result.append('S', 'X');
 619  
             }
 620  50
             index += 3;
 621  2007
         } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
 622  
                    contains(value, index + 1, 1, "Z")) {
 623  
             //-- german & anglicisations, e.g. "smith" match "schmidt" //
 624  
             // "snider" match "schneider" --//
 625  
             //-- also, -sz- in slavic language altho in hungarian it //
 626  
             //   is pronounced "s" --//
 627  48
             result.append('S', 'X');
 628  48
             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
 629  1959
         } else if (contains(value, index, 2, "SC")) {
 630  114
             index = handleSC(value, result, index);
 631  
         } else {
 632  1845
             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
 633  
                 //-- french e.g. "resnais", "artois" --//
 634  4
                 result.appendAlternate('S');
 635  
             } else {
 636  1841
                 result.append('S');
 637  
             }
 638  1845
             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
 639  
         }
 640  2151
         return index;
 641  
     }
 642  
 
 643  
     /**
 644  
      * Handles 'SC' cases.
 645  
      */
 646  
     private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
 647  114
         if (charAt(value, index + 2) == 'H') {
 648  
             //-- Schlesinger's rule --//
 649  38
             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
 650  
                 //-- Dutch origin, e.g. "school", "schooner" --//
 651  10
                 if (contains(value, index + 3, 2, "ER", "EN")) {
 652  
                     //-- "schermerhorn", "schenker" --//
 653  6
                     result.append("X", "SK");
 654  
                 } else {
 655  4
                     result.append("SK");
 656  
                 }
 657  
             } else {
 658  28
                 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
 659  10
                     result.append('X', 'S');
 660  
                 } else {
 661  18
                     result.append('X');
 662  
                 }
 663  
             }
 664  76
         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
 665  12
             result.append('S');
 666  
         } else {
 667  64
             result.append("SK");
 668  
         }
 669  114
         return index + 3;
 670  
     }
 671  
 
 672  
     /**
 673  
      * Handles 'T' cases.
 674  
      */
 675  
     private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
 676  2224
         if (contains(value, index, 4, "TION")) {
 677  52
             result.append('X');
 678  52
             index += 3;
 679  2172
         } else if (contains(value, index, 3, "TIA", "TCH")) {
 680  17
             result.append('X');
 681  17
             index += 3;
 682  2155
         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
 683  168
             if (contains(value, index + 2, 2, "OM", "AM") ||
 684  
                 //-- special case "thomas", "thames" or germanic --//
 685  
                 contains(value, 0, 4, "VAN ", "VON ") ||
 686  
                 contains(value, 0, 3, "SCH")) {
 687  10
                 result.append('T');
 688  
             } else {
 689  158
                 result.append('0', 'T');
 690  
             }
 691  168
             index += 2;
 692  
         } else {
 693  1987
             result.append('T');
 694  1987
             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
 695  
         }
 696  2224
         return index;
 697  
     }
 698  
 
 699  
     /**
 700  
      * Handles 'W' cases.
 701  
      */
 702  
     private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
 703  519
         if (contains(value, index, 2, "WR")) {
 704  
             //-- can also be in middle of word --//
 705  12
             result.append('R');
 706  12
             index += 2;
 707  
         } else {
 708  507
             if (index == 0 && (isVowel(charAt(value, index + 1)) ||
 709  
                                contains(value, index, 2, "WH"))) {
 710  216
                 if (isVowel(charAt(value, index + 1))) {
 711  
                     //-- Wasserman should match Vasserman --//
 712  186
                     result.append('A', 'F');
 713  
                 } else {
 714  
                     //-- need Uomo to match Womo --//
 715  30
                     result.append('A');
 716  
                 }
 717  216
                 index++;
 718  291
             } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
 719  
                        contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
 720  
                        contains(value, 0, 3, "SCH")) {
 721  
                 //-- Arnow should match Arnoff --//
 722  46
                 result.appendAlternate('F');
 723  46
                 index++;
 724  245
             } else if (contains(value, index, 4, "WICZ", "WITZ")) {
 725  
                 //-- Polish e.g. "filipowicz" --//
 726  16
                 result.append("TS", "FX");
 727  16
                 index += 4;
 728  
             } else {
 729  229
                 index++;
 730  
             }
 731  
         }
 732  519
         return index;
 733  
     }
 734  
 
 735  
     /**
 736  
      * Handles 'X' cases.
 737  
      */
 738  
     private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
 739  152
         if (index == 0) {
 740  5
             result.append('S');
 741  5
             index++;
 742  
         } else {
 743  147
             if (!((index == value.length() - 1) &&
 744  
                   (contains(value, index - 3, 3, "IAU", "EAU") ||
 745  
                    contains(value, index - 2, 2, "AU", "OU")))) {
 746  
                 //-- French e.g. breaux --//
 747  141
                 result.append("KS");
 748  
             }
 749  147
             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
 750  
         }
 751  152
         return index;
 752  
     }
 753  
 
 754  
     /**
 755  
      * Handles 'Z' cases.
 756  
      */
 757  
     private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
 758  
                         final boolean slavoGermanic) {
 759  97
         if (charAt(value, index + 1) == 'H') {
 760  
             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
 761  2
             result.append('J');
 762  2
             index += 2;
 763  
         } else {
 764  95
             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
 765  
                 (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
 766  12
                 result.append("S", "TS");
 767  
             } else {
 768  83
                 result.append('S');
 769  
             }
 770  95
             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
 771  
         }
 772  97
         return index;
 773  
     }
 774  
 
 775  
     //-- BEGIN CONDITIONS --//
 776  
 
 777  
     /**
 778  
      * Complex condition 0 for 'C'.
 779  
      */
 780  
     private boolean conditionC0(final String value, final int index) {
 781  1680
         if (contains(value, index, 4, "CHIA")) {
 782  2
             return true;
 783  1678
         } else if (index <= 1) {
 784  680
             return false;
 785  998
         } else if (isVowel(charAt(value, index - 2))) {
 786  357
             return false;
 787  641
         } else if (!contains(value, index - 1, 3, "ACH")) {
 788  621
             return false;
 789  
         } else {
 790  20
             final char c = charAt(value, index + 2);
 791  20
             return (c != 'I' && c != 'E') ||
 792  
                     contains(value, index - 2, 6, "BACHER", "MACHER");
 793  
         }
 794  
     }
 795  
 
 796  
     /**
 797  
      * Complex condition 0 for 'CH'.
 798  
      */
 799  
     private boolean conditionCH0(final String value, final int index) {
 800  156
         if (index != 0) {
 801  114
             return false;
 802  42
         } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
 803  
                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
 804  38
             return false;
 805  4
         } else if (contains(value, 0, 5, "CHORE")) {
 806  0
             return false;
 807  
         } else {
 808  4
             return true;
 809  
         }
 810  
     }
 811  
 
 812  
     /**
 813  
      * Complex condition 1 for 'CH'.
 814  
      */
 815  
     private boolean conditionCH1(final String value, final int index) {
 816  152
         return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) ||
 817  
                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
 818  
                 contains(value, index + 2, 1, "T", "S") ||
 819  
                 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
 820  
                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
 821  
     }
 822  
 
 823  
     /**
 824  
      * Complex condition 0 for 'L'.
 825  
      */
 826  
     private boolean conditionL0(final String value, final int index) {
 827  353
         if (index == value.length() - 3 &&
 828  
             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
 829  2
             return true;
 830  351
         } else if ((contains(value, value.length() - 2, 2, "AS", "OS") ||
 831  
                     contains(value, value.length() - 1, 1, "A", "O")) &&
 832  
                    contains(value, index - 1, 4, "ALLE")) {
 833  2
             return true;
 834  
         } else {
 835  349
             return false;
 836  
         }
 837  
     }
 838  
 
 839  
     /**
 840  
      * Complex condition 0 for 'M'.
 841  
      */
 842  
     private boolean conditionM0(final String value, final int index) {
 843  1241
         if (charAt(value, index + 1) == 'M') {
 844  100
             return true;
 845  
         }
 846  1141
         return contains(value, index - 1, 3, "UMB") &&
 847  
                ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER"));
 848  
     }
 849  
 
 850  
     //-- BEGIN HELPER FUNCTIONS --//
 851  
 
 852  
     /**
 853  
      * Determines whether or not a value is of slavo-germanic orgin. A value is
 854  
      * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
 855  
      */
 856  
     private boolean isSlavoGermanic(final String value) {
 857  6492
         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
 858  
             value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
 859  
     }
 860  
 
 861  
     /**
 862  
      * Determines whether or not a character is a vowel or not
 863  
      */
 864  
     private boolean isVowel(final char ch) {
 865  2201
         return VOWELS.indexOf(ch) != -1;
 866  
     }
 867  
 
 868  
     /**
 869  
      * Determines whether or not the value starts with a silent letter.  It will
 870  
      * return {@code true} if the value starts with any of 'GN', 'KN',
 871  
      * 'PN', 'WR' or 'PS'.
 872  
      */
 873  
     private boolean isSilentStart(final String value) {
 874  6492
         boolean result = false;
 875  38804
         for (final String element : SILENT_START) {
 876  32380
             if (value.startsWith(element)) {
 877  68
                 result = true;
 878  68
                 break;
 879  
             }
 880  
         }
 881  6492
         return result;
 882  
     }
 883  
 
 884  
     /**
 885  
      * Cleans the input.
 886  
      */
 887  
     private String cleanInput(String input) {
 888  6504
         if (input == null) {
 889  3
             return null;
 890  
         }
 891  6501
         input = input.trim();
 892  6501
         if (input.length() == 0) {
 893  9
             return null;
 894  
         }
 895  6492
         return input.toUpperCase(java.util.Locale.ENGLISH);
 896  
     }
 897  
 
 898  
     /**
 899  
      * Gets the character at index <code>index</code> if available, otherwise
 900  
      * it returns <code>Character.MIN_VALUE</code> so that there is some sort
 901  
      * of a default.
 902  
      */
 903  
     protected char charAt(final String value, final int index) {
 904  17628
         if (index < 0 || index >= value.length()) {
 905  1502
             return Character.MIN_VALUE;
 906  
         }
 907  16126
         return value.charAt(index);
 908  
     }
 909  
 
 910  
     /**
 911  
      * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
 912  
      * matching up to length <code>length</code>.
 913  
      */
 914  
     protected static boolean contains(final String value, final int start, final int length,
 915  
                                       final String... criteria) {
 916  53209
         boolean result = false;
 917  53209
         if (start >= 0 && start + length <= value.length()) {
 918  45038
             final String target = value.substring(start, start + length);
 919  
 
 920  115116
             for (final String element : criteria) {
 921  72502
                 if (target.equals(element)) {
 922  2424
                     result = true;
 923  2424
                     break;
 924  
                 }
 925  
             }
 926  
         }
 927  53209
         return result;
 928  
     }
 929  
 
 930  
     //-- BEGIN INNER CLASSES --//
 931  
 
 932  
     /**
 933  
      * Inner class for storing results, since there is the optional alternate encoding.
 934  
      */
 935  
     public class DoubleMetaphoneResult {
 936  
 
 937  6492
         private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
 938  6492
         private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
 939  
         private final int maxLength;
 940  
 
 941  6492
         public DoubleMetaphoneResult(final int maxLength) {
 942  6492
             this.maxLength = maxLength;
 943  6492
         }
 944  
 
 945  
         public void append(final char value) {
 946  21356
             appendPrimary(value);
 947  21356
             appendAlternate(value);
 948  21356
         }
 949  
 
 950  
         public void append(final char primary, final char alternate) {
 951  863
             appendPrimary(primary);
 952  863
             appendAlternate(alternate);
 953  863
         }
 954  
 
 955  
         public void appendPrimary(final char value) {
 956  22223
             if (this.primary.length() < this.maxLength) {
 957  22221
                 this.primary.append(value);
 958  
             }
 959  22223
         }
 960  
 
 961  
         public void appendAlternate(final char value) {
 962  22281
             if (this.alternate.length() < this.maxLength) {
 963  22263
                 this.alternate.append(value);
 964  
             }
 965  22281
         }
 966  
 
 967  
         public void append(final String value) {
 968  233
             appendPrimary(value);
 969  233
             appendAlternate(value);
 970  233
         }
 971  
 
 972  
         public void append(final String primary, final String alternate) {
 973  60
             appendPrimary(primary);
 974  60
             appendAlternate(alternate);
 975  60
         }
 976  
 
 977  
         public void appendPrimary(final String value) {
 978  293
             final int addChars = this.maxLength - this.primary.length();
 979  293
             if (value.length() <= addChars) {
 980  249
                 this.primary.append(value);
 981  
             } else {
 982  44
                 this.primary.append(value.substring(0, addChars));
 983  
             }
 984  293
         }
 985  
 
 986  
         public void appendAlternate(final String value) {
 987  293
             final int addChars = this.maxLength - this.alternate.length();
 988  293
             if (value.length() <= addChars) {
 989  237
                 this.alternate.append(value);
 990  
             } else {
 991  56
                 this.alternate.append(value.substring(0, addChars));
 992  
             }
 993  293
         }
 994  
 
 995  
         public String getPrimary() {
 996  3299
             return this.primary.toString();
 997  
         }
 998  
 
 999  
         public String getAlternate() {
 1000  3193
             return this.alternate.toString();
 1001  
         }
 1002  
 
 1003  
         public boolean isComplete() {
 1004  42088
             return this.primary.length() >= this.maxLength &&
 1005  
                    this.alternate.length() >= this.maxLength;
 1006  
         }
 1007  
     }
 1008  
 }