Coverage Report - org.apache.commons.codec.language.SoundexUtils
 
Classes in this File Line Coverage Branch Coverage Complexity
SoundexUtils
100%
21/21
100%
18/18
5
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.commons.codec.language;
 19  
 
 20  
 import org.apache.commons.codec.EncoderException;
 21  
 import org.apache.commons.codec.StringEncoder;
 22  
 
 23  
 /**
 24  
  * Utility methods for {@link Soundex} and {@link RefinedSoundex} classes.
 25  
  *
 26  
  * <p>This class is immutable and thread-safe.</p>
 27  
  *
 28  
  * @version $Id$
 29  
  * @since 1.3
 30  
  */
 31  1
 final class SoundexUtils {
 32  
 
 33  
     /**
 34  
      * Cleans up the input string before Soundex processing by only returning
 35  
      * upper case letters.
 36  
      *
 37  
      * @param str
 38  
      *                  The String to clean.
 39  
      * @return A clean String.
 40  
      */
 41  
     static String clean(String str) {
 42  324
         if (str == null || str.length() == 0) {
 43  9
             return str;
 44  
         }
 45  315
         int len = str.length();
 46  315
         char[] chars = new char[len];
 47  315
         int count = 0;
 48  2064
         for (int i = 0; i < len; i++) {
 49  1749
             if (Character.isLetter(str.charAt(i))) {
 50  1704
                 chars[count++] = str.charAt(i);
 51  
             }
 52  
         }
 53  315
         if (count == len) {
 54  279
             return str.toUpperCase(java.util.Locale.ENGLISH);
 55  
         }
 56  36
         return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH);
 57  
     }
 58  
 
 59  
     /**
 60  
      * Encodes the Strings and returns the number of characters in the two
 61  
      * encoded Strings that are the same.
 62  
      * <ul>
 63  
      * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 64  
      * little or no similarity, and 4 indicates strong similarity or identical
 65  
      * values.</li>
 66  
      * <li>For refined Soundex, the return value can be greater than 4.</li>
 67  
      * </ul>
 68  
      *
 69  
      * @param encoder
 70  
      *                  The encoder to use to encode the Strings.
 71  
      * @param s1
 72  
      *                  A String that will be encoded and compared.
 73  
      * @param s2
 74  
      *                  A String that will be encoded and compared.
 75  
      * @return The number of characters in the two Soundex encoded Strings that
 76  
      *             are the same.
 77  
      *
 78  
      * @see #differenceEncoded(String,String)
 79  
      * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 80  
      *          MS T-SQL DIFFERENCE</a>
 81  
      *
 82  
      * @throws EncoderException
 83  
      *                  if an error occurs encoding one of the strings
 84  
      */
 85  
     static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
 86  24
         return differenceEncoded(encoder.encode(s1), encoder.encode(s2));
 87  
     }
 88  
 
 89  
     /**
 90  
      * Returns the number of characters in the two Soundex encoded Strings that
 91  
      * are the same.
 92  
      * <ul>
 93  
      * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 94  
      * little or no similarity, and 4 indicates strong similarity or identical
 95  
      * values.</li>
 96  
      * <li>For refined Soundex, the return value can be greater than 4.</li>
 97  
      * </ul>
 98  
      *
 99  
      * @param es1
 100  
      *                  An encoded String.
 101  
      * @param es2
 102  
      *                  An encoded String.
 103  
      * @return The number of characters in the two Soundex encoded Strings that
 104  
      *             are the same.
 105  
      *
 106  
      * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 107  
      *          MS T-SQL DIFFERENCE</a>
 108  
      */
 109  
     static int differenceEncoded(String es1, String es2) {
 110  
 
 111  26
         if (es1 == null || es2 == null) {
 112  4
             return 0;
 113  
         }
 114  22
         int lengthToMatch = Math.min(es1.length(), es2.length());
 115  22
         int diff = 0;
 116  112
         for (int i = 0; i < lengthToMatch; i++) {
 117  90
             if (es1.charAt(i) == es2.charAt(i)) {
 118  57
                 diff++;
 119  
             }
 120  
         }
 121  22
         return diff;
 122  
     }
 123  
 
 124  
 }