Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
SoundexUtils |
|
| 5.0;5 |
1 | /* | |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.commons.codec.language; | |
19 | ||
20 | import org.apache.commons.codec.EncoderException; | |
21 | import org.apache.commons.codec.StringEncoder; | |
22 | ||
23 | /** | |
24 | * Utility methods for {@link Soundex} and {@link RefinedSoundex} classes. | |
25 | * | |
26 | * <p>This class is immutable and thread-safe.</p> | |
27 | * | |
28 | * @version $Id$ | |
29 | * @since 1.3 | |
30 | */ | |
31 | 1 | final class SoundexUtils { |
32 | ||
33 | /** | |
34 | * Cleans up the input string before Soundex processing by only returning | |
35 | * upper case letters. | |
36 | * | |
37 | * @param str | |
38 | * The String to clean. | |
39 | * @return A clean String. | |
40 | */ | |
41 | static String clean(String str) { | |
42 | 324 | if (str == null || str.length() == 0) { |
43 | 9 | return str; |
44 | } | |
45 | 315 | int len = str.length(); |
46 | 315 | char[] chars = new char[len]; |
47 | 315 | int count = 0; |
48 | 2064 | for (int i = 0; i < len; i++) { |
49 | 1749 | if (Character.isLetter(str.charAt(i))) { |
50 | 1704 | chars[count++] = str.charAt(i); |
51 | } | |
52 | } | |
53 | 315 | if (count == len) { |
54 | 279 | return str.toUpperCase(java.util.Locale.ENGLISH); |
55 | } | |
56 | 36 | return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH); |
57 | } | |
58 | ||
59 | /** | |
60 | * Encodes the Strings and returns the number of characters in the two | |
61 | * encoded Strings that are the same. | |
62 | * <ul> | |
63 | * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates | |
64 | * little or no similarity, and 4 indicates strong similarity or identical | |
65 | * values.</li> | |
66 | * <li>For refined Soundex, the return value can be greater than 4.</li> | |
67 | * </ul> | |
68 | * | |
69 | * @param encoder | |
70 | * The encoder to use to encode the Strings. | |
71 | * @param s1 | |
72 | * A String that will be encoded and compared. | |
73 | * @param s2 | |
74 | * A String that will be encoded and compared. | |
75 | * @return The number of characters in the two Soundex encoded Strings that | |
76 | * are the same. | |
77 | * | |
78 | * @see #differenceEncoded(String,String) | |
79 | * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> | |
80 | * MS T-SQL DIFFERENCE</a> | |
81 | * | |
82 | * @throws EncoderException | |
83 | * if an error occurs encoding one of the strings | |
84 | */ | |
85 | static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException { | |
86 | 24 | return differenceEncoded(encoder.encode(s1), encoder.encode(s2)); |
87 | } | |
88 | ||
89 | /** | |
90 | * Returns the number of characters in the two Soundex encoded Strings that | |
91 | * are the same. | |
92 | * <ul> | |
93 | * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates | |
94 | * little or no similarity, and 4 indicates strong similarity or identical | |
95 | * values.</li> | |
96 | * <li>For refined Soundex, the return value can be greater than 4.</li> | |
97 | * </ul> | |
98 | * | |
99 | * @param es1 | |
100 | * An encoded String. | |
101 | * @param es2 | |
102 | * An encoded String. | |
103 | * @return The number of characters in the two Soundex encoded Strings that | |
104 | * are the same. | |
105 | * | |
106 | * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> | |
107 | * MS T-SQL DIFFERENCE</a> | |
108 | */ | |
109 | static int differenceEncoded(String es1, String es2) { | |
110 | ||
111 | 26 | if (es1 == null || es2 == null) { |
112 | 4 | return 0; |
113 | } | |
114 | 22 | int lengthToMatch = Math.min(es1.length(), es2.length()); |
115 | 22 | int diff = 0; |
116 | 112 | for (int i = 0; i < lengthToMatch; i++) { |
117 | 90 | if (es1.charAt(i) == es2.charAt(i)) { |
118 | 57 | diff++; |
119 | } | |
120 | } | |
121 | 22 | return diff; |
122 | } | |
123 | ||
124 | } |