Coverage Report

Coverage Report - org.apache.commons.codec.language.MatchRatingApproachEncoder

Classes in this File

Line Coverage

Branch Coverage

Complexity

MatchRatingApproachEncoder

100%

113/113

97%

78/80

5.455

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.codec.language;
 
 import java.util.Locale;
 
 import org.apache.commons.codec.EncoderException;
 import org.apache.commons.codec.StringEncoder;
 
 /**
  * Match Rating Approach Phonetic Algorithm Developed by <CITE>Western Airlines</CITE> in 1977.
  *
  * This class is immutable and thread-safe.
  *
  * @see <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a>
  * @since 1.8
  */
 public class MatchRatingApproachEncoder implements StringEncoder {
 
     private static final String SPACE = " ";
 
     private static final String EMPTY = "";
 
     /**
      * Constants used mainly for the min rating value.
      */
     private static final int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, EIGHT = 8,
                              ELEVEN = 11, TWELVE = 12;
 
     /**
      * The plain letter equivalent of the accented letters.
      */
     private static final String PLAIN_ASCII = "AaEeIiOoUu" + // grave
             "AaEeIiOoUuYy" + // acute
             "AaEeIiOoUuYy" + // circumflex
             "AaOoNn" + // tilde
             "AaEeIiOoUuYy" + // umlaut
             "Aa" + // ring
             "Cc" + // cedilla
             "OoUu"; // double acute
 
     /**
      * Unicode characters corresponding to various accented letters. For example: \u00DA is U acute etc...
      */
     private static final String UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" +
             "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" +
             "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" +
             "\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" +
             "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" +
             "\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171";
 
     private static final String[] DOUBLE_CONSONANT =
             new String[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS",
                            "TT", "VV", "WW", "XX", "YY", "ZZ" };
 
     /**
      * Cleans up a name: 1. Upper-cases everything 2. Removes some common punctuation 3. Removes accents 4. Removes any
      * spaces.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param name
      *            The name to be cleaned
      * @return The cleaned name
      */
     String cleanName(final String name) {
         String upperName = name.toUpperCase(Locale.ENGLISH);
 
         final String[] charsToTrim = { "\\-", "[&]", "\\'", "\\.", "[\\,]" };
         for (final String str : charsToTrim) {
             upperName = upperName.replaceAll(str, EMPTY);
         }
 
         upperName = removeAccents(upperName);
         upperName = upperName.replaceAll("\\s+", EMPTY);
 
         return upperName;
     }
 
     /**
      * Encodes an Object using the Match Rating Approach algo. Method is here to satisfy the requirements of the
      * Encoder interface Throws an EncoderException if input object is not of type java.lang.String.
      *
      * @param pObject
      *            Object to encode
      * @return An object (or type java.lang.String) containing the Match Rating Approach code which corresponds to the
      *         String supplied.
      * @throws EncoderException
      *             if the parameter supplied is not of type java.lang.String
      */
     @Override
     public final Object encode(final Object pObject) throws EncoderException {
         if (!(pObject instanceof String)) {
             throw new EncoderException(
                     "Parameter supplied to Match Rating Approach encoder is not of type java.lang.String");
         }
         return encode((String) pObject);
     }
 
     /**
      * Encodes a String using the Match Rating Approach (MRA) algorithm.
      *
      * @param name
      *            String object to encode
      * @return The MRA code corresponding to the String supplied
      */
     @Override
     public final String encode(String name) {
         // Bulletproof for trivial input - NINO
         if (name == null || EMPTY.equalsIgnoreCase(name) || SPACE.equalsIgnoreCase(name) || name.length() == 1) {
             return EMPTY;
         }
 
         // Preprocessing
         name = cleanName(name);
 
         // BEGIN: Actual encoding part of the algorithm...
         // 1. Delete all vowels unless the vowel begins the word
         name = removeVowels(name);
 
         // 2. Remove second consonant from any double consonant
         name = removeDoubleConsonants(name);
 
         // 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters
         name = getFirst3Last3(name);
 
         return name;
     }
 
     /**
      * Gets the first & last 3 letters of a name (if > 6 characters) Else just returns the name.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param name
      *            The string to get the substrings from
      * @return Annexed first & last 3 letters of input word.
      */
     String getFirst3Last3(final String name) {
         final int nameLength = name.length();
 
         if (nameLength > SIX) {
             final String firstThree = name.substring(0, THREE);
             final String lastThree = name.substring(nameLength - THREE, nameLength);
             return firstThree + lastThree;
         } else {
             return name;
         }
     }
 
     /**
      * Obtains the min rating of the length sum of the 2 names. In essence the larger the sum length the smaller the
      * min rating. Values strictly from documentation.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param sumLength
      *            The length of 2 strings sent down
      * @return The min rating value
      */
     int getMinRating(final int sumLength) {
         int minRating = 0;
 
         if (sumLength <= FOUR) {
             minRating = FIVE;
         } else if (sumLength >= FIVE && sumLength <= SEVEN) {
             minRating = FOUR;
         } else if (sumLength >= EIGHT && sumLength <= ELEVEN) {
             minRating = THREE;
         } else if (sumLength == TWELVE) {
             minRating = TWO;
         } else {
             minRating = ONE; // docs said little here.
         }
 
         return minRating;
     }
 
     /**
      * Determines if two names are homophonous via Match Rating Approach (MRA) algorithm. It should be noted that the
      * strings are cleaned in the same way as {@link #encode(String)}.
      *
      * @param name1
      *            First of the 2 strings (names) to compare
      * @param name2
      *            Second of the 2 names to compare
      * @return <code>true</code> if the encodings are identical <code>false</code> otherwise.
      */
     public boolean isEncodeEquals(String name1, String name2) {
         // Bulletproof for trivial input - NINO
         if (name1 == null || EMPTY.equalsIgnoreCase(name1) || SPACE.equalsIgnoreCase(name1)) {
             return false;
         } else if (name2 == null || EMPTY.equalsIgnoreCase(name2) || SPACE.equalsIgnoreCase(name2)) {
             return false;
         } else if (name1.length() == 1 || name2.length() == 1) {
             return false;
         } else if (name1.equalsIgnoreCase(name2)) {
             return true;
         }
 
         // Preprocessing
         name1 = cleanName(name1);
         name2 = cleanName(name2);
 
         // Actual MRA Algorithm
 
         // 1. Remove vowels
         name1 = removeVowels(name1);
         name2 = removeVowels(name2);
 
         // 2. Remove double consonants
         name1 = removeDoubleConsonants(name1);
         name2 = removeDoubleConsonants(name2);
 
         // 3. Reduce down to 3 letters
         name1 = getFirst3Last3(name1);
         name2 = getFirst3Last3(name2);
 
         // 4. Check for length difference - if 3 or greater then no similarity
         // comparison is done
         if (Math.abs(name1.length() - name2.length()) >= THREE) {
             return false;
         }
 
         // 5. Obtain the minimum rating value by calculating the length sum of the
         // encoded Strings and sending it down.
         final int sumLength = Math.abs(name1.length() + name2.length());
         int minRating = 0;
         minRating = getMinRating(sumLength);
 
         // 6. Process the encoded Strings from left to right and remove any
         // identical characters found from both Strings respectively.
         final int count = leftToRightThenRightToLeftProcessing(name1, name2);
 
         // 7. Each PNI item that has a similarity rating equal to or greater than
         // the min is considered to be a good candidate match
         return count >= minRating;
 
     }
 
     /**
      * Determines if a letter is a vowel.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param letter
      *            The letter under investiagtion
      * @return True if a vowel, else false
      */
     boolean isVowel(final String letter) {
         return letter.equalsIgnoreCase("E") || letter.equalsIgnoreCase("A") || letter.equalsIgnoreCase("O") ||
                letter.equalsIgnoreCase("I") || letter.equalsIgnoreCase("U");
     }
 
     /**
      * Processes the names from left to right (first) then right to left removing identical letters in same positions.
      * Then subtracts the longer string that remains from 6 and returns this.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param name1
      *            name2
      * @return
      */
     int leftToRightThenRightToLeftProcessing(final String name1, final String name2) {
         final char[] name1Char = name1.toCharArray();
         final char[] name2Char = name2.toCharArray();
 
         final int name1Size = name1.length() - 1;
         final int name2Size = name2.length() - 1;
 
         String name1LtRStart = EMPTY;
         String name1LtREnd = EMPTY;
 
         String name2RtLStart = EMPTY;
         String name2RtLEnd = EMPTY;
 
         for (int i = 0; i < name1Char.length; i++) {
             if (i > name2Size) {
                 break;
             }
 
             name1LtRStart = name1.substring(i, i + 1);
             name1LtREnd = name1.substring(name1Size - i, name1Size - i + 1);
 
             name2RtLStart = name2.substring(i, i + 1);
             name2RtLEnd = name2.substring(name2Size - i, name2Size - i + 1);
 
             // Left to right...
             if (name1LtRStart.equals(name2RtLStart)) {
                 name1Char[i] = ' ';
                 name2Char[i] = ' ';
             }
 
             // Right to left...
             if (name1LtREnd.equals(name2RtLEnd)) {
                 name1Char[name1Size - i] = ' ';
                 name2Char[name2Size - i] = ' ';
             }
         }
 
         // Char arrays -> string & remove extraneous space
         final String strA = new String(name1Char).replaceAll("\\s+", EMPTY);
         final String strB = new String(name2Char).replaceAll("\\s+", EMPTY);
 
         // Final bit - subtract longest string from 6 and return this int value
         if (strA.length() > strB.length()) {
             return Math.abs(SIX - strA.length());
         } else {
             return Math.abs(SIX - strB.length());
         }
     }
 
     /**
      * Removes accented letters and replaces with non-accented ascii equivalent Case is preserved.
      * http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29
      *
      * @param accentedWord
      *            The word that may have accents in it.
      * @return De-accented word
      */
     String removeAccents(final String accentedWord) {
         if (accentedWord == null) {
             return null;
         }
 
         final StringBuilder sb = new StringBuilder();
         final int n = accentedWord.length();
 
         for (int i = 0; i < n; i++) {
             final char c = accentedWord.charAt(i);
             final int pos = UNICODE.indexOf(c);
             if (pos > -1) {
                 sb.append(PLAIN_ASCII.charAt(pos));
             } else {
                 sb.append(c);
             }
         }
 
         return sb.toString();
     }
 
     /**
      * Replaces any double consonant pair with the single letter equivalent.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param name
      *            String to have double consonants removed
      * @return Single consonant word
      */
     String removeDoubleConsonants(final String name) {
         String replacedName = name.toUpperCase();
         for (final String dc : DOUBLE_CONSONANT) {
             if (replacedName.contains(dc)) {
                 final String singleLetter = dc.substring(0, 1);
                 replacedName = replacedName.replace(dc, singleLetter);
             }
         }
         return replacedName;
     }
 
     /**
      * Deletes all vowels unless the vowel begins the word.
      *
      * <h2>API Usage</h2>
      * <p>
      * Consider this method private, it is package protected for unit testing only.
      * </p>
      *
      * @param name
      *            The name to have vowels removed
      * @return De-voweled word
      */
     String removeVowels(String name) {
         // Extract first letter
         final String firstLetter = name.substring(0, 1);
 
         name = name.replaceAll("A", EMPTY);
         name = name.replaceAll("E", EMPTY);
         name = name.replaceAll("I", EMPTY);
         name = name.replaceAll("O", EMPTY);
         name = name.replaceAll("U", EMPTY);
 
         name = name.replaceAll("\\s{2,}\\b", SPACE);
 
         // return isVowel(firstLetter) ? (firstLetter + name) : name;
         if (isVowel(firstLetter)) {
             return firstLetter + name;
         } else {
             return name;
         }
     }
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17		package org.apache.commons.codec.language;
18
19		import java.util.Locale;
20
21		import org.apache.commons.codec.EncoderException;
22		import org.apache.commons.codec.StringEncoder;
23
24		/**
25		* Match Rating Approach Phonetic Algorithm Developed by <CITE>Western Airlines</CITE> in 1977.
26		*
27		* This class is immutable and thread-safe.
28		*
29		* @see <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a>
30		* @since 1.8
31		*/
32	96	public class MatchRatingApproachEncoder implements StringEncoder {
33
34		private static final String SPACE = " ";
35
36		private static final String EMPTY = "";
37
38		/**
39		* Constants used mainly for the min rating value.
40		*/
41		private static final int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, EIGHT = 8,
42		ELEVEN = 11, TWELVE = 12;
43
44		/**
45		* The plain letter equivalent of the accented letters.
46		*/
47		private static final String PLAIN_ASCII = "AaEeIiOoUu" + // grave
48		"AaEeIiOoUuYy" + // acute
49		"AaEeIiOoUuYy" + // circumflex
50		"AaOoNn" + // tilde
51		"AaEeIiOoUuYy" + // umlaut
52		"Aa" + // ring
53		"Cc" + // cedilla
54		"OoUu"; // double acute
55
56		/**
57		* Unicode characters corresponding to various accented letters. For example: \u00DA is U acute etc...
58		*/
59		private static final String UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" +
60		"\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" +
61		"\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" +
62		"\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" +
63		"\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" +
64		"\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171";
65
66	1	private static final String[] DOUBLE_CONSONANT =
67		new String[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS",
68		"TT", "VV", "WW", "XX", "YY", "ZZ" };
69
70		/**
71		* Cleans up a name: 1. Upper-cases everything 2. Removes some common punctuation 3. Removes accents 4. Removes any
72		* spaces.
73		*
74		* <h2>API Usage</h2>
75		* <p>
76		* Consider this method private, it is package protected for unit testing only.
77		* </p>
78		*
79		* @param name
80		* The name to be cleaned
81		* @return The cleaned name
82		*/
83		String cleanName(final String name) {
84	78	String upperName = name.toUpperCase(Locale.ENGLISH);
85
86	78	final String[] charsToTrim = { "\\-", "[&]", "\\'", "\\.", "[\\,]" };
87	468	for (final String str : charsToTrim) {
88	390	upperName = upperName.replaceAll(str, EMPTY);
89		}
90
91	78	upperName = removeAccents(upperName);
92	78	upperName = upperName.replaceAll("\\s+", EMPTY);
93
94	78	return upperName;
95		}
96
97		/**
98		* Encodes an Object using the Match Rating Approach algo. Method is here to satisfy the requirements of the
99		* Encoder interface Throws an EncoderException if input object is not of type java.lang.String.
100		*
101		* @param pObject
102		* Object to encode
103		* @return An object (or type java.lang.String) containing the Match Rating Approach code which corresponds to the
104		* String supplied.
105		* @throws EncoderException
106		* if the parameter supplied is not of type java.lang.String
107		*/
108		@Override
109		public final Object encode(final Object pObject) throws EncoderException {
110	4	if (!(pObject instanceof String)) {
111	1	throw new EncoderException(
112		"Parameter supplied to Match Rating Approach encoder is not of type java.lang.String");
113		}
114	3	return encode((String) pObject);
115		}
116
117		/**
118		* Encodes a String using the Match Rating Approach (MRA) algorithm.
119		*
120		* @param name
121		* String object to encode
122		* @return The MRA code corresponding to the String supplied
123		*/
124		@Override
125		public final String encode(String name) {
126		// Bulletproof for trivial input - NINO
127	17	if (name == null \|\| EMPTY.equalsIgnoreCase(name) \|\| SPACE.equalsIgnoreCase(name) \|\| name.length() == 1) {
128	14	return EMPTY;
129		}
130
131		// Preprocessing
132	3	name = cleanName(name);
133
134		// BEGIN: Actual encoding part of the algorithm...
135		// 1. Delete all vowels unless the vowel begins the word
136	3	name = removeVowels(name);
137
138		// 2. Remove second consonant from any double consonant
139	3	name = removeDoubleConsonants(name);
140
141		// 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters
142	3	name = getFirst3Last3(name);
143
144	3	return name;
145		}
146
147		/**
148		* Gets the first & last 3 letters of a name (if > 6 characters) Else just returns the name.
149		*
150		* <h2>API Usage</h2>
151		* <p>
152		* Consider this method private, it is package protected for unit testing only.
153		* </p>
154		*
155		* @param name
156		* The string to get the substrings from
157		* @return Annexed first & last 3 letters of input word.
158		*/
159		String getFirst3Last3(final String name) {
160	79	final int nameLength = name.length();
161
162	79	if (nameLength > SIX) {
163	11	final String firstThree = name.substring(0, THREE);
164	11	final String lastThree = name.substring(nameLength - THREE, nameLength);
165	11	return firstThree + lastThree;
166		} else {
167	68	return name;
168		}
169		}
170
171		/**
172		* Obtains the min rating of the length sum of the 2 names. In essence the larger the sum length the smaller the
173		* min rating. Values strictly from documentation.
174		*
175		* <h2>API Usage</h2>
176		* <p>
177		* Consider this method private, it is package protected for unit testing only.
178		* </p>
179		*
180		* @param sumLength
181		* The length of 2 strings sent down
182		* @return The min rating value
183		*/
184		int getMinRating(final int sumLength) {
185	47	int minRating = 0;
186
187	47	if (sumLength <= FOUR) {
188	4	minRating = FIVE;
189	43	} else if (sumLength >= FIVE && sumLength <= SEVEN) {
190	18	minRating = FOUR;
191	25	} else if (sumLength >= EIGHT && sumLength <= ELEVEN) {
192	17	minRating = THREE;
193	8	} else if (sumLength == TWELVE) {
194	7	minRating = TWO;
195		} else {
196	1	minRating = ONE; // docs said little here.
197		}
198
199	47	return minRating;
200		}
201
202		/**
203		* Determines if two names are homophonous via Match Rating Approach (MRA) algorithm. It should be noted that the
204		* strings are cleaned in the same way as {@link #encode(String)}.
205		*
206		* @param name1
207		* First of the 2 strings (names) to compare
208		* @param name2
209		* Second of the 2 names to compare
210		* @return <code>true</code> if the encodings are identical <code>false</code> otherwise.
211		*/
212		public boolean isEncodeEquals(String name1, String name2) {
213		// Bulletproof for trivial input - NINO
214	51	if (name1 == null \|\| EMPTY.equalsIgnoreCase(name1) \|\| SPACE.equalsIgnoreCase(name1)) {
215	5	return false;
216	46	} else if (name2 == null \|\| EMPTY.equalsIgnoreCase(name2) \|\| SPACE.equalsIgnoreCase(name2)) {
217	5	return false;
218	41	} else if (name1.length() == 1 \|\| name2.length() == 1) {
219	3	return false;
220	38	} else if (name1.equalsIgnoreCase(name2)) {
221	1	return true;
222		}
223
224		// Preprocessing
225	37	name1 = cleanName(name1);
226	37	name2 = cleanName(name2);
227
228		// Actual MRA Algorithm
229
230		// 1. Remove vowels
231	37	name1 = removeVowels(name1);
232	37	name2 = removeVowels(name2);
233
234		// 2. Remove double consonants
235	37	name1 = removeDoubleConsonants(name1);
236	37	name2 = removeDoubleConsonants(name2);
237
238		// 3. Reduce down to 3 letters
239	37	name1 = getFirst3Last3(name1);
240	37	name2 = getFirst3Last3(name2);
241
242		// 4. Check for length difference - if 3 or greater then no similarity
243		// comparison is done
244	37	if (Math.abs(name1.length() - name2.length()) >= THREE) {
245	1	return false;
246		}
247
248		// 5. Obtain the minimum rating value by calculating the length sum of the
249		// encoded Strings and sending it down.
250	36	final int sumLength = Math.abs(name1.length() + name2.length());
251	36	int minRating = 0;
252	36	minRating = getMinRating(sumLength);
253
254		// 6. Process the encoded Strings from left to right and remove any
255		// identical characters found from both Strings respectively.
256	36	final int count = leftToRightThenRightToLeftProcessing(name1, name2);
257
258		// 7. Each PNI item that has a similarity rating equal to or greater than
259		// the min is considered to be a good candidate match
260	36	return count >= minRating;
261
262		}
263
264		/**
265		* Determines if a letter is a vowel.
266		*
267		* <h2>API Usage</h2>
268		* <p>
269		* Consider this method private, it is package protected for unit testing only.
270		* </p>
271		*
272		* @param letter
273		* The letter under investiagtion
274		* @return True if a vowel, else false
275		*/
276		boolean isVowel(final String letter) {
277	83	return letter.equalsIgnoreCase("E") \|\| letter.equalsIgnoreCase("A") \|\| letter.equalsIgnoreCase("O") \|\|
278		letter.equalsIgnoreCase("I") \|\| letter.equalsIgnoreCase("U");
279		}
280
281		/**
282		* Processes the names from left to right (first) then right to left removing identical letters in same positions.
283		* Then subtracts the longer string that remains from 6 and returns this.
284		*
285		* <h2>API Usage</h2>
286		* <p>
287		* Consider this method private, it is package protected for unit testing only.
288		* </p>
289		*
290		* @param name1
291		* name2
292		* @return
293		*/
294		int leftToRightThenRightToLeftProcessing(final String name1, final String name2) {
295	38	final char[] name1Char = name1.toCharArray();
296	38	final char[] name2Char = name2.toCharArray();
297
298	38	final int name1Size = name1.length() - 1;
299	38	final int name2Size = name2.length() - 1;
300
301	38	String name1LtRStart = EMPTY;
302	38	String name1LtREnd = EMPTY;
303
304	38	String name2RtLStart = EMPTY;
305	38	String name2RtLEnd = EMPTY;
306
307	195	for (int i = 0; i < name1Char.length; i++) {
308	165	if (i > name2Size) {
309	8	break;
310		}
311
312	157	name1LtRStart = name1.substring(i, i + 1);
313	157	name1LtREnd = name1.substring(name1Size - i, name1Size - i + 1);
314
315	157	name2RtLStart = name2.substring(i, i + 1);
316	157	name2RtLEnd = name2.substring(name2Size - i, name2Size - i + 1);
317
318		// Left to right...
319	157	if (name1LtRStart.equals(name2RtLStart)) {
320	88	name1Char[i] = ' ';
321	88	name2Char[i] = ' ';
322		}
323
324		// Right to left...
325	157	if (name1LtREnd.equals(name2RtLEnd)) {
326	59	name1Char[name1Size - i] = ' ';
327	59	name2Char[name2Size - i] = ' ';
328		}
329		}
330
331		// Char arrays -> string & remove extraneous space
332	38	final String strA = new String(name1Char).replaceAll("\\s+", EMPTY);
333	38	final String strB = new String(name2Char).replaceAll("\\s+", EMPTY);
334
335		// Final bit - subtract longest string from 6 and return this int value
336	38	if (strA.length() > strB.length()) {
337	8	return Math.abs(SIX - strA.length());
338		} else {
339	30	return Math.abs(SIX - strB.length());
340		}
341		}
342
343		/**
344		* Removes accented letters and replaces with non-accented ascii equivalent Case is preserved.
345		* http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29
346		*
347		* @param accentedWord
348		* The word that may have accents in it.
349		* @return De-accented word
350		*/
351		String removeAccents(final String accentedWord) {
352	87	if (accentedWord == null) {
353	1	return null;
354		}
355
356	86	final StringBuilder sb = new StringBuilder();
357	86	final int n = accentedWord.length();
358
359	794	for (int i = 0; i < n; i++) {
360	708	final char c = accentedWord.charAt(i);
361	708	final int pos = UNICODE.indexOf(c);
362	708	if (pos > -1) {
363	60	sb.append(PLAIN_ASCII.charAt(pos));
364		} else {
365	648	sb.append(c);
366		}
367		}
368
369	86	return sb.toString();
370		}
371
372		/**
373		* Replaces any double consonant pair with the single letter equivalent.
374		*
375		* <h2>API Usage</h2>
376		* <p>
377		* Consider this method private, it is package protected for unit testing only.
378		* </p>
379		*
380		* @param name
381		* String to have double consonants removed
382		* @return Single consonant word
383		*/
384		String removeDoubleConsonants(final String name) {
385	80	String replacedName = name.toUpperCase();
386	1760	for (final String dc : DOUBLE_CONSONANT) {
387	1680	if (replacedName.contains(dc)) {
388	12	final String singleLetter = dc.substring(0, 1);
389	12	replacedName = replacedName.replace(dc, singleLetter);
390		}
391		}
392	80	return replacedName;
393		}
394
395		/**
396		* Deletes all vowels unless the vowel begins the word.
397		*
398		* <h2>API Usage</h2>
399		* <p>
400		* Consider this method private, it is package protected for unit testing only.
401		* </p>
402		*
403		* @param name
404		* The name to have vowels removed
405		* @return De-voweled word
406		*/
407		String removeVowels(String name) {
408		// Extract first letter
409	80	final String firstLetter = name.substring(0, 1);
410
411	80	name = name.replaceAll("A", EMPTY);
412	80	name = name.replaceAll("E", EMPTY);
413	80	name = name.replaceAll("I", EMPTY);
414	80	name = name.replaceAll("O", EMPTY);
415	80	name = name.replaceAll("U", EMPTY);
416
417	80	name = name.replaceAll("\\s{2,}\\b", SPACE);
418
419		// return isVowel(firstLetter) ? (firstLetter + name) : name;
420	80	if (isVowel(firstLetter)) {
421	16	return firstLetter + name;
422		} else {
423	64	return name;
424		}
425		}
426		}