View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.similarity;
18  
19  /**
20   * The hamming distance between two strings of equal length is the number of
21   * positions at which the corresponding symbols are different.
22   *
23   * <p>
24   * For further explanation about the Hamming Distance, take a look at its
25   * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
26   * </p>
27   *
28   * @since 1.0
29   */
30  public class HammingDistance implements EditDistance<Integer> {
31  
32      /**
33       * Find the Hamming Distance between two strings with the same
34       * length.
35       *
36       * <p>The distance starts with zero, and for each occurrence of a
37       * different character in either String, it increments the distance
38       * by 1, and finally return its value.</p>
39       *
40       * <p>Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths
41       * will throw IllegalArgumentException</p>
42       *
43       * <pre>
44       * distance.apply("", "")               = 0
45       * distance.apply("pappa", "pappa")     = 0
46       * distance.apply("1011101", "1011111") = 1
47       * distance.apply("ATCG", "ACCC")       = 2
48       * distance.apply("karolin", "kerstin"  = 3
49       * </pre>
50       *
51       * @param left the first CharSequence, must not be null
52       * @param right the second CharSequence, must not be null
53       * @return distance
54       * @throws IllegalArgumentException if either input is {@code null} or
55       *             if they do not have the same length
56       */
57      @Override
58      public Integer apply(final CharSequence left, final CharSequence right) {
59          if (left == null || right == null) {
60              throw new IllegalArgumentException("Strings must not be null");
61          }
62  
63          if (left.length() != right.length()) {
64              throw new IllegalArgumentException("Strings must have the same length");
65          }
66  
67          int distance = 0;
68  
69          for (int i = 0; i < left.length(); i++) {
70              if (left.charAt(i) != right.charAt(i)) {
71                  distance++;
72              }
73          }
74  
75          return distance;
76      }
77  
78  }