1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.math4.legacy.ml.clustering;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.List;
24
25 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
26 import org.apache.commons.math4.legacy.ml.distance.EuclideanDistance;
27 import org.apache.commons.rng.simple.RandomSource;
28 import org.apache.commons.rng.UniformRandomProvider;
29 import org.junit.Assert;
30 import org.junit.Before;
31 import org.junit.Test;
32
33 public class KMeansPlusPlusClustererTest {
34
35 private UniformRandomProvider random;
36
37 @Before
38 public void setUp() {
39 random = RandomSource.MT_64.create(1746432956321L);
40 }
41
42
43
44
45
46
47 @Test
48 public void testPerformClusterAnalysisDegenerate() {
49 KMeansPlusPlusClusterer<DoublePoint> transformer =
50 new KMeansPlusPlusClusterer<>(1, 1);
51
52 DoublePoint[] points = new DoublePoint[] {
53 new DoublePoint(new int[] { 1959, 325100 }),
54 new DoublePoint(new int[] { 1960, 373200 }), };
55 List<? extends Cluster<DoublePoint>> clusters = transformer.cluster(Arrays.asList(points));
56 Assert.assertEquals(1, clusters.size());
57 Assert.assertEquals(2, clusters.get(0).getPoints().size());
58 DoublePoint pt1 = new DoublePoint(new int[] { 1959, 325100 });
59 DoublePoint pt2 = new DoublePoint(new int[] { 1960, 373200 });
60 Assert.assertTrue(clusters.get(0).getPoints().contains(pt1));
61 Assert.assertTrue(clusters.get(0).getPoints().contains(pt2));
62 }
63
64 @Test
65 public void testCertainSpace() {
66 KMeansPlusPlusClusterer.EmptyClusterStrategy[] strategies = {
67 KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_VARIANCE,
68 KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_POINTS_NUMBER,
69 KMeansPlusPlusClusterer.EmptyClusterStrategy.FARTHEST_POINT
70 };
71 for (KMeansPlusPlusClusterer.EmptyClusterStrategy strategy : strategies) {
72 int numberOfVariables = 27;
73
74 int position1 = 1;
75 int position2 = position1 + numberOfVariables;
76 int position3 = position2 + numberOfVariables;
77 int position4 = position3 + numberOfVariables;
78
79 int multiplier = 1000000;
80
81 DoublePoint[] breakingPoints = new DoublePoint[numberOfVariables];
82
83 for (int i = 0; i < numberOfVariables; i++) {
84 int points[] = { position1, position2, position3, position4 };
85
86 for (int j = 0; j < points.length; j++) {
87 points[j] *= multiplier;
88 }
89 DoublePoint DoublePoint = new DoublePoint(points);
90 breakingPoints[i] = DoublePoint;
91 position1 += numberOfVariables;
92 position2 += numberOfVariables;
93 position3 += numberOfVariables;
94 position4 += numberOfVariables;
95 }
96
97 for (int n = 2; n < 27; ++n) {
98 KMeansPlusPlusClusterer<DoublePoint> transformer =
99 new KMeansPlusPlusClusterer<>(n, 100, new EuclideanDistance(), random, strategy);
100
101 List<? extends Cluster<DoublePoint>> clusters =
102 transformer.cluster(Arrays.asList(breakingPoints));
103
104 Assert.assertEquals(n, clusters.size());
105 int sum = 0;
106 for (Cluster<DoublePoint> cluster : clusters) {
107 sum += cluster.getPoints().size();
108 }
109 Assert.assertEquals(numberOfVariables, sum);
110 }
111 }
112 }
113
114
115
116
117
118 private static final class CloseDistance extends EuclideanDistance {
119 private static final long serialVersionUID = 1L;
120
121 @Override
122 public double compute(double[] a, double[] b) {
123 return super.compute(a, b) * 0.001;
124 }
125 }
126
127
128
129
130 @Test
131 public void testSmallDistances() {
132
133
134 final int[] repeatedArray = { 0 };
135 final int[] uniqueArray = { 1 };
136 final DoublePoint repeatedPoint = new DoublePoint(repeatedArray);
137 final DoublePoint uniquePoint = new DoublePoint(uniqueArray);
138
139 final Collection<DoublePoint> points = new ArrayList<>();
140 final int numRepeated = 10000;
141 for (int i = 0; i < numRepeated; i++) {
142 points.add(repeatedPoint);
143 }
144 points.add(uniquePoint);
145
146 final KMeansPlusPlusClusterer<DoublePoint> clusterer =
147 new KMeansPlusPlusClusterer<>(2, 1, new CloseDistance(), random);
148 final List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
149
150
151 boolean uniquePointIsCenter = false;
152 for (CentroidCluster<DoublePoint> cluster : clusters) {
153 if (cluster.getCenter().equals(uniquePoint)) {
154 uniquePointIsCenter = true;
155 }
156 }
157 Assert.assertTrue(uniquePointIsCenter);
158 }
159
160
161
162
163 @Test(expected=NumberIsTooSmallException.class)
164 public void testPerformClusterAnalysisToManyClusters() {
165 KMeansPlusPlusClusterer<DoublePoint> transformer =
166 new KMeansPlusPlusClusterer<>(3, 1, new EuclideanDistance(), random);
167
168 DoublePoint[] points = new DoublePoint[] {
169 new DoublePoint(new int[] {
170 1959, 325100
171 }), new DoublePoint(new int[] {
172 1960, 373200
173 })
174 };
175
176 transformer.cluster(Arrays.asList(points));
177 }
178 }