View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.rng.sampling.distribution;
18  
19  import java.util.ArrayList;
20  import java.util.Arrays;
21  import java.util.List;
22  import org.apache.commons.math3.stat.inference.ChiSquareTest;
23  import org.junit.jupiter.api.Assertions;
24  import org.junit.jupiter.params.ParameterizedTest;
25  import org.junit.jupiter.params.provider.MethodSource;
26  
27  /**
28   * Tests for random deviates generators.
29   */
30  class DiscreteSamplerParametricTest {
31      private static Iterable<DiscreteSamplerTestData> getSamplerTestData() {
32          return DiscreteSamplersList.list();
33      }
34  
35      @ParameterizedTest
36      @MethodSource("getSamplerTestData")
37      void testSampling(DiscreteSamplerTestData data) {
38          final int sampleSize = 10000;
39          // Probabilities are normalised by the chi-square test
40          check(sampleSize,
41                data.getSampler(),
42                data.getPoints(),
43                data.getProbabilities());
44      }
45  
46      /**
47       * Performs a chi-square test of homogeneity of the observed
48       * distribution with the expected distribution.
49       * An average failure rate higher than 5% causes the test case
50       * to fail.
51       *
52       * @param sampler Sampler.
53       * @param sampleSize Number of random values to generate.
54       * @param points Outcomes.
55       * @param expected Expected counts of the given outcomes.
56       */
57      private static void check(long sampleSize,
58                                DiscreteSampler sampler,
59                                int[] points,
60                                double[] expected) {
61          final ChiSquareTest chiSquareTest = new ChiSquareTest();
62          final int numTests = 50;
63  
64          // Run the tests.
65          int numFailures = 0;
66  
67          final int numBins = points.length;
68          final long[] observed = new long[numBins];
69  
70          // For storing chi2 larger than the critical value.
71          final List<Double> failedStat = new ArrayList<>();
72          try {
73              for (int i = 0; i < numTests; i++) {
74                  Arrays.fill(observed, 0);
75                  SAMPLE: for (long j = 0; j < sampleSize; j++) {
76                      final int value = sampler.sample();
77  
78                      for (int k = 0; k < numBins; k++) {
79                          if (value == points[k]) {
80                              ++observed[k];
81                              continue SAMPLE;
82                          }
83                      }
84                  }
85  
86                  final double p = chiSquareTest.chiSquareTest(expected, observed);
87                  if (p < 0.01) {
88                      failedStat.add(p);
89                      ++numFailures;
90                  }
91              }
92          } catch (Exception e) {
93              // Should never happen.
94              throw new RuntimeException("Unexpected", e);
95          }
96  
97          // The expected number of failed tests can be modelled as a Binomial distribution
98          // B(n, p) with n=50, p=0.01 (50 tests with a 1% significance level).
99          // The cumulative probability of the number of failed tests (X) is:
100         // x     P(X>x)
101         // 1     0.0894
102         // 2     0.0138
103         // 3     0.0016
104 
105         if (numFailures > 3) { // Test will fail with 0.16% probability
106             Assertions.fail(String.format(
107                     "%s: Too many failures for sample size = %d " +
108                     " (%d out of %d tests failed, chi2=%s",
109                     sampler, sampleSize, numFailures, numTests,
110                     Arrays.toString(failedStat.toArray(new Double[0]))));
111         }
112     }
113 }