View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.distribution;
18  
19  import java.lang.reflect.Modifier;
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collections;
23  import java.util.Properties;
24  import java.util.function.Function;
25  import java.util.stream.IntStream;
26  import java.util.stream.Stream;
27  import org.apache.commons.math3.util.MathArrays;
28  import org.apache.commons.rng.simple.RandomSource;
29  import org.apache.commons.statistics.distribution.DistributionTestData.DiscreteDistributionTestData;
30  import org.junit.jupiter.api.Assertions;
31  import org.junit.jupiter.api.Assumptions;
32  import org.junit.jupiter.api.TestInstance;
33  import org.junit.jupiter.api.TestInstance.Lifecycle;
34  import org.junit.jupiter.params.ParameterizedTest;
35  import org.junit.jupiter.params.provider.Arguments;
36  import org.junit.jupiter.params.provider.MethodSource;
37  
38  /**
39   * Abstract base class for {@link DiscreteDistribution} tests.
40   *
41   * <p>This class uses parameterized tests that are repeated for instances of a
42   * distribution. The distribution, test input and expected values are generated
43   * dynamically from properties files loaded from resources.
44   *
45   * <p>The class has a single instance (see {@link Lifecycle#PER_CLASS}) that loads properties
46   * files from resources on creation. Resource files are assumed to be in the corresponding package
47   * for the class and named sequentially from 1:
48   * <pre>
49   * test.distname.1.properties
50   * test.distname.2.properties
51   * </pre>
52   * <p>Where {@code distname} is the name of the distribution. The name is dynamically
53   * created in {@link #getDistributionName()} and can be overridden by implementing classes.
54   * A single parameterization of a distribution is tested using a single properties file.
55   *
56   * <p>To test a distribution create a sub-class and override the following methods:
57   * <ul>
58   * <li>{@link #makeDistribution(Object...) makeDistribution(Object...)} - Creates the distribution from the parameters
59   * <li>{@link #makeInvalidParameters()} - Generate invalid parameters for the distribution
60   * <li>{@link #getParameterNames()} - Return the names of parameter accessors
61   * </ul>
62   *
63   * <p>The distribution is created using
64   * {@link #makeDistribution(Object...) makeDistribution(Object...)}. This should
65   * create an instance of the distribution using parameters defined in the properties file.
66   * The parameters are parsed from String values to the appropriate parameter object. Currently
67   * this supports Double and Integer; numbers can be unboxed and used to create the distribution.
68   *
69   * <p>Illegal arguments for the distribution are tested from parameters provided by
70   * {@link #makeInvalidParameters()}. If there are no illegal arguments this method may return
71   * null to skip the test. Primitive parameters are boxed to objects so ensure the canonical form
72   * is used, e.g. {@code 1.0} not {@code 1} for a {@code double} argument.
73   *
74   * <p>If the distribution provides parameter accessors then the child test class can return
75   * the accessor names using {@link #getParameterNames()}. The distribution method accessors
76   * will be detected and invoked using reflection. The accessor must provide the same value
77   * as that passed to the {@link #makeDistribution(Object...)} method to create the distribution.
78   * This method may return null to skip the test, or null for the name to skip the test for a
79   * single parameter accessor.
80   *
81   * <p>The properties file must contain parameters for the distribution, properties of the
82   * distribution (moments and bounds) and points to test the CDF and PMF with the expected values.
83   * This information can be used to evaluate the distribution CDF and PMF but also the survival
84   * function, consistency of the probability computations and random sampling.
85   *
86   * <p>Optionally:
87   * <ul>
88   * <li>Points for the PMF (and log PMF) can be specified. The default will use the CDF points.
89   * Note: It is not expected that evaluation of the PMF will require different points to the CDF.
90   * <li>Points and expected values for the inverse CDF can be specified. These are used in
91   * addition to test the inverse mapping of the CDF values to the CDF test points.
92   * <li>Expected values for the log PMF can be specified. The default will use
93   * {@link Math#log(double)} on the PMF values.
94   * <li>Points and expected values for the survival function can be specified. These are used in
95   * addition to test the inverse mapping of the SF values to the SF test points.
96   * The default will use the expected CDF values (SF = 1 - CDF).
97   * <li>A tolerance for equality assertions. The default is set by {@link #getAbsoluteTolerance()}
98   * and {@link #getRelativeTolerance()}.
99   * </ul>
100  *
101  * <p>If the distribution provides higher precision implementations of
102  * cumulative probability and/or survival probability as the values approach zero, then test
103  * points and expected values can be provided for high-precision computations. If the default
104  * absolute tolerance has been set to non-zero, then very small p-values will require the
105  * high-precision absolute tolerance is configured for the test to a suitable magnitude (see below).
106  *
107  * <p>Per-test configuration
108  *
109  * <p>Each test is identified with a {@link TestName} key in the properties file. This key
110  * can be used to set a per-test tolerance, or disable the test:
111  * <pre>
112  * cdf.hp.relative = 1e-14
113  * cdf.hp.absolute = 1e-50
114  * sampling.disable
115  * </pre>
116  *
117  * <p>Note: All properties files are read during test initialization. Any errors in a single
118  * property file will throw an exception, invalidating the initialization and no tests
119  * will be executed.
120  *
121  * <p>The parameterized tests in this class are inherited. The tests are final and cannot be
122  * changed. This ensures each instance of a distribution is tested for all functionality in
123  * the {@link DiscreteDistribution} interface. Arguments to the parameterized tests are
124  * generated dynamically using methods of the same name. These can be over-ridden in child
125  * classes to alter parameters. Throwing a
126  * {@link org.opentest4j.TestAbortedException TestAbortedException} in this method will
127  * skip the test as the arguments will not be generated.
128  *
129  * <p>Each parameterized test is effectively static; it uses no instance data.
130  * To implement additional test cases with a specific distribution instance and test
131  * data, create a test in the child class and call the relevant test case to verify
132  * results. Note that it is recommended to use the properties file as this ensures the
133  * entire functionality of the distribution is tested for that parameterization.
134  *
135  * <p>Tests using floating-point equivalence comparisons are asserted using a {@link DoubleTolerance}.
136  * This interface computes true or false for the comparison of two {@code double} types.
137  * This allows the flexibility to use absolute, relative or ULP thresholds in combinations
138  * built using And or Or conditions to compare numbers. The default uses an Or combination
139  * of the absolute and relative thresholds. See {@link DoubleTolerances} to construct
140  * custom instances for additional tests.
141  *
142  * <p>Test data should be validated against reference tables or other packages where
143  * possible, and the source of the reference data and/or validation should be documented
144  * in the properties file or additional test cases as appropriate.
145  *
146  * <p>The properties file uses {@code key=value} pairs loaded using a
147  * {@link java.util.Properties} object. Values will be read as String and then parsed to
148  * numeric data, and data arrays. Multi-line values can use a {@code \} character to join lines.
149  * Data in the properties file will be converted to numbers using standard parsing
150  * functions appropriate to the primitive type, e.g. {@link Double#parseDouble(String)}.
151  * Special double values should use NaN, Infinity and -Infinity. As a convenience
152  * for creating test data parsing of doubles supports the following notations from
153  * other languages ('inf', 'Inf'); parsing of ints supports 'min' and 'max' for the
154  * minimum and maximum integer values.
155  *
156  * <p>The following is a complete properties file for a distribution:
157  * <pre>
158  * parameters = 0.5 1.0
159  * # Computed using XYZ
160  * mean = 1.0
161  * variance = NaN
162  * # optional (default -2147483648, Integer.MIN_VALUE)
163  * lower = 0
164  * # optional (default 2147483647, Integer.MAX_VALUE)
165  * upper = max
166  * # optional (default 1e-14 or over-ridden in getRelativeTolerance())
167  * tolerance.relative = 1e-9
168  * # optional (default 0.0 or over-ridden in getAbsoluteTolerance())
169  * tolerance.absolute = 0.0
170  * cdf.points = 0, 2
171  * cdf.values = 0.0, 0.5
172  * # optional (default uses cdf.points)
173  * pmf.points = 0, 40000
174  * pmf.values = 0.0,\
175  *  0.0
176  * # optional (default uses log pmf.values)
177  * logpmf.values = -1900.123, -Infinity
178  * # optional (default uses cdf.points and 1 - cdf.values)
179  * sf.points = 400
180  * sf.values = 0.0
181  * # optional high-precision CDF test
182  * cdf.hp.points = 1e-16
183  * cdf.hp.values = 1.23e-17
184  * # optional high-precision survival function test
185  * sf.hp.points = 9
186  * sf.hp.values = 2.34e-18
187  * # optional inverse CDF test (defaults to ignore)
188  * icdf.points = 0.0, 0.5
189  * icdf.values = 3, 4
190  * # optional inverse CDF test (defaults to ignore)
191  * isf.points = 1.0, 0.5
192  * isf.values = 3, 4
193  * # Optional per-test tolerance and disable
194  * cdf.hp.relative = 1e-14
195  * cdf.hp.absolute = 1e-50
196  * sampling.disable = true
197  * </pre>
198  *
199  * <p>See {@link BinomialDistributionTest} for an example and the resource file {@code test.binomial.1.properties}.
200  */
201 @TestInstance(Lifecycle.PER_CLASS)
202 abstract class BaseDiscreteDistributionTest
203     extends BaseDistributionTest<DiscreteDistribution, DiscreteDistributionTestData> {
204 
    /** Threshold for the PMF summation test for a range that is too large.
     * NOTE(review): the consuming code is outside this view — presumably ranges wider
     * than this many points are skipped to keep the summation test fast; confirm at use site. */
    private static final int SUM_RANGE_TOO_LARGE = 50;
207 
    /** {@inheritDoc}
     *
     * <p>Creates the discrete distribution test data by parsing the properties file. */
    @Override
    DiscreteDistributionTestData makeDistributionData(Properties properties) {
        return new DiscreteDistributionTestData(properties);
    }
212 
213     //------------------------ Methods to stream the test data -----------------------------
214 
215     // The @MethodSource annotation will default to a no arguments method of the same name
216     // as the @ParameterizedTest method. These can be overridden by child classes to
217     // stream different arguments to the test case.
218 
219     /**
220      * Create a stream of arguments containing the distribution to test, the CDF test
221      * points and the test tolerance.
222      *
223      * @param name Name of the function under test
224      * @return the stream
225      */
226     Stream<Arguments> streamCdfTestPoints(TestName name) {
227         return stream(name,
228                       DiscreteDistributionTestData::getCdfPoints);
229     }
230 
231     /**
232      * Create a stream of arguments containing the distribution to test, the PMF test points
233      * and values, and the test tolerance.
234      *
235      * @return the stream
236      */
237     Stream<Arguments> testProbability() {
238         return stream(TestName.PMF,
239                       DiscreteDistributionTestData::getPmfPoints,
240                       DiscreteDistributionTestData::getPmfValues);
241     }
242 
243     /**
244      * Create a stream of arguments containing the distribution to test, the log PMF test points
245      * and values, and the test tolerance.
246      *
247      * @return the stream
248      */
249     Stream<Arguments> testLogProbability() {
250         return stream(TestName.LOGPMF,
251                       DiscreteDistributionTestData::getPmfPoints,
252                       DiscreteDistributionTestData::getLogPmfValues);
253     }
254 
255     /**
256      * Create a stream of arguments containing the distribution to test, the CDF test points
257      * and values, and the test tolerance.
258      *
259      * @return the stream
260      */
261     Stream<Arguments> testCumulativeProbability() {
262         return stream(TestName.CDF,
263                       DiscreteDistributionTestData::getCdfPoints,
264                       DiscreteDistributionTestData::getCdfValues);
265     }
266 
267     /**
268      * Create a stream of arguments containing the distribution to test, the survival function
269      * test points and values, and the test tolerance.
270      *
271      * <p>This defaults to using the CDF points. The survival function is tested as 1 - CDF.
272      *
273      * @return the stream
274      */
275     Stream<Arguments> testSurvivalProbability() {
276         return stream(TestName.SF,
277                       DiscreteDistributionTestData::getSfPoints,
278                       DiscreteDistributionTestData::getSfValues);
279     }
280 
281     /**
282      * Create a stream of arguments containing the distribution to test, the CDF test points
283      * and values, and the test tolerance for high-precision computations.
284      *
285      * @return the stream
286      */
287     Stream<Arguments> testCumulativeProbabilityHighPrecision() {
288         return stream(TestName.CDF_HP,
289                       DiscreteDistributionTestData::getCdfHpPoints,
290                       DiscreteDistributionTestData::getCdfHpValues);
291     }
292 
293     /**
294      * Create a stream of arguments containing the distribution to test, the survival function
295      * test points and values, and the test tolerance for high-precision computations.
296      *
297      * @return the stream
298      */
299     Stream<Arguments> testSurvivalProbabilityHighPrecision() {
300         return stream(TestName.SF_HP,
301                       DiscreteDistributionTestData::getSfHpPoints,
302                       DiscreteDistributionTestData::getSfHpValues);
303     }
304 
305     /**
306      * Create a stream of arguments containing the distribution to test, the inverse CDF test points
307      * and values. No test tolerance is required as the values are integers and must be exact.
308      *
309      * @return the stream
310      */
311     Stream<Arguments> testInverseCumulativeProbability() {
312         return stream(TestName.ICDF,
313                       DiscreteDistributionTestData::getIcdfPoints,
314                       DiscreteDistributionTestData::getIcdfValues);
315     }
316 
317     /**
318      * Create a stream of arguments containing the distribution to test, the inverse CDF test points
319      * and values. No test tolerance is required as the values are integers and must be exact.
320      *
321      * @return the stream
322      */
323     Stream<Arguments> testInverseSurvivalProbability() {
324         return stream(TestName.ISF,
325                       DiscreteDistributionTestData::getIsfPoints,
326                       DiscreteDistributionTestData::getIsfValues);
327     }
328 
329     /**
330      * Create a stream of arguments containing the distribution to test and the CDF test points.
331      *
332      * @return the stream
333      */
334     Stream<Arguments> testCumulativeProbabilityInverseMapping() {
335         return stream(TestName.CDF_MAPPING,
336                       DiscreteDistributionTestData::getCdfPoints);
337     }
338 
339     /**
340      * Create a stream of arguments containing the distribution to test and the SF test points.
341      *
342      * @return the stream
343      */
344     Stream<Arguments> testSurvivalProbabilityInverseMapping() {
345         return stream(TestName.SF_MAPPING,
346                       DiscreteDistributionTestData::getSfPoints);
347     }
348 
349     /**
350      * Create a stream of arguments containing the distribution to test and
351      * high-precision CDF test points.
352      *
353      * @return the stream
354      */
355     Stream<Arguments> testCumulativeProbabilityHighPrecisionInverseMapping() {
356         return stream(TestName.CDF_HP_MAPPING,
357                       DiscreteDistributionTestData::getCdfHpPoints);
358     }
359 
360     /**
361      * Create a stream of arguments containing the distribution to test and
362      * high-precision SF test points.
363      *
364      * @return the stream
365      */
366     Stream<Arguments> testSurvivalProbabilityHighPrecisionInverseMapping() {
367         return stream(TestName.SF_HP_MAPPING,
368                       DiscreteDistributionTestData::getSfHpPoints);
369     }
370 
371     /**
372      * Create a stream of arguments containing the distribution to test, the test points
373      * to evaluate the CDF and survival function, and the test tolerance. CDF + SF must equal 1.
374      *
375      * @return the stream
376      */
377     Stream<Arguments> testSurvivalAndCumulativeProbabilityComplement() {
378         // This is not disabled based on cdf.disable && sf.disable.
379         // Those flags are intended to ignore tests against reference values.
380         return streamCdfTestPoints(TestName.COMPLEMENT);
381     }
382 
383     /**
384      * Create a stream of arguments containing the distribution to test, the test points
385      * to evaluate the CDF and probability in a range, and the test tolerance.
386      * Used to test CDF(x1) - CDF(x0) = probability(x0, x1).
387      *
388      * @return the stream
389      */
390     Stream<Arguments> testConsistency() {
391         // This is not disabled based on cdf.disable.
392         // That flag is intended to ignore tests against reference values.
393         return streamCdfTestPoints(TestName.CONSISTENCY);
394     }
395 
396     /**
397      * Create a stream of arguments containing the distribution to test and the test tolerance.
398      *
399      * @return the stream
400      */
401     Stream<Arguments> testOutsideSupport() {
402         return stream(TestName.OUTSIDE_SUPPORT);
403     }
404 
405     /**
406      * Create a stream of arguments containing the distribution to test, the PMF test points
407      * and values. The sampled PMF should sum to more than 50% of the distribution.
408      *
409      * @return the stream
410      */
411     Stream<Arguments> testSamplingPMF() {
412         return stream(TestName.SAMPLING_PMF,
413                       DiscreteDistributionTestData::getPmfPoints,
414                       DiscreteDistributionTestData::getPmfValues);
415     }
416 
417     /**
418      * Create a stream of arguments containing the distribution to test. Sampling is tested using
419      * the distribution quartiles. The quartiles should be approximately 25% of the distribution
420      * PMF. Discrete distributions that have single points that contain much more than 25% of the
421      * probability mass will be ignored.
422      *
423      * @return the stream
424      */
425     Stream<Arguments> testSampling() {
426         return stream(TestName.SAMPLING);
427     }
428 
    /**
     * Stream the arguments to test the probability sums. The test
     * sums the probability mass function between consecutive test points for the cumulative
     * distribution function. The default tolerance is based on the test tolerance for evaluation
     * of single points of the CDF.
     * Override this method to change the tolerance.
     *
     * @return the stream
     */
    Stream<Arguments> testProbabilitySums() {
        // Assume the test tolerance for single CDF values can be relaxed slightly
        // when summing values.
        final double scale = 10;
        final TestName cdf = TestName.CDF;
        final Function<DiscreteDistributionTestData, DoubleTolerance> tolerance =
            d -> createAbsOrRelTolerance(d.getAbsoluteTolerance(cdf) * scale,
                                         d.getRelativeTolerance(cdf) * scale);
        final TestName name = TestName.PMF_SUM;
        return stream(d -> d.isDisabled(name),
                      DiscreteDistributionTestData::getCdfPoints,
                      DiscreteDistributionTestData::getCdfValues,
                      tolerance, name.toString());
    }
452 
453     /**
454      * Create a stream of arguments containing the distribution to test, the support
455      * lower and upper bound, and the support connect flag.
456      *
457      * @return the stream
458      */
459     Stream<Arguments> testSupport() {
460         return streamArguments(TestName.SUPPORT,
461             d -> Arguments.of(namedDistribution(d.getParameters()), d.getLower(), d.getUpper()));
462     }
463 
464     /**
465      * Create a stream of arguments containing the distribution to test, the mean
466      * and variance, and the test tolerance.
467      *
468      * @return the stream
469      */
470     Stream<Arguments> testMoments() {
471         final TestName name = TestName.MOMENTS;
472         return streamArguments(name,
473             d -> Arguments.of(namedDistribution(d.getParameters()), d.getMean(), d.getVariance(),
474                               createTestTolerance(d, name)));
475     }
476 
477     /**
478      * Create a stream of arguments containing the distribution to test.
479      *
480      * @return the stream
481      */
482     Stream<Arguments> testMedian() {
483         return streamArguments(TestName.MEDIAN,
484             d -> Arguments.of(namedDistribution(d.getParameters())));
485     }
486 
487     //------------------------ Tests -----------------------------
488 
489     // Tests are final. It is expected that the test can be modified by overriding
490     // the method used to stream the arguments, for example to use a specific tolerance
491     // for a test in preference to the tolerance defined in the properties file.
492 
493     // Extract the tests from the previous abstract test
494 
495     /**
496      * Test that probability calculations match expected values.
497      */
498     @ParameterizedTest
499     @MethodSource
500     final void testProbability(DiscreteDistribution dist,
501                                int[] points,
502                                double[] values,
503                                DoubleTolerance tolerance) {
504         for (int i = 0; i < points.length; i++) {
505             final int x = points[i];
506             TestUtils.assertEquals(values[i],
507                 dist.probability(x), tolerance,
508                 () -> "Incorrect probability mass value returned for " + x);
509         }
510     }
511 
512     /**
513      * Test that logarithmic probability calculations match expected values.
514      */
515     @ParameterizedTest
516     @MethodSource
517     final void testLogProbability(DiscreteDistribution dist,
518                                   int[] points,
519                                   double[] values,
520                                   DoubleTolerance tolerance) {
521         for (int i = 0; i < points.length; i++) {
522             final int x = points[i];
523             TestUtils.assertEquals(values[i],
524                 dist.logProbability(x), tolerance,
525                 () -> "Incorrect log probability mass value returned for " + x);
526         }
527     }
528 
529     /**
530      * Test that cumulative probability density calculations match expected values.
531      */
532     @ParameterizedTest
533     @MethodSource
534     final void testCumulativeProbability(DiscreteDistribution dist,
535                                          int[] points,
536                                          double[] values,
537                                          DoubleTolerance tolerance) {
538         // verify cumulativeProbability(double)
539         for (int i = 0; i < points.length; i++) {
540             final int x = points[i];
541             TestUtils.assertEquals(values[i],
542                 dist.cumulativeProbability(x),
543                 tolerance,
544                 () -> "Incorrect cumulative probability value returned for " + x);
545         }
546     }
547 
548     /**
549      * Test that survival probability density calculations match expected values.
550      */
551     @ParameterizedTest
552     @MethodSource
553     final void testSurvivalProbability(DiscreteDistribution dist,
554                                        int[] points,
555                                        double[] values,
556                                        DoubleTolerance tolerance) {
557         for (int i = 0; i < points.length; i++) {
558             final double x = points[i];
559             TestUtils.assertEquals(
560                 values[i],
561                 dist.survivalProbability(points[i]),
562                 tolerance,
563                 () -> "Incorrect survival probability value returned for " + x);
564         }
565     }
566 
567     /**
568      * Test that cumulative probability is not {@code (1 - survival probability)} by testing values
569      * that would result in inaccurate results if simply calculating (1 - sf).
570      */
571     @ParameterizedTest
572     @MethodSource
573     final void testCumulativeProbabilityHighPrecision(DiscreteDistribution dist,
574                                                       int[] points,
575                                                       double[] values,
576                                                       DoubleTolerance tolerance) {
577         assertHighPrecision(tolerance, values);
578         testCumulativeProbability(dist, points, values, tolerance);
579     }
580 
581     /**
582      * Test that survival probability is not {@code (1 - cumulative probability)} by testing values
583      * that would result in inaccurate results if simply calculating (1 - cdf).
584      */
585     @ParameterizedTest
586     @MethodSource
587     final void testSurvivalProbabilityHighPrecision(DiscreteDistribution dist,
588                                                     int[] points,
589                                                     double[] values,
590                                                     DoubleTolerance tolerance) {
591         assertHighPrecision(tolerance, values);
592         testSurvivalProbability(dist, points, values, tolerance);
593     }
594 
595     /**
596      * Test that inverse cumulative probability density calculations match expected values.
597      *
598      * <p>Note: Any expected values outside the support of the distribution are ignored.
599      */
600     @ParameterizedTest
601     @MethodSource
602     final void testInverseCumulativeProbability(DiscreteDistribution dist,
603                                                 double[] points,
604                                                 int[] values) {
605         final int lower = dist.getSupportLowerBound();
606         final int upper = dist.getSupportUpperBound();
607         for (int i = 0; i < points.length; i++) {
608             final int x = values[i];
609             if (x < lower || x > upper) {
610                 continue;
611             }
612             final double p = points[i];
613             Assertions.assertEquals(
614                 x,
615                 dist.inverseCumulativeProbability(p),
616                 () -> "Incorrect inverse cumulative probability value returned for " + p);
617         }
618     }
619 
620     /**
621      * Test that inverse survival probability density calculations match expected values.
622      *
623      * <p>Note: Any expected values outside the support of the distribution are ignored.
624      */
625     @ParameterizedTest
626     @MethodSource
627     final void testInverseSurvivalProbability(DiscreteDistribution dist,
628                                               double[] points,
629                                               int[] values) {
630         final int lower = dist.getSupportLowerBound();
631         final int upper = dist.getSupportUpperBound();
632         for (int i = 0; i < points.length; i++) {
633             final int x = values[i];
634             if (x < lower || x > upper) {
635                 continue;
636             }
637             final double p = points[i];
638             Assertions.assertEquals(
639                 x,
640                 dist.inverseSurvivalProbability(p),
641                 () -> "Incorrect inverse survival probability value returned for " + p);
642         }
643     }
644 
    /**
     * Test that an inverse mapping of the cumulative probability density values matches
     * the original point, {@code x = icdf(cdf(x))}.
     *
     * <p>Note: It is possible for two points to compute the same CDF value. In this
     * case the mapping is not a bijection. Any points computing a CDF=0 or 1 are ignored
     * as this is expected to be inverted to the domain bound.
     *
     * <p>Note: Any points outside the support of the distribution are ignored.
     *
     * <p>This test checks consistency of the inverse with the forward function.
     * The test checks (where applicable):
     * <ul>
     *  <li>{@code icdf( cdf(x) ) = x}
     *  <li>{@code icdf( p > cdf(x) ) >= x+1}
     *  <li>{@code icdf( cdf(x-1) < p < cdf(x) ) = x}
     * </ul>
     *
     * <p>Does not check {@code isf( 1 - cdf(x) ) = x} since the complement {@code q = 1 - p}
     * is inexact. The bound change for the isf to compute x may be different. The isf bound
     * change is verified in a separate test.
     * Thus the {@code icdf <-> cdf} mapping and {@code isf <-> sf} mapping are verified to
     * have correct boundary changes with respect to the forward function and its inverse
     * but the boundaries are allowed to be different. This can be corrected for example
     * with an implementation that has a consistent computation for {@code x > median} and
     * another for {@code x < median} with an inverse computation determined by {@code p > 0.5}.
     */
    @ParameterizedTest
    @MethodSource
    final void testCumulativeProbabilityInverseMapping(DiscreteDistribution dist,
                                                       int[] points) {
        final int lower = dist.getSupportLowerBound();
        final int upper = dist.getSupportUpperBound();
        for (int i = 0; i < points.length; i++) {
            final int x = points[i];
            // Ignore points outside the support
            if (x < lower || x > upper) {
                continue;
            }
            final double p = dist.cumulativeProbability(x);
            // p lies in [0, 1] so an integral value means p == 0 or p == 1;
            // these invert to the domain bound, not necessarily to x.
            if ((int) p == p) {
                // Assume mapping not a bijection and ignore
                continue;
            }
            // icdf( cdf(x) ) == x
            final double x1 = dist.inverseCumulativeProbability(p);
            Assertions.assertEquals(
                x,
                x1,
                () -> "Incorrect CDF inverse value returned for " + p);
            // The next p value up should return the next value:
            // icdf( p > cdf(x) ) >= x+1, unless cdf(x+1) == cdf(x) (zero mass at x+1)
            final double pp = Math.nextUp(p);
            if (x != upper && pp != 1 && p != dist.cumulativeProbability(x + 1)) {
                final double x2 = dist.inverseCumulativeProbability(pp);
                Assertions.assertEquals(
                    x + 1,
                    x2,
                    () -> "Incorrect CDF inverse value returned for " + pp);
            }

            // Invert a probability inside the range to the previous CDF value:
            // icdf( cdf(x-1) < p < cdf(x) ) == x, probed with the midpoint
            if (x != lower) {
                final double pm1 = dist.cumulativeProbability(x - 1);
                final double px = (pm1 + p) / 2;
                // The midpoint may round down to pm1 when the interval is tiny;
                // only assert when it is strictly inside the range
                if (px > pm1) {
                    final double xx = dist.inverseCumulativeProbability(px);
                    Assertions.assertEquals(
                        x,
                        xx,
                        () -> "Incorrect CDF inverse value returned for " + px);
                }
            }
        }
    }
717 
718     /**
719      * Test that an inverse mapping of the survival probability density values matches
720      * the original point, {@code x = isf(sf(x))}.
721      *
722      * <p>Note: It is possible for two points to compute the same SF value. In this
723      * case the mapping is not a bijection. Any points computing a SF=0 or 1 are ignored
724      * as this is expected to be inverted to the domain bound.
725      *
726      * <p>Note: Any points outside the support of the distribution are ignored.
727      *
728      * <p>This test checks consistency of the inverse with the forward function.
729      * The test checks (where applicable):
730      * <ul>
731      *  <li>{@code isf( sf(x) ) = x}
732      *  <li>{@code isf( p < sf(x) ) >= x+1}
733      *  <li>{@code isf( sf(x-1) > p > sf(x) ) = x}
734      * </ul>
735      *
736      * <p>Does not check {@code icdf( 1 - sf(x) ) = x} since the complement {@code q = 1 - p}
737      * is inexact. The bound change for the icdf to compute x may be different. The icdf bound
738      * change is verified in a separate test.
739      * Thus the {@code icdf <-> cdf} mapping and {@code isf <-> sf} mapping are verified to
740      * have correct boundary changes with respect to the forward function and its inverse
741      * but the boundaries are allowed to be different. This can be corrected for example
742      * with an implementation that has a consistent computation for {@code x > median} and
743      * another for {@code x < median} with an inverse computation determined by {@code p > 0.5}.
744      */
745     @ParameterizedTest
746     @MethodSource
747     final void testSurvivalProbabilityInverseMapping(DiscreteDistribution dist,
748                                                      int[] points) {
749         final int lower = dist.getSupportLowerBound();
750         final int upper = dist.getSupportUpperBound();
751         for (int i = 0; i < points.length; i++) {
752             final int x = points[i];
753             if (x < lower || x > upper) {
754                 continue;
755             }
756             final double p = dist.survivalProbability(x);
757             if ((int) p == p) {
758                 // Assume mapping not a bijection and ignore
759                 continue;
760             }
761             final double x1 = dist.inverseSurvivalProbability(p);
762             Assertions.assertEquals(
763                 x,
764                 x1,
765                 () -> "Incorrect SF inverse value returned for " + p);
766 
767             // The next p value down should return the next value
768             final double pp = Math.nextDown(p);
769             if (x != upper && pp != 0 && p != dist.survivalProbability(x + 1)) {
770                 final double x2 = dist.inverseSurvivalProbability(pp);
771                 Assertions.assertEquals(
772                     x + 1,
773                     x2,
774                     () -> "Incorrect SF inverse value returned for " + pp);
775             }
776 
777             // Invert a probability inside the range to the previous SF value
778             if (x != lower) {
779                 final double pm1 = dist.survivalProbability(x - 1);
780                 final double px = (pm1 + p) / 2;
781                 if (px < pm1) {
782                     final double xx = dist.inverseSurvivalProbability(px);
783                     Assertions.assertEquals(
784                         x,
785                         xx,
786                         () -> "Incorrect CDF inverse value returned for " + px);
787                 }
788             }
789         }
790     }
791 
792     /**
793      * Test that an inverse mapping of the cumulative probability density values matches
794      * the original point, {@code x = icdf(cdf(x))} using the points for the high-precision
795      * CDF.
796      */
797     @ParameterizedTest
798     @MethodSource
799     final void testCumulativeProbabilityHighPrecisionInverseMapping(
800             DiscreteDistribution dist,
801             int[] points) {
802         testCumulativeProbabilityInverseMapping(dist, points);
803     }
804 
805     /**
806      * Test that an inverse mapping of the survival probability density values matches
807      * the original point, {@code x = isf(sf(x))} using the points for the high-precision
808      * SF.
809      */
810     @ParameterizedTest
811     @MethodSource
812     final void testSurvivalProbabilityHighPrecisionInverseMapping(
813             DiscreteDistribution dist,
814             int[] points) {
815         testSurvivalProbabilityInverseMapping(dist, points);
816     }
817 
818     /**
819      * Test that cumulative probability density and survival probability calculations
820      * sum to approximately 1.0.
821      */
822     @ParameterizedTest
823     @MethodSource
824     final void testSurvivalAndCumulativeProbabilityComplement(DiscreteDistribution dist,
825                                                               int[] points,
826                                                               DoubleTolerance tolerance) {
827         for (final int x : points) {
828             TestUtils.assertEquals(
829                 1.0,
830                 dist.survivalProbability(x) + dist.cumulativeProbability(x),
831                 tolerance,
832                 () -> "survival + cumulative probability were not close to 1.0 for " + x);
833         }
834     }
835 
836     /**
837      * Test that probability computations are consistent.
838      * This checks probability(x, x) = 0; probability(x, x+1) = probability(x+1)
839      * and probability(x0, x1) = CDF(x1) - CDF(x0).
840      */
841     @ParameterizedTest
842     @MethodSource
843     final void testConsistency(DiscreteDistribution dist,
844                                int[] points,
845                                DoubleTolerance tolerance) {
846         final int upper = dist.getSupportUpperBound();
847         for (int i = 0; i < points.length; i++) {
848             final int x0 = points[i];
849 
850             // Check that probability(x, x) == 0
851             Assertions.assertEquals(
852                 0.0,
853                 dist.probability(x0, x0),
854                 () -> "Non-zero probability(x, x) for " + x0);
855 
856             // Check that probability(x, x + 1) == probability(x + 1)
857             if (x0 < upper) {
858                 Assertions.assertEquals(
859                     dist.probability(x0 + 1),
860                     dist.probability(x0, x0 + 1),
861                     () -> "probability(x + 1) != probability(x, x + 1) for " + x0);
862             }
863 
864             final double cdf0 = dist.cumulativeProbability(x0);
865             final double sf0 = cdf0 >= 0.5 ? dist.survivalProbability(x0) : Double.NaN;
866             for (int j = 0; j < points.length; j++) {
867                 final int x1 = points[j];
868                 // Ignore adjacent points.
869                 // Use long arithmetic to avoid overflow if x0 is the maximum integer value
870                 if (x0 + 1L < x1) {
871                     // Check that probability(x0, x1) = CDF(x1) - CDF(x0).
872                     // If x0 is above the median it is more accurate to use the
873                     // survival probability: probability(x0, x1) = SF(x0) - SF(x1).
874                     double expected;
875                     if (cdf0 >= 0.5) {
876                         expected = sf0 - dist.survivalProbability(x1);
877                     } else {
878                         expected = dist.cumulativeProbability(x1) - cdf0;
879                     }
880                     TestUtils.assertEquals(
881                         expected,
882                         dist.probability(x0, x1),
883                         tolerance,
884                         () -> "Inconsistent probability for (" + x0 + "," + x1 + ")");
885                 } else if (x0 > x1) {
886                     Assertions.assertThrows(IllegalArgumentException.class,
887                         () -> dist.probability(x0, x1),
888                         "probability(int, int) should have thrown an exception that first argument is too large");
889                 }
890             }
891         }
892     }
893 
894     /**
895      * Test CDF and inverse CDF values at the edge of the support of the distribution return
896      * expected values and the CDF outside the support returns consistent values.
897      */
898     @ParameterizedTest
899     @MethodSource
900     final void testOutsideSupport(DiscreteDistribution dist,
901                                   DoubleTolerance tolerance) {
902         // Test various quantities when the variable is outside the support.
903         final int lo = dist.getSupportLowerBound();
904         TestUtils.assertEquals(dist.probability(lo), dist.cumulativeProbability(lo), tolerance, () -> "pmf(lower) != cdf(lower) for " + lo);
905         Assertions.assertEquals(lo, dist.inverseCumulativeProbability(-0.0), "icdf(-0.0)");
906         Assertions.assertEquals(lo, dist.inverseCumulativeProbability(0.0), "icdf(0.0)");
907         Assertions.assertEquals(lo, dist.inverseSurvivalProbability(1.0), "isf(1.0)");
908 
909         if (lo != Integer.MIN_VALUE) {
910             final int below = lo - 1;
911             Assertions.assertEquals(0.0, dist.probability(below), "pmf(x < lower)");
912             Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logProbability(below), "logpmf(x < lower)");
913             Assertions.assertEquals(0.0, dist.cumulativeProbability(below), "cdf(x < lower)");
914             Assertions.assertEquals(1.0, dist.survivalProbability(below), "sf(x < lower)");
915         }
916 
917         final int hi = dist.getSupportUpperBound();
918         Assertions.assertTrue(lo <= hi, "lower <= upper");
919         Assertions.assertEquals(hi, dist.inverseCumulativeProbability(1.0), "icdf(1.0)");
920         Assertions.assertEquals(hi, dist.inverseSurvivalProbability(-0.0), "isf(-0.0)");
921         Assertions.assertEquals(hi, dist.inverseSurvivalProbability(0.0), "isf(0.0)");
922         if (hi != Integer.MAX_VALUE) {
923             // For distributions defined up to integer max value we cannot test that
924             // the CDF is 1.0 as they may be truncated.
925             Assertions.assertEquals(1.0, dist.cumulativeProbability(hi), "cdf(upper)");
926             Assertions.assertEquals(0.0, dist.survivalProbability(hi), "sf(upper)");
927             TestUtils.assertEquals(dist.probability(hi), dist.survivalProbability(hi - 1), tolerance, () -> "pmf(upper - 1) != sf(upper - 1) for " + hi);
928 
929             final int above = hi + 1;
930             Assertions.assertEquals(0.0, dist.probability(above), "pmf(x > upper)");
931             Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logProbability(above), "logpmf(x > upper)");
932             Assertions.assertEquals(1.0, dist.cumulativeProbability(above), "cdf(x > upper)");
933             Assertions.assertEquals(0.0, dist.survivalProbability(above), "sf(x > upper)");
934         }
935 
936         // Test the logProbability at the support bound. This hits edge case coverage for logProbability.
937         assertPmfAndLogPmfAtBound(dist, lo, tolerance, "lower");
938         assertPmfAndLogPmfAtBound(dist, hi, tolerance, "upper");
939     }
940 
941     /**
942      * Assert the PMF and log PMF are consistent at the named point.
943      * This method asserts either:
944      * <pre>
945      * log(pmf) == logpmf
946      * pmf      == exp(logpmf)
947      * </pre>
948      *
949      * @param dist Distribution
950      * @param x Point
951      * @param tolerance Test tolerance
952      * @param name Point name
953      */
954     private static void assertPmfAndLogPmfAtBound(DiscreteDistribution dist, int x,
955                                                   DoubleTolerance tolerance, String name) {
956         // It is assumed the log probability may support a value when the plain probability will be zero.
957         // Only assert Math.log(dist.probability(x)) == dist.logProbability(x) for normal values.
958         final double p = dist.probability(x);
959         final double logp = dist.logProbability(x);
960         if (p > Double.MIN_NORMAL) {
961             TestUtils.assertEquals(Math.log(p), logp, tolerance,
962                 () -> String.format("%s: log(pmf(%d)) != logpmf(%d)", name, x, x));
963         } else {
964             TestUtils.assertEquals(p, Math.exp(logp), tolerance,
965                 () -> String.format("%s: pmf(%d) != exp(logpmf(%d))", name, x, x));
966         }
967     }
968 
969     /**
970      * Test invalid probabilities passed to computations that require a p-value in {@code [0, 1]}
971      * or a range where {@code p1 <= p2}.
972      */
973     @ParameterizedTest
974     @MethodSource(value = "streamDistribution")
975     final void testInvalidProbabilities(DiscreteDistribution dist) {
976         final int lo = dist.getSupportLowerBound();
977         final int hi = dist.getSupportUpperBound();
978         if (lo < hi) {
979             Assertions.assertThrows(DistributionException.class, () -> dist.probability(hi, lo), "x0 > x1");
980         }
981         Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(-1), "p < 0.0");
982         Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(2), "p > 1.0");
983         Assertions.assertThrows(DistributionException.class, () -> dist.inverseSurvivalProbability(-1), "q < 0.0");
984         Assertions.assertThrows(DistributionException.class, () -> dist.inverseSurvivalProbability(2), "q > 1.0");
985     }
986 
987     /**
988      * Test sampling from the distribution.
989      * This test uses the points that are used to test the distribution PMF.
990      * The test is skipped if the sum of the PMF values is less than 0.5.
991      */
992     @ParameterizedTest
993     @MethodSource
994     final void testSamplingPMF(DiscreteDistribution dist,
995                                int[] points,
996                                double[] values) {
997         // This test uses the points that are used to test the distribution PMF.
998         // The sum of the probability values does not have to be 1 (or very close to 1).
999         // Any value generated by the sampler that is not an expected point will
1000         // be ignored. If the sum of probabilities is above 0.5 then at least half
1001         // of the samples should be counted and the test will verify these occur with
1002         // the expected relative frequencies. Note: The expected values are normalised
1003         // to 1 (i.e. relative frequencies) by the Chi-square test.
1004         points = points.clone();
1005         values = values.clone();
1006         final int length = TestUtils.eliminateZeroMassPoints(points, values);
1007         final double[] expected = Arrays.copyOf(values, length);
1008 
1009         // This test will not be valid if the points do not represent enough of the PMF.
1010         // Require at least 50%.
1011         final double sum = Arrays.stream(expected).sum();
1012         Assumptions.assumeTrue(sum > 0.5,
1013             () -> "Not enough of the PMF is tested during sampling: " + sum);
1014 
1015         // Use fixed seed.
1016         final DiscreteDistribution.Sampler sampler =
1017                 dist.createSampler(RandomSource.XO_SHI_RO_256_PP.create(1234567890L));
1018 
1019         // Edge case for distributions with all mass in a single point
1020         if (length == 1) {
1021             final int point = points[0];
1022             for (int i = 0; i < 20; i++) {
1023                 Assertions.assertEquals(point, sampler.sample());
1024             }
1025             return;
1026         }
1027 
1028         final int sampleSize = 1000;
1029         MathArrays.scaleInPlace(sampleSize, expected);
1030 
1031         final int[] sample = TestUtils.sample(sampleSize, sampler);
1032 
1033         final long[] counts = new long[length];
1034         for (int i = 0; i < sampleSize; i++) {
1035             final int x = sample[i];
1036             for (int j = 0; j < length; j++) {
1037                 if (x == points[j]) {
1038                     counts[j]++;
1039                     break;
1040                 }
1041             }
1042         }
1043 
1044         TestUtils.assertChiSquareAccept(points, expected, counts, 0.001);
1045     }
1046 
1047     /**
1048      * Test sampling from the distribution using quartiles.
1049      * This test is ignored if the range for the distribution PMF is small
1050      * and the quartiles do not map to approximately 0.25. When the range of
1051      * the distribution is small then the {@link #testSamplingPMF(DiscreteDistribution, int[], double[])}
1052      * method should be used with points that covers at least 50% of the PMF.
1053      */
1054     @ParameterizedTest
1055     @MethodSource
1056     final void testSampling(DiscreteDistribution dist) {
1057         final int[] quartiles = TestUtils.getDistributionQuartiles(dist);
1058         // The distribution quartiles are created using the inverse CDF.
1059         // This may not be accurate for extreme parameterizations of the distribution.
1060         // So use the values to compute the expected probability for each interval.
1061         final double[] expected = {
1062             dist.cumulativeProbability(quartiles[0]),
1063             dist.probability(quartiles[0], quartiles[1]),
1064             dist.probability(quartiles[1], quartiles[2]),
1065             dist.survivalProbability(quartiles[2]),
1066         };
1067         // Ignore this test if the quartiles are different from a quarter.
1068         // This will exclude distributions where the PMF is heavily concentrated
1069         // at a single point. In this case it can be realistically
1070         // sampled using a small number of points.
1071         final DoubleTolerance tolerance = DoubleTolerances.absolute(0.1);
1072         for (final double p : expected) {
1073             Assumptions.assumeTrue(tolerance.test(0.25, p),
1074                 () -> "Unexpected quartiles: " + Arrays.toString(expected));
1075         }
1076 
1077         final int sampleSize = 1000;
1078         MathArrays.scaleInPlace(sampleSize, expected);
1079 
1080         // Use fixed seed.
1081         final DiscreteDistribution.Sampler sampler =
1082             dist.createSampler(RandomSource.XO_SHI_RO_256_PP.create(123456789L));
1083         final int[] sample = TestUtils.sample(sampleSize, sampler);
1084 
1085         final long[] counts = new long[4];
1086         for (int i = 0; i < sampleSize; i++) {
1087             TestUtils.updateCounts(sample[i], counts, quartiles);
1088         }
1089 
1090         TestUtils.assertChiSquareAccept(expected, counts, 0.001);
1091     }
1092 
1093     /**
1094      * Test that probability sums match the distribution.
1095      * The (filtered, sorted) points array is used to source
1096      * summation limits. The sum of the probability mass function
1097      * is compared with the probability over the same interval.
1098      * Test points outside of the domain of the probability function
1099      * are discarded and large intervals are ignored.
1100      *
1101      * <p>This test is ignored for large ranges.
1102      */
1103     @ParameterizedTest
1104     @MethodSource
1105     final void testProbabilitySums(DiscreteDistribution dist,
1106                                    int[] points,
1107                                    double[] values,
1108                                    DoubleTolerance tolerance) {
1109         final ArrayList<Integer> integrationTestPoints = new ArrayList<>();
1110         for (int i = 0; i < points.length; i++) {
1111             if (Double.isNaN(values[i]) ||
1112                 values[i] < 1e-5 ||
1113                 values[i] > 1 - 1e-5) {
1114                 continue; // exclude sums outside domain.
1115             }
1116             integrationTestPoints.add(points[i]);
1117         }
1118         Collections.sort(integrationTestPoints);
1119 
1120         for (int i = 1; i < integrationTestPoints.size(); i++) {
1121             final int x0 = integrationTestPoints.get(i - 1);
1122             final int x1 = integrationTestPoints.get(i);
1123             // Ignore large ranges
1124             if (x1 - x0 > SUM_RANGE_TOO_LARGE) {
1125                 continue;
1126             }
1127             final double sum = IntStream.rangeClosed(x0 + 1, x1).mapToDouble(dist::probability).sum();
1128             TestUtils.assertEquals(dist.probability(x0, x1), sum, tolerance,
1129                 () -> "Invalid probability sum: " + (x0 + 1) + " to " + x1);
1130         }
1131     }
1132 
1133     /**
1134      * Test the support of the distribution matches the expected values.
1135      */
1136     @ParameterizedTest
1137     @MethodSource
1138     final void testSupport(DiscreteDistribution dist, double lower, double upper) {
1139         Assertions.assertEquals(lower, dist.getSupportLowerBound(), "lower bound");
1140         Assertions.assertEquals(upper, dist.getSupportUpperBound(), "upper bound");
1141     }
1142 
1143     /**
1144      * Test the moments of the distribution matches the expected values.
1145      */
1146     @ParameterizedTest
1147     @MethodSource
1148     final void testMoments(DiscreteDistribution dist, double mean, double variance, DoubleTolerance tolerance) {
1149         TestUtils.assertEquals(mean, dist.getMean(), tolerance, "mean");
1150         TestUtils.assertEquals(variance, dist.getVariance(), tolerance, "variance");
1151     }
1152 
1153     /**
1154      * Test the median of the distribution is equal to the value returned from the inverse CDF.
1155      * The median is used internally for computation of the probability of a range
1156      * using either the CDF or survival function. If overridden by a distribution it should
1157      * be equivalent to the inverse CDF called with 0.5.
1158      *
1159      * <p>The method modifiers are asserted to check the method is not public or protected.
1160      */
1161     @ParameterizedTest
1162     @MethodSource
1163     final void testMedian(DiscreteDistribution dist) {
1164         if (dist instanceof AbstractDiscreteDistribution) {
1165             final AbstractDiscreteDistribution d = (AbstractDiscreteDistribution) dist;
1166             Assertions.assertEquals(d.inverseCumulativeProbability(0.5), d.getMedian(), "median");
1167             assertMethodNotModified(dist.getClass(), Modifier.PUBLIC | Modifier.PROTECTED, "getMedian");
1168         }
1169     }
1170 }