View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.statistics.distribution;
19  
20  import java.util.stream.Stream;
21  import org.apache.commons.math3.util.MathArrays;
22  import org.apache.commons.rng.simple.RandomSource;
23  import org.junit.jupiter.api.Assertions;
24  import org.junit.jupiter.api.Test;
25  import org.junit.jupiter.params.ParameterizedTest;
26  import org.junit.jupiter.params.provider.Arguments;
27  import org.junit.jupiter.params.provider.MethodSource;
28  
29  /**
30   * Test cases for {@link ZipfDistribution}.
31   * Extends {@link BaseDiscreteDistributionTest}. See javadoc of that class for details.
32   */
33  class ZipfDistributionTest  extends BaseDiscreteDistributionTest {
34      @Override
35      DiscreteDistribution makeDistribution(Object... parameters) {
36          final int n = (Integer) parameters[0];
37          final double e = (Double) parameters[1];
38          return ZipfDistribution.of(n, e);
39      }
40  
41  
42      @Override
43      Object[][] makeInvalidParameters() {
44          return new Object[][] {
45              {0, 1.0},
46              {-1, 1.0},
47              {1, -0.1},
48          };
49      }
50  
51      @Override
52      String[] getParameterNames() {
53          return new String[] {"NumberOfElements", "Exponent"};
54      }
55  
56      @Override
57      protected double getRelativeTolerance() {
58          return 1e-14;
59      }
60  
61      //-------------------- Additional test cases -------------------------------
62  
63      @ParameterizedTest
64      @MethodSource
65      void testAdditionlSurvivalProbabilityHighPrecision(int n, double e, int[] x, double[] expected, DoubleTolerance tol) {
66          testSurvivalProbabilityHighPrecision(ZipfDistribution.of(n, e), x, expected, tol);
67      }
68  
69      static Stream<Arguments> testAdditionlSurvivalProbabilityHighPrecision() {
70          // computed using scipy.stats (1.7.1) zipfian
71          return Stream.of(
72              Arguments.of(60, 10,
73                  new int[] {57, 59},
74                  new double[] {2.3189337454689757e-18, 1.6521739576668957e-18},
75                  DoubleTolerances.absolute(1e-25)),
76              Arguments.of(60, 50.5,
77                  new int[] {57, 59},
78                  new double[] {8.8488396450491320e-90, 1.5972093932264611e-90},
79                  DoubleTolerances.absolute(1e-95)),
80              Arguments.of(60, 100.5,
81                  new int[] {57, 59},
82                  new double[] {5.9632998443758656e-178, 1.9760564023408183e-179},
83                  DoubleTolerances.absolute(1e-185))
84          );
85      }
86  
87      /**
88       * Test the high precision survival probability computation when the exponent creates
89       * an overflow in the intermediate. The result should not be infinite or NaN and
90       * it should be a complement to the CDF value.
91       */
92      @Test
93      void testAdditionalSurvivalAndCumulativeProbabilityComplement() {
94          // Requires (x+1)^a to overflow
95          final int n = 60;
96          final double a = 200.5;
97          Assertions.assertEquals(Double.POSITIVE_INFINITY, Math.pow(n, a));
98          ZipfDistribution dist = ZipfDistribution.of(n, a);
99          final int[] points = MathArrays.natural(n);
100         testSurvivalAndCumulativeProbabilityComplement(dist, points, createTolerance());
101     }
102 
103     /**
104      * Test sampling for various number of points and exponents.
105      */
106     @Test
107     void testSamplingExtended() {
108         final int sampleSize = 1000;
109 
110         final int[] numPointsValues = {
111             2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100
112         };
113         final double[] exponentValues = {
114             1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 2e-1, 5e-1,
115             1. - 1e-9, 1.0, 1. + 1e-9, 1.1, 1.2, 1.3, 1.5, 1.6, 1.7, 1.8, 2.0,
116             2.5, 3.0, 4., 5., 6., 7., 8., 9., 10., 20., 30., 100., 150.
117         };
118 
119         for (final int numPoints : numPointsValues) {
120             for (final double exponent : exponentValues) {
121                 double weightSum = 0.;
122                 final double[] weights = new double[numPoints];
123                 for (int i = numPoints; i >= 1; i -= 1) {
124                     weights[i - 1] = Math.pow(i, -exponent);
125                     weightSum += weights[i - 1];
126                 }
127 
128                 // Use fixed seed, the test is expected to fail for more than 50% of all
129                 // seeds because each test case can fail with probability 0.001, the chance
130                 // that all test cases do not fail is 0.999^(32*22) = 0.49442874426
131                 final DiscreteDistribution.Sampler distribution =
132                     ZipfDistribution.of(numPoints, exponent).createSampler(
133                         RandomSource.XO_SHI_RO_256_PP.create(1));
134 
135                 final double[] expectedCounts = new double[numPoints];
136                 final long[] observedCounts = new long[numPoints];
137                 for (int i = 0; i < numPoints; i++) {
138                     expectedCounts[i] = sampleSize * (weights[i] / weightSum);
139                 }
140                 final int[] sample = TestUtils.sample(sampleSize, distribution);
141                 for (final int s : sample) {
142                     observedCounts[s - 1]++;
143                 }
144                 TestUtils.assertChiSquareAccept(expectedCounts, observedCounts, 0.001);
145             }
146         }
147     }
148 }