View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.distribution;
18  
19  import java.util.function.IntToDoubleFunction;
20  import org.apache.commons.rng.UniformRandomProvider;
21  import org.apache.commons.rng.sampling.distribution.GeometricSampler;
22  
23  /**
24   * Implementation of the geometric distribution.
25   *
26   * <p>The probability mass function of \( X \) is:
27   *
28   * <p>\[ f(k; p) = (1-p)^k \, p \]
29   *
30   * <p>for \( p \in (0, 1] \) the probability of success and
31   * \( k \in \{0, 1, 2, \dots\} \) the number of failures.
32   *
33   * <p>This parameterization is used to model the number of failures until
34   * the first success.
35   *
36   * @see <a href="https://en.wikipedia.org/wiki/Geometric_distribution">Geometric distribution (Wikipedia)</a>
37   * @see <a href="https://mathworld.wolfram.com/GeometricDistribution.html">Geometric distribution (MathWorld)</a>
38   */
39  public final class GeometricDistribution extends AbstractDiscreteDistribution {
40      /** 1/2. */
41      private static final double HALF = 0.5;
42  
43      /** The probability of success. */
44      private final double probabilityOfSuccess;
45      /** {@code log(p)} where p is the probability of success. */
46      private final double logProbabilityOfSuccess;
47      /** {@code log(1 - p)} where p is the probability of success. */
48      private final double log1mProbabilityOfSuccess;
49      /** Value of survival probability for x=0.
50       * Used in the survival functions. Equal to (1 - probability of success). */
51      private final double sf0;
52      /** Implementation of PMF(x). Assumes that {@code x > 0}. */
53      private final IntToDoubleFunction pmf;
54  
55      /**
56       * @param p Probability of success.
57       */
58      private GeometricDistribution(double p) {
59          probabilityOfSuccess = p;
60          logProbabilityOfSuccess = Math.log(p);
61          log1mProbabilityOfSuccess = Math.log1p(-p);
62          sf0 = 1 - p;
63  
64          // Choose the PMF implementation.
65          // When p >= 0.5 then 1 - p is exact and using the power function
66          // is consistently more accurate than the use of the exponential function.
67          // When p -> 0 then the exponential function avoids large error propagation
68          // of the power function used with an inexact 1 - p.
69          // Also compute the survival probability for use when x=0.
70          if (p >= HALF) {
71              pmf = x -> Math.pow(sf0, x) * probabilityOfSuccess;
72          } else {
73              pmf = x -> Math.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess;
74          }
75      }
76  
77      /**
78       * Creates a geometric distribution.
79       *
80       * @param p Probability of success.
81       * @return the geometric distribution
82       * @throws IllegalArgumentException if {@code p <= 0} or {@code p > 1}.
83       */
84      public static GeometricDistribution of(double p) {
85          if (p <= 0 || p > 1) {
86              throw new DistributionException(DistributionException.INVALID_NON_ZERO_PROBABILITY, p);
87          }
88          return new GeometricDistribution(p);
89      }
90  
91      /**
92       * Gets the probability of success parameter of this distribution.
93       *
94       * @return the probability of success.
95       */
96      public double getProbabilityOfSuccess() {
97          return probabilityOfSuccess;
98      }
99  
100     /** {@inheritDoc} */
101     @Override
102     public double probability(int x) {
103         if (x <= 0) {
104             // Special case of x=0 exploiting cancellation.
105             return x == 0 ? probabilityOfSuccess : 0;
106         }
107         return pmf.applyAsDouble(x);
108     }
109 
110     /** {@inheritDoc} */
111     @Override
112     public double logProbability(int x) {
113         if (x <= 0) {
114             // Special case of x=0 exploiting cancellation.
115             return x == 0 ? logProbabilityOfSuccess : Double.NEGATIVE_INFINITY;
116         }
117         return x * log1mProbabilityOfSuccess + logProbabilityOfSuccess;
118     }
119 
120     /** {@inheritDoc} */
121     @Override
122     public double cumulativeProbability(int x) {
123         if (x <= 0) {
124             // Note: CDF(x=0) = PDF(x=0) = probabilityOfSuccess
125             return x == 0 ? probabilityOfSuccess : 0;
126         }
127         // Note: Double addition avoids overflow. This may compute a value less than 1.0
128         // for the max integer value when p is very small.
129         return -Math.expm1(log1mProbabilityOfSuccess * (x + 1.0));
130     }
131 
132     /** {@inheritDoc} */
133     @Override
134     public double survivalProbability(int x) {
135         if (x <= 0) {
136             // Note: SF(x=0) = 1 - PDF(x=0) = 1 - probabilityOfSuccess
137             // Use a pre-computed value to avoid cancellation when probabilityOfSuccess -> 0
138             return x == 0 ? sf0 : 1;
139         }
140         // Note: Double addition avoids overflow. This may compute a value greater than 0.0
141         // for the max integer value when p is very small.
142         return Math.exp(log1mProbabilityOfSuccess * (x + 1.0));
143     }
144 
145     /** {@inheritDoc} */
146     @Override
147     public int inverseCumulativeProbability(double p) {
148         ArgumentUtils.checkProbability(p);
149         if (p == 1) {
150             return getSupportUpperBound();
151         }
152         if (p <= probabilityOfSuccess) {
153             return 0;
154         }
155         // p > probabilityOfSuccess
156         // => log(1-p) < log(1-probabilityOfSuccess);
157         // Both terms are negative as probabilityOfSuccess > 0.
158         // This should be lower bounded to (2 - 1) = 1
159         int x = (int) (Math.ceil(Math.log1p(-p) / log1mProbabilityOfSuccess) - 1);
160 
161         // Correct rounding errors.
162         // This ensures x == icdf(cdf(x))
163 
164         if (cumulativeProbability(x - 1) >= p) {
165             // No checks for x=0.
166             // If x=0; cdf(-1) = 0 and the condition is false as p>0 at this point.
167             x--;
168         } else if (cumulativeProbability(x) < p && x < Integer.MAX_VALUE) {
169             // The supported upper bound is max_value here as probabilityOfSuccess != 1
170             x++;
171         }
172 
173         return x;
174     }
175 
176     /** {@inheritDoc} */
177     @Override
178     public int inverseSurvivalProbability(double p) {
179         ArgumentUtils.checkProbability(p);
180         if (p == 0) {
181             return getSupportUpperBound();
182         }
183         if (p >= sf0) {
184             return 0;
185         }
186 
187         // p < 1 - probabilityOfSuccess
188         // Inversion as for icdf using log(p) in place of log1p(-p)
189         int x = (int) (Math.ceil(Math.log(p) / log1mProbabilityOfSuccess) - 1);
190 
191         // Correct rounding errors.
192         // This ensures x == isf(sf(x))
193 
194         if (survivalProbability(x - 1) <= p) {
195             // No checks for x=0
196             // If x=0; sf(-1) = 1 and the condition is false as p<1 at this point.
197             x--;
198         } else if (survivalProbability(x) > p && x < Integer.MAX_VALUE) {
199             // The supported upper bound is max_value here as probabilityOfSuccess != 1
200             x++;
201         }
202 
203         return x;
204     }
205 
206     /**
207      * {@inheritDoc}
208      *
209      * <p>For probability parameter \( p \), the mean is:
210      *
211      * <p>\[ \frac{1 - p}{p} \]
212      */
213     @Override
214     public double getMean() {
215         return (1 - probabilityOfSuccess) / probabilityOfSuccess;
216     }
217 
218     /**
219      * {@inheritDoc}
220      *
221      * <p>For probability parameter \( p \), the variance is:
222      *
223      * <p>\[ \frac{1 - p}{p^2} \]
224      */
225     @Override
226     public double getVariance() {
227         return (1 - probabilityOfSuccess) / (probabilityOfSuccess * probabilityOfSuccess);
228     }
229 
230     /**
231      * {@inheritDoc}
232      *
233      * <p>The lower bound of the support is always 0.
234      *
235      * @return 0.
236      */
237     @Override
238     public int getSupportLowerBound() {
239         return 0;
240     }
241 
242     /**
243      * {@inheritDoc}
244      *
245      * <p>The upper bound of the support is positive infinity except for the
246      * probability parameter {@code p = 1.0}.
247      *
248      * @return {@link Integer#MAX_VALUE} or 0.
249      */
250     @Override
251     public int getSupportUpperBound() {
252         return probabilityOfSuccess < 1 ? Integer.MAX_VALUE : 0;
253     }
254 
255     /** {@inheritDoc} */
256     @Override
257     public Sampler createSampler(UniformRandomProvider rng) {
258         return GeometricSampler.of(rng, probabilityOfSuccess)::sample;
259     }
260 }