View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.distribution;
18  
19  import java.util.stream.IntStream;
20  import org.apache.commons.rng.UniformRandomProvider;
21  
22  /**
23   * Interface for distributions on the integers.
24   */
25  public interface DiscreteDistribution {
26  
27      /**
28       * For a random variable {@code X} whose values are distributed according
29       * to this distribution, this method returns {@code P(X = x)}.
30       * In other words, this method represents the probability mass function (PMF)
31       * for the distribution.
32       *
33       * @param x Point at which the PMF is evaluated.
34       * @return the value of the probability mass function at {@code x}.
35       */
36      double probability(int x);
37  
38      /**
39       * For a random variable {@code X} whose values are distributed according
40       * to this distribution, this method returns {@code P(x0 < X <= x1)}.
41       * The default implementation uses the identity
42       * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
43       *
44       * <p>Special cases:
45       * <ul>
46       * <li>returns {@code 0.0} if {@code x0 == x1};
47       * <li>returns {@code probability(x1)} if {@code x0 + 1 == x1};
48       * </ul>
49       *
50       * @param x0 Lower bound (exclusive).
51       * @param x1 Upper bound (inclusive).
52       * @return the probability that a random variable with this distribution
53       * takes a value between {@code x0} and {@code x1},  excluding the lower
54       * and including the upper endpoint.
55       * @throws IllegalArgumentException if {@code x0 > x1}.
56       */
57      default double probability(int x0,
58                                 int x1) {
59          if (x0 > x1) {
60              throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, x0, x1);
61          }
62          // Long addition avoids overflow
63          if (x0 + 1L >= x1) {
64              return x0 == x1 ? 0.0 : probability(x1);
65          }
66          return cumulativeProbability(x1) - cumulativeProbability(x0);
67      }
68  
69      /**
70       * For a random variable {@code X} whose values are distributed according
71       * to this distribution, this method returns {@code log(P(X = x))}, where
72       * {@code log} is the natural logarithm.
73       *
74       * @param x Point at which the PMF is evaluated.
75       * @return the logarithm of the value of the probability mass function at
76       * {@code x}.
77       */
78      default double logProbability(int x) {
79          return Math.log(probability(x));
80      }
81  
82      /**
83       * For a random variable {@code X} whose values are distributed according
84       * to this distribution, this method returns {@code P(X <= x)}.
85       * In other, words, this method represents the (cumulative) distribution
86       * function (CDF) for this distribution.
87       *
88       * @param x Point at which the CDF is evaluated.
89       * @return the probability that a random variable with this distribution
90       * takes a value less than or equal to {@code x}.
91       */
92      double cumulativeProbability(int x);
93  
94      /**
95       * For a random variable {@code X} whose values are distributed according
96       * to this distribution, this method returns {@code P(X > x)}.
97       * In other words, this method represents the complementary cumulative
98       * distribution function.
99       *
100      * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
101      * the specific implementation may be more accurate.
102      *
103      * @param x Point at which the survival function is evaluated.
104      * @return the probability that a random variable with this
105      * distribution takes a value greater than {@code x}.
106      */
107     default double survivalProbability(int x) {
108         return 1.0 - cumulativeProbability(x);
109     }
110 
111     /**
112      * Computes the quantile function of this distribution.
113      * For a random variable {@code X} distributed according to this distribution,
114      * the returned value is:
115      *
116      * <p>\[ x = \begin{cases}
117      *       \inf \{ x \in \mathbb Z : P(X \le x) \ge p\}   &amp; \text{for } 0 \lt p \le 1 \\
118      *       \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \}  &amp; \text{for } p = 0
119      *       \end{cases} \]
120      *
121      * <p>If the result exceeds the range of the data type {@code int},
122      * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
123      * In this case the result of {@link #cumulativeProbability(int) cumulativeProbability(x)}
124      * called using the returned {@code p}-quantile may not compute the original {@code p}.
125      *
126      * @param p Cumulative probability.
127      * @return the smallest {@code p}-quantile of this distribution
128      * (largest 0-quantile for {@code p = 0}).
129      * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
130      */
131     int inverseCumulativeProbability(double p);
132 
133     /**
134      * Computes the inverse survival probability function of this distribution.
135      * For a random variable {@code X} distributed according to this distribution,
136      * the returned value is:
137      *
138      * <p>\[ x = \begin{cases}
139      *       \inf \{ x \in \mathbb Z : P(X \ge x) \le p\}   &amp; \text{for } 0 \le p \lt 1 \\
140      *       \inf \{ x \in \mathbb Z : P(X \ge x) \lt 1 \}  &amp; \text{for } p = 1
141      *       \end{cases} \]
142      *
143      * <p>If the result exceeds the range of the data type {@code int},
144      * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
145      * In this case the result of {@link #survivalProbability(int) survivalProbability(x)}
146      * called using the returned {@code (1-p)}-quantile may not compute the original {@code p}.
147      *
148      * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but
149      * the specific implementation may be more accurate.
150      *
151      * @param p Cumulative probability.
152      * @return the smallest {@code (1-p)}-quantile of this distribution
153      * (largest 0-quantile for {@code p = 1}).
154      * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
155      */
156     default int inverseSurvivalProbability(double p) {
157         return inverseCumulativeProbability(1 - p);
158     }
159 
160     /**
161      * Gets the mean of this distribution.
162      *
163      * @return the mean.
164      */
165     double getMean();
166 
167     /**
168      * Gets the variance of this distribution.
169      *
170      * @return the variance.
171      */
172     double getVariance();
173 
174     /**
175      * Gets the lower bound of the support.
176      * This method must return the same value as
177      * {@code inverseCumulativeProbability(0)}, i.e.
178      * \( \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} \).
179      * By convention, {@link Integer#MIN_VALUE} should be substituted
180      * for negative infinity.
181      *
182      * @return the lower bound of the support.
183      */
184     int getSupportLowerBound();
185 
186     /**
187      * Gets the upper bound of the support.
188      * This method must return the same value as
189      * {@code inverseCumulativeProbability(1)}, i.e.
190      * \( \inf \{ x \in \mathbb Z : P(X \le x) = 1 \} \).
191      * By convention, {@link Integer#MAX_VALUE} should be substituted
192      * for positive infinity.
193      *
194      * @return the upper bound of the support.
195      */
196     int getSupportUpperBound();
197 
198     /**
199      * Creates a sampler.
200      *
201      * @param rng Generator of uniformly distributed numbers.
202      * @return a sampler that produces random numbers according this
203      * distribution.
204      */
205     Sampler createSampler(UniformRandomProvider rng);
206 
207     /**
208      * Distribution sampling functionality.
209      */
210     @FunctionalInterface
211     interface Sampler {
212         /**
213          * Generates a random value sampled from this distribution.
214          *
215          * @return a random value.
216          */
217         int sample();
218 
219         /**
220          * Returns an effectively unlimited stream of {@code int} sample values.
221          *
222          * <p>The default implementation produces a sequential stream that repeatedly
223          * calls {@link #sample sample}().
224          *
225          * @return a stream of {@code int} values.
226          */
227         default IntStream samples() {
228             return IntStream.generate(this::sample).sequential();
229         }
230 
231         /**
232          * Returns a stream producing the given {@code streamSize} number of {@code int}
233          * sample values.
234          *
235          * <p>The default implementation produces a sequential stream that repeatedly
236          * calls {@link #sample sample}(); the stream is limited to the given {@code streamSize}.
237          *
238          * @param streamSize Number of values to generate.
239          * @return a stream of {@code int} values.
240          */
241         default IntStream samples(long streamSize) {
242             return samples().limit(streamSize);
243         }
244     }
245 }