View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.rng.examples.jmh.sampling.distribution;
19  
20  import org.apache.commons.rng.UniformRandomProvider;
21  import org.apache.commons.rng.sampling.distribution.DiscreteSampler;
22  import org.apache.commons.rng.sampling.distribution.KempSmallMeanPoissonSampler;
23  import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler;
24  import org.apache.commons.rng.sampling.distribution.SmallMeanPoissonSampler;
25  import org.apache.commons.rng.simple.RandomSource;
26  
27  import org.openjdk.jmh.annotations.Benchmark;
28  import org.openjdk.jmh.annotations.BenchmarkMode;
29  import org.openjdk.jmh.annotations.Fork;
30  import org.openjdk.jmh.annotations.Measurement;
31  import org.openjdk.jmh.annotations.Mode;
32  import org.openjdk.jmh.annotations.OutputTimeUnit;
33  import org.openjdk.jmh.annotations.Param;
34  import org.openjdk.jmh.annotations.Scope;
35  import org.openjdk.jmh.annotations.Setup;
36  import org.openjdk.jmh.annotations.State;
37  import org.openjdk.jmh.annotations.Warmup;
38  
39  import java.util.concurrent.TimeUnit;
40  import java.util.function.Supplier;
41  
42  /**
43   * Executes benchmark to compare the speed of generation of Poisson distributed random numbers.
44   */
45  @BenchmarkMode(Mode.AverageTime)
46  @OutputTimeUnit(TimeUnit.NANOSECONDS)
47  @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
48  @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
49  @State(Scope.Benchmark)
50  @Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"})
51  public class PoissonSamplersPerformance {
52      /**
53       * The value for the baseline generation of an {@code int} value.
54       *
55       * <p>This must NOT be final!</p>
56       */
57      private int value;
58  
59      /**
60       * The mean for the call to {@link Math#exp(double)}.
61       */
62      @State(Scope.Benchmark)
63      public static class Means {
64          /**
65           * The Poisson mean. This is set at a level where the small mean sampler is to be used
66           * in preference to the large mean sampler.
67           */
68          @Param({"0.25",
69                  "0.5",
70                  "1",
71                  "2",
72                  "4",
73                  "8",
74                  "16",
75                  "32"})
76          private double mean;
77  
78          /**
79           * Gets the mean.
80           *
81           * @return the mean
82           */
83          public double getMean() {
84              return mean;
85          }
86      }
87  
88      /**
89       * The {@link DiscreteSampler} samplers to use for testing. Creates the sampler for each
90       * {@link RandomSource} in the default
91       * {@link org.apache.commons.rng.examples.jmh.RandomSources RandomSources}.
92       */
93      @State(Scope.Benchmark)
94      public static class Sources {
95          /**
96           * RNG providers.
97           *
98           * <p>Use different speeds.</p>
99           *
100          * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html">
101          *      Commons RNG user guide</a>
102          */
103         @Param({"WELL_44497_B",
104                 //"ISAAC",
105                 "XO_RO_SHI_RO_128_PLUS"})
106         private String randomSourceName;
107 
108         /**
109          * The sampler type.
110          */
111         @Param({"SmallMeanPoissonSampler",
112                 "KempSmallMeanPoissonSampler",
113                 "BoundedKempSmallMeanPoissonSampler",
114                 "KempSmallMeanPoissonSamplerP50",
115                 "KempSmallMeanPoissonSamplerBinarySearch",
116                 "KempSmallMeanPoissonSamplerGuideTable",
117                 "LargeMeanPoissonSampler",
118                 "TinyMeanPoissonSampler"})
119         private String samplerType;
120 
121         /**
122          * The Poisson mean. This is set at a level where the small mean sampler is to be used
123          * in preference to the large mean sampler.
124          */
125         @Param({"0.25",
126                 "0.5",
127                 "1",
128                 "2",
129                 "4",
130                 "8",
131                 "16",
132                 "32",
133                 "64"})
134         private double mean;
135 
136         /** RNG. */
137         private UniformRandomProvider generator;
138 
139         /** The factory. */
140         private Supplier<DiscreteSampler> factory;
141 
142         /** The sampler. */
143         private DiscreteSampler sampler;
144 
145         /**
146          * @return The RNG.
147          */
148         public UniformRandomProvider getGenerator() {
149             return generator;
150         }
151 
152         /**
153          * Gets the sampler.
154          *
155          * @return The sampler.
156          */
157         public DiscreteSampler getSampler() {
158             return sampler;
159         }
160 
161         /** Instantiates sampler. */
162         @Setup
163         public void setup() {
164             final RandomSource randomSource = RandomSource.valueOf(randomSourceName);
165             generator = randomSource.create();
166             if ("SmallMeanPoissonSampler".equals(samplerType)) {
167                 factory = () -> SmallMeanPoissonSampler.of(generator, mean);
168             } else if ("KempSmallMeanPoissonSampler".equals(samplerType)) {
169                 factory = () -> KempSmallMeanPoissonSampler.of(generator, mean);
170             } else if ("BoundedKempSmallMeanPoissonSampler".equals(samplerType)) {
171                 factory = () -> new BoundedKempSmallMeanPoissonSampler(generator, mean);
172             } else if ("KempSmallMeanPoissonSamplerP50".equals(samplerType)) {
173                 factory = () -> new KempSmallMeanPoissonSamplerP50(generator, mean);
174             } else if ("KempSmallMeanPoissonSamplerBinarySearch".equals(samplerType)) {
175                 factory = () -> new KempSmallMeanPoissonSamplerBinarySearch(generator, mean);
176             } else if ("KempSmallMeanPoissonSamplerGuideTable".equals(samplerType)) {
177                 factory = () -> new KempSmallMeanPoissonSamplerGuideTable(generator, mean);
178             } else if ("LargeMeanPoissonSampler".equals(samplerType)) {
179                 factory = () -> LargeMeanPoissonSampler.of(generator, mean);
180             } else if ("TinyMeanPoissonSampler".equals(samplerType)) {
181                 factory = () -> new TinyMeanPoissonSampler(generator, mean);
182             }
183             sampler = factory.get();
184         }
185 
186         /**
187          * Creates a new instance of the sampler.
188          *
189          * @return The sampler.
190          */
191         public DiscreteSampler createSampler() {
192             return factory.get();
193         }
194     }
195 
196     /**
197      * Kemp sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
198      * distribution</a>.
199      *
200      * <ul>
201      *  <li>
202      *   For small means, a Poisson process is simulated using uniform deviates, as
203      *   described in Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed
204      *   Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253.
205      *  </li>
206      * </ul>
207      *
208      * <p>Note: This is similar to {@link KempSmallMeanPoissonSampler} but the sample is
209      * bounded by 1000 * mean.</p>
210      *
211      * @see <a href="https://www.jstor.org/stable/2346348">Kemp, A.W. (1981) JRSS Vol. 30, pp. 249-253</a>
212      */
213     static class BoundedKempSmallMeanPoissonSampler
214         implements DiscreteSampler {
215         /** Underlying source of randomness. */
216         private final UniformRandomProvider rng;
217         /**
218          * Pre-compute {@code Math.exp(-mean)}.
219          * Note: This is the probability of the Poisson sample {@code p(x=0)}.
220          */
221         private final double p0;
222         /** Pre-compute {@code 1000 * mean} as the upper limit of the sample. */
223         private final int limit;
224         /**
225          * The mean of the Poisson sample.
226          */
227         private final double mean;
228 
229         /**
230          * @param rng  Generator of uniformly distributed random numbers.
231          * @param mean Mean.
232          * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 700}.
233          */
234         BoundedKempSmallMeanPoissonSampler(UniformRandomProvider rng,
235                                             double mean) {
236             if (mean <= 0) {
237                 throw new IllegalArgumentException();
238             }
239 
240             p0 = Math.exp(-mean);
241             if (p0 == 0) {
242                 throw new IllegalArgumentException();
243             }
244             // The returned sample is bounded by 1000 * mean
245             limit = (int) Math.ceil(1000 * mean);
246             this.rng = rng;
247             this.mean = mean;
248         }
249 
250         /** {@inheritDoc} */
251         @Override
252         public int sample() {
253             // Note on the algorithm:
254             // - X is the unknown sample deviate (the output of the algorithm)
255             // - x is the current value from the distribution
256             // - p is the probability of the current value x, p(X=x)
257             // - u is effectively the cumulative probability that the sample X
258             //   is equal or above the current value x, p(X>=x)
259             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
260             double u = rng.nextDouble();
261             int x = 0;
262             double p = p0;
263             while (u > p) {
264                 u -= p;
265                 // Compute the next probability using a recurrence relation.
266                 // p(x+1) = p(x) * mean / (x+1)
267                 p *= mean / ++x;
268                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
269                 // is positive. This check is added to ensure a simple bounds in the event that
270                 // p == 0
271                 if (x == limit) {
272                     return x;
273                 }
274             }
275             return x;
276         }
277     }
278 
279     /**
280      * Kemp sampler for the Poisson distribution.
281      *
282      * <p>Note: This is a modification of the original algorithm by Kemp. It implements a hedge
283      * on the cumulative probability set at 50%. This saves computation in half of the samples.</p>
284      */
285     static class KempSmallMeanPoissonSamplerP50
286         implements DiscreteSampler {
287         /** The value of p=0.5. */
288         private static final double ONE_HALF = 0.5;
289         /**
290          * The threshold that defines the cumulative probability for the long tail of the
291          * Poisson distribution. Above this threshold the recurrence relation that computes the
292          * next probability must check that the p-value is not zero.
293          */
294         private static final double LONG_TAIL_THRESHOLD = 0.999;
295 
296         /** Underlying source of randomness. */
297         private final UniformRandomProvider rng;
298         /**
299          * Pre-compute {@code Math.exp(-mean)}.
300          * Note: This is the probability of the Poisson sample {@code p(x=0)}.
301          */
302         private final double p0;
303         /**
304          * The mean of the Poisson sample.
305          */
306         private final double mean;
307         /**
308          * Pre-compute the cumulative probability for all samples up to and including x.
309          * This is F(x) = sum of p(X<=x).
310          *
311          * <p>The value is computed at approximately 50% allowing the algorithm to choose to start
312          * at value (x+1) approximately half of the time.
313          */
314         private final double fx;
315         /**
316          * Store the value (x+1) corresponding to the next value after the cumulative probability is
317          * above 50%.
318          */
319         private final int x1;
320         /**
321          * Store the probability value p(x+1), allowing the algorithm to start from the point x+1.
322          */
323         private final double px1;
324 
325         /**
326          * Create a new instance.
327          *
328          * <p>This is valid for means as large as approximately {@code 744}.</p>
329          *
330          * @param rng  Generator of uniformly distributed random numbers.
331          * @param mean Mean.
332          * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}.
333          */
334         KempSmallMeanPoissonSamplerP50(UniformRandomProvider rng,
335                                        double mean) {
336             if (mean <= 0) {
337                 throw new IllegalArgumentException();
338             }
339 
340             this.rng = rng;
341             p0 = Math.exp(-mean);
342             this.mean = mean;
343 
344             // Pre-compute a hedge value for the cumulative probability at approximately 50%.
345             // This is only done when p0 is less than the long tail threshold.
346             // The result is that the rolling probability computation should never hit the
347             // long tail where p reaches zero.
348             if (p0 <= LONG_TAIL_THRESHOLD) {
349                 // Check the edge case for no probability
350                 if (p0 == 0) {
351                     throw new IllegalArgumentException();
352                 }
353 
354                 double p = p0;
355                 int x = 0;
356                 // Sum is cumulative probability F(x) = sum p(X<=x)
357                 double sum = p;
358                 while (sum < ONE_HALF) {
359                     // Compute the next probability using a recurrence relation.
360                     // p(x+1) = p(x) * mean / (x+1)
361                     p *= mean / ++x;
362                     sum += p;
363                 }
364                 fx = sum;
365                 x1 = x + 1;
366                 px1 = p * mean / x1;
367             } else {
368                 // Always start at zero.
369                 // Note: If NaN is input as the mean this path is executed and the sample is always zero.
370                 fx = 0;
371                 x1 = 0;
372                 px1 = p0;
373             }
374         }
375 
376         /** {@inheritDoc} */
377         @Override
378         public int sample() {
379             // Note on the algorithm:
380             // - X is the unknown sample deviate (the output of the algorithm)
381             // - x is the current value from the distribution
382             // - p is the probability of the current value x, p(X=x)
383             // - u is effectively the cumulative probability that the sample X
384             //   is equal or above the current value x, p(X>=x)
385             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
386             final double u = rng.nextDouble();
387 
388             if (u <= fx) {
389                 // Sample from the lower half of the distribution starting at zero
390                 return sampleBeforeLongTail(u, 0, p0);
391             }
392 
393             // Sample from the upper half of the distribution starting at cumulative probability fx.
394             // This is reached when u > fx and sample X > x.
395 
396             // If below the long tail threshold then omit the check on the asymptote of p -> zero
397             if (u <= LONG_TAIL_THRESHOLD) {
398                 return sampleBeforeLongTail(u - fx, x1, px1);
399             }
400 
401             return sampleWithinLongTail(u - fx, x1, px1);
402         }
403 
404         /**
405          * Compute the sample assuming it is <strong>not</strong> in the long tail of the distribution.
406          *
407          * <p>This avoids a check on the next probability value assuming that the cumulative probability
408          * is at a level where the long tail of the Poisson distribution will not be reached.
409          *
410          * @param u the remaining cumulative probability (p(X>x))
411          * @param x the current sample value X
412          * @param p the current probability of the sample (p(X=x))
413          * @return the sample X
414          */
415         private int sampleBeforeLongTail(double u, int x, double p) {
416             while (u > p) {
417                 // Update the remaining cumulative probability
418                 u -= p;
419                 // Compute the next probability using a recurrence relation.
420                 // p(x+1) = p(x) * mean / (x+1)
421                 p *= mean / ++x;
422                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
423                 // is positive (non-zero). This is omitted here on the assumption that the cumulative
424                 // probability will not be in the long tail where the probability asymptotes to zero.
425             }
426             return x;
427         }
428 
429         /**
430          * Compute the sample assuming it is in the long tail of the distribution.
431          *
432          * <p>This requires a check on the next probability value which is expected to asymptote to zero.
433          *
434          * @param u the remaining cumulative probability
435          * @param x the current sample value X
436          * @param p the current probability of the sample (p(X=x))
437          * @return the sample X
438          */
439         private int sampleWithinLongTail(double u, int x, double p) {
440             while (u > p) {
441                 // Update the remaining cumulative probability
442                 u -= p;
443                 // Compute the next probability using a recurrence relation.
444                 // p(x+1) = p(x) * mean / (x+1)
445                 p *= mean / ++x;
446                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
447                 // is positive. This check is added to ensure no errors when the limit of the summation
448                 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when
449                 // in the long tail of the distribution.
450                 if (p == 0) {
451                     return x;
452                 }
453             }
454             return x;
455         }
456     }
457 
458     /**
459      * Kemp sampler for the Poisson distribution.
460      *
461      * <p>Note: This is a modification of the original algorithm by Kemp. It stores the
462      * cumulative probability table for repeat use. The table is searched using a binary
463      * search algorithm.</p>
464      */
465     static class KempSmallMeanPoissonSamplerBinarySearch
466         implements DiscreteSampler {
467         /**
468          * Store the cumulative probability table size for integer means so that 99.99%
469          * of the Poisson distribution is covered. This is done until the table size is
470          * 2 * mean.
471          *
472          * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt
473          * the conservative limit of 2 * mean is used.
474          */
475         private static final int[] TABLE_SIZE = {
476             /* mean 1 to 10. */
477             8, 10, 12, 14, 16, 18, 20, 22, 24, 25,
478             /* mean 11 to 20. */
479             27, 29, 30, 32, 33, 35, 36, 38, 39, 41,
480         };
481 
482         /** Underlying source of randomness. */
483         private final UniformRandomProvider rng;
484         /**
485          * The mean of the Poisson sample.
486          */
487         private final double mean;
488         /**
489          * Store the cumulative probability for all samples up to and including x.
490          * This is F(x) = sum of p(X<=x).
491          *
492          * <p>This table is initialized to store cumulative probabilities for x up to 2 * mean
493          * or 99.99% (whichever is larger).
494          */
495         private final double[] fx;
496         /**
497          * Store the value x corresponding to the last stored cumulative probability.
498          */
499         private int lastX;
500         /**
501          * Store the probability value p(x) corresponding to last stored cumulative probability,
502          * allowing the algorithm to start from the point x.
503          */
504         private double px;
505 
506         /**
507          * Create a new instance.
508          *
509          * <p>This is valid for means as large as approximately {@code 744}.</p>
510          *
511          * @param rng  Generator of uniformly distributed random numbers.
512          * @param mean Mean.
513          * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}.
514          */
515         KempSmallMeanPoissonSamplerBinarySearch(UniformRandomProvider rng,
516                                                 double mean) {
517             if (mean <= 0) {
518                 throw new IllegalArgumentException();
519             }
520 
521             px = Math.exp(-mean);
522             if (px > 0) {
523                 this.rng = rng;
524                 this.mean = mean;
525 
526                 // Initialise the cumulative probability table.
527                 // The supported mean where p(x=0) > 0 sets a limit of around 744 so this will always be
528                 // possible.
529                 final int upperMean = (int) Math.ceil(mean);
530                 fx = new double[(upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2];
531                 fx[0] = px;
532             } else {
533                 // This will catch NaN mean values
534                 throw new IllegalArgumentException();
535             }
536         }
537 
538         /** {@inheritDoc} */
539         @Override
540         public int sample() {
541             // Note on the algorithm:
542             // - X is the unknown sample deviate (the output of the algorithm)
543             // - x is the current value from the distribution
544             // - p is the probability of the current value x, p(X=x)
545             // - u is effectively the cumulative probability that the sample X
546             //   is equal or above the current value x, p(X>=x)
547             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
548             final double u = rng.nextDouble();
549 
550             if (u <= fx[lastX]) {
551                 // Binary search within known cumulative probability table.
552                 // Find x so that u > f[x-1] and u <= f[x].
553                 // This is a looser search than Arrays.binarySearch:
554                 // - The output is x = upper.
555                 // - The pre-condition check ensures u <= f[upper] at the start.
556                 // - The table stores probabilities where f[0] is non-zero.
557                 // - u should be >= 0 (or the random generator is broken).
558                 // - It avoids comparisons using Double.doubleToLongBits.
559                 // - It avoids the low likelihood of equality between two doubles so uses
560                 //   only 1 compare per loop.
561                 // - It uses a weighted middle anticipating that the cumulative distribution
562                 //   is skewed as the expected use case is a small mean.
563                 int lower = 0;
564                 int upper = lastX;
565                 while (lower < upper) {
566                     // Weighted middle is 1/4 of the range between lower and upper
567                     final int mid = (3 * lower + upper) >>> 2;
568                     final double midVal = fx[mid];
569                     if (u > midVal) {
570                         // Change lower such that
571                         // u > f[lower - 1]
572                         lower = mid + 1;
573                     } else {
574                         // Change upper such that
575                         // u <= f[upper]
576                         upper = mid;
577                     }
578                 }
579                 return upper;
580             }
581 
582             // The sample is above x
583             int x1 = lastX + 1;
584 
585             // Fill the cumulative probability table if possible
586             while (x1 < fx.length) {
587                 // Compute the next probability using a recurrence relation.
588                 // p(x+1) = p(x) * mean / (x+1)
589                 px = nextProbability(px, x1);
590                 // Compute the next cumulative probability f(x+1) and update
591                 final double sum = fx[lastX] + px;
592                 fx[++lastX] = sum;
593                 // Check if this is the correct sample
594                 if (u <= sum) {
595                     return lastX;
596                 }
597                 x1 = lastX + 1;
598             }
599 
600             // The sample is above the range of the cumulative probability table.
601             // Compute using the Kemp method.
602             // This requires the remaining cumulative probability and the probability for x+1.
603             return sampleWithinLongTail(u - fx[lastX], x1, nextProbability(px, x1));
604         }
605 
606         /**
607          * Compute the next probability using a recurrence relation.
608          *
609          * <pre>
610          * p(x + 1) = p(x) * mean / (x + 1)
611          * </pre>
612          *
613          * @param p the probability of x
614          * @param x1 the value of x+1
615          * @return the probability of x+1
616          */
617         private double nextProbability(double p, int x1) {
618             return p * mean / x1;
619         }
620 
621         /**
622          * Compute the sample assuming it is in the long tail of the distribution.
623          *
624          * <p>This requires a check on the next probability value which is expected to asymptote to zero.
625          *
626          * @param u the remaining cumulative probability
627          * @param x the current sample value X
628          * @param p the current probability of the sample (p(X=x))
629          * @return the sample X
630          */
631         private int sampleWithinLongTail(double u, int x, double p) {
632             while (u > p) {
633                 // Update the remaining cumulative probability
634                 u -= p;
635                 p = nextProbability(p, ++x);
636                 // The algorithm listed in Kemp (1981) does not check that the rolling probability
637                 // is positive. This check is added to ensure no errors when the limit of the summation
638                 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when
639                 // in the long tail of the distribution.
640                 if (p == 0) {
641                     return x;
642                 }
643             }
644             return x;
645         }
646     }
647     /**
648      * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson
649      * distribution</a>.
650      *
651      * <p>Note: This is a modification of the original algorithm by Kemp. It stores the
652      * cumulative probability table for repeat use. The table is computed dynamically and
653      * searched using a guide table.</p>
654      */
655     static class KempSmallMeanPoissonSamplerGuideTable implements DiscreteSampler {
656         /**
657          * Store the cumulative probability table size for integer means so that 99.99% of the
658          * Poisson distribution is covered. This is done until the table size is 2 * mean.
659          *
660          * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt
661          * the conservative limit of 2 * mean is used.
662          */
663         private static final int[] TABLE_SIZE = {
664             /* mean 1 to 10. */
665             8, 10, 12, 14, 16, 18, 20, 22, 24, 25,
666             /* mean 11 to 20. */
667             27, 29, 30, 32, 33, 35, 36, 38, 39, 41, };
668 
669         /** Underlying source of randomness. */
670         private final UniformRandomProvider rng;
671         /**
672          * The mean of the Poisson sample.
673          */
674         private final double mean;
675         /**
676          * Store the cumulative probability for all samples up to and including x. This is
677          * F(x) = sum of p(X<=x).
678          *
679          * <p>This table is initialized to store cumulative probabilities for x up to 2 * mean
680          * or 99.99% (whichever is larger).
681          */
682         private final double[] cumulativeProbability;
683         /**
684          * Store the value x corresponding to the last stored cumulative probability.
685          */
686         private int tabulatedX;
687         /**
688          * Store the probability value p(x), allowing the algorithm to start from the point x.
689          */
690         private double probabilityX;
691         /**
692          * The inverse cumulative probability guide table. This is a map between the cumulative
693          * probability (f(x)) and the value x. It is used to set the initial point for search
694          * of the cumulative probability table.
695          *
696          * <p>The index into the table is obtained using {@code f(x) * guideTable.length}. The value
697          * stored at the index is value {@code x+1} such that it is the exclusive upper bound
698          * on the sample value for searching the cumulative probability table. It requires the
699          * table search is towards zero.</p>
700          *
701          * <p>Note: Using x+1 ensures that the table can be zero filled upon initialisation and
702          * any index with zero has yet to be allocated.</p>
703          *
704          * <p>The guide table should never be used when the input f(x) is above the current range of
705          * the cumulative probability table. This would create an index that has not been
706          * allocated a mapping.
707          */
708         private final int[] guideTable;
709 
710         /**
711          * Create a new instance.
712          *
713          * <p>This is valid for means as large as approximately {@code 744}.</p>
714          *
715          * @param rng Generator of uniformly distributed random numbers.
716          * @param mean Mean.
717          * @throws IllegalArgumentException if {@code mean <= 0} or
718          * {@code Math.exp(-mean) == 0}.
719          */
720         KempSmallMeanPoissonSamplerGuideTable(UniformRandomProvider rng, double mean) {
721             if (mean <= 0) {
722                 throw new IllegalArgumentException("mean is not strictly positive: " + mean);
723             }
724 
725             probabilityX = Math.exp(-mean);
726             if (probabilityX > 0) {
727                 this.rng = rng;
728                 this.mean = mean;
729 
730                 // Initialise the cumulative probability table.
731                 // The supported mean where p(x=0) > 0 sets a limit of around 744 so the cast to int
732                 // will always be possible.
733                 final int upperMean = (int) Math.ceil(mean);
734                 final int size = (upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2;
735                 cumulativeProbability = new double[size];
736                 cumulativeProbability[0] = probabilityX;
737 
738                 guideTable = new int[cumulativeProbability.length + 1];
739                 initializeGuideTable(probabilityX);
740             } else {
741                 // This will catch NaN mean values
742                 throw new IllegalArgumentException("No probability for mean " + mean);
743             }
744         }
745 
746         /**
747          * Initialise the cumulative probability guide table. All guide indices at or below the
748          * index corresponding to the given probability will be set to 1.
749          *
750          * @param p0 the probability for x=0
751          */
752         private void initializeGuideTable(double p0) {
753             for (int index = getGuideTableIndex(p0); index >= 0; index--) {
754                 guideTable[index] = 1;
755             }
756         }
757 
758         /**
759          * Fill the inverse cumulative probability guide table. Set the index corresponding to the
760          * given probability to x+1 establishing an exclusive upper bound on x for the probability.
761          * All unused guide indices below the index will also be set to x+1.
762          *
763          * @param p the cumulative probability
764          * @param x the sample value x
765          */
766         private void updateGuideTable(double p, int x) {
767             // Always set the current index as the guide table is the exclusive upper bound
768             // for searching the cumulative probability table for any value p.
769             // Then fill any lower positions that are not allocated.
770             final int x1 = x + 1;
771             int index = getGuideTableIndex(p);
772             do {
773                 guideTable[index--] = x1;
774             } while (index > 0 && guideTable[index] == 0);
775         }
776 
777         /**
778          * Gets the guide table index for the probability. This is obtained using
779          * {@code p * (guideTable.length - 1)} so is inside the length of the table.
780          *
781          * @param p the cumulative probability
782          * @return the guide table index
783          */
784         private int getGuideTableIndex(double p) {
785             // Note: This is only ever called when p is in the range of the cumulative
786             // probability table. So assume 0 <= p <= 1.
787             return (int) (p * (guideTable.length - 1));
788         }
789 
790         /** {@inheritDoc} */
791         @Override
792         public int sample() {
793             // Note on the algorithm:
794             // 1. Compute a cumulative probability with a uniform deviate (u).
795             // 2. If the probability lies within the tabulated cumulative probabilities
796             //    then find the sample value.
797             // 3. If possible expand the tabulated cumulative probabilities up to the value u.
798             // 4. If value u exceeds the capacity for the tabulated cumulative probabilities
799             //    then compute the sample value dynamically without storing the probabilities.
800 
801             // Compute a probability
802             final double u = rng.nextDouble();
803 
804             // Check the tabulated cumulative probability table
805             if (u <= cumulativeProbability[tabulatedX]) {
806                 // Initialise the search using a guide table to find an initial guess.
807                 // The table provides an upper bound on the sample for a known cumulative probability.
808                 int sample = guideTable[getGuideTableIndex(u)] - 1;
809                 // If u is above the sample probability (this occurs due to truncation)
810                 // then return the next value up.
811                 if (u > cumulativeProbability[sample]) {
812                     return sample + 1;
813                 }
814                 // Search down
815                 while (sample != 0 && u <= cumulativeProbability[sample - 1]) {
816                     sample--;
817                 }
818                 return sample;
819             }
820 
821             // The sample is above the tabulated cumulative probability for x
822             int x1 = tabulatedX + 1;
823 
824             // Fill the cumulative probability table if possible
825             while (x1 < cumulativeProbability.length) {
826                 probabilityX = nextProbability(probabilityX, x1);
827                 // Compute the next cumulative probability f(x+1) and update
828                 final double sum = cumulativeProbability[tabulatedX] + probabilityX;
829                 cumulativeProbability[++tabulatedX] = sum;
830                 updateGuideTable(sum, tabulatedX);
831                 // Check if this is the correct sample
832                 if (u <= sum) {
833                     return tabulatedX;
834                 }
835                 x1 = tabulatedX + 1;
836             }
837 
838             // The sample is above the range of the cumulative probability table.
839             // Compute using the Kemp method.
840             // This requires the remaining cumulative probability and the probability for x+1.
841             return sampleWithinLongTail(u - cumulativeProbability[tabulatedX], x1, nextProbability(probabilityX, x1));
842         }
843 
844         /**
845          * Compute the next probability using a recurrence relation.
846          *
847          * <pre>
848          * p(x + 1) = p(x) * mean / (x + 1)
849          * </pre>
850          *
851          * @param px the probability of x
852          * @param x1 the value of x+1
853          * @return the probability of x+1
854          */
855         private double nextProbability(double px, int x1) {
856             return px * mean / x1;
857         }
858 
859         /**
860          * Compute the sample assuming it is in the long tail of the distribution.
861          *
862          * <p>This requires a check on the next probability value which is expected to
863          * asymptote to zero.
864          *
865          * @param u the remaining cumulative probability
866          * @param x the current sample value X
867          * @param p the current probability of the sample (p(X=x))
868          * @return the sample X
869          */
870         private int sampleWithinLongTail(double u, int x, double p) {
871             // Note on the algorithm:
872             // - X is the unknown sample deviate (the output of the algorithm)
873             // - x is the current value from the distribution
874             // - p is the probability of the current value x, p(X=x)
875             // - u is effectively the cumulative probability that the sample X
876             // is equal or above the current value x, p(X>=x)
877             // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x
878             while (u > p) {
879                 // Update the remaining cumulative probability
880                 u -= p;
881                 p = nextProbability(p, ++x);
882                 // The algorithm listed in Kemp (1981) does not check that the rolling
883                 // probability is positive. This check is added to ensure no errors when the
884                 // limit of the summation 1 - sum(p(x)) is above 0 due to cumulative error in
885                 // floating point arithmetic when in the long tail of the distribution.
886                 if (p == 0) {
887                     break;
888                 }
889             }
890             return x;
891         }
892     }
893 
894     /**
895      * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>.
896      *
897      * <ul>
898      *  <li>
899      *   For small means, a Poisson process is simulated using uniform deviates, as
900      *   described in Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming,
901      *   Volume 2. Addison Wesley.
902      *  </li>
903      * </ul>
904      *
905      * <p>This sampler is suitable for {@code mean < 20}.</p>
906      *
907      * <p>Sampling uses {@link UniformRandomProvider#nextInt()} and 32-bit integer arithmetic.</p>
908      *
909      * @see <a href="https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables">
910      * Poisson random variables</a>
911      */
912     static class TinyMeanPoissonSampler
913         implements DiscreteSampler {
914         /** Pre-compute Poisson probability p(n=0) mapped to the range of a 32-bit unsigned fraction. */
915         private final long p0;
916         /** Underlying source of randomness. */
917         private final UniformRandomProvider rng;
918 
919         /**
920          * @param rng  Generator of uniformly distributed random numbers.
921          * @param mean Mean.
922          * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) * (1L << 32)}
923          * is not positive.
924          */
925         TinyMeanPoissonSampler(UniformRandomProvider rng,
926                                double mean) {
927             this.rng = rng;
928             if (mean <= 0) {
929                 throw new IllegalArgumentException();
930             }
931             // Math.exp(-mean) is the probability of a Poisson distribution for n=0 (p(n=0)).
932             // This is mapped to a 32-bit range as the numerator of a 32-bit fraction
933             // for use in optimised 32-bit arithmetic.
934             p0 = (long) (Math.exp(-mean) * 0x100000000L);
935             if (p0 == 0) {
936                 throw new IllegalArgumentException("No p(x=0) probability for mean: " + mean);
937             }
938         }
939 
940         /** {@inheritDoc} */
941         @Override
942         public int sample() {
943             int n = 0;
944             // The unsigned 32-bit sample represents the fraction x / 2^32 where x is [0,2^32-1].
945             // The upper bound is exclusive hence the fraction is a uniform deviate from [0,1).
946             long r = nextUnsigned32();
947             // The limit is the probability p(n=0) converted to an unsigned fraction.
948             while (r >= p0) {
949                 // Compute the fraction:
950                 //  r       [0,2^32)      2^32
951                 // ----  *  --------   /  ----
952                 // 2^32       2^32        2^32
953                 // This rounds down the fraction numerator when the lower 32-bits are discarded.
954                 r = (r * nextUnsigned32()) >>> 32;
955                 n++;
956             }
957             // Ensure the value is positive in the worst case scenario of a broken
958             // generator that returns 0xffffffff for each sample. This will cause
959             // the integer counter to overflow 2^31-1 but not exceed 2^32. The fraction
960             // multiplication effectively turns r into a counter decrementing from 2^32-1
961             // to zero.
962             return (n >= 0) ? n : Integer.MAX_VALUE;
963         }
964 
965         /**
966          * Get the next unsigned 32-bit random integer.
967          *
968          * @return the random integer
969          */
970         private long nextUnsigned32() {
971             return rng.nextInt() & 0xffffffffL;
972         }
973     }
974 
975     // Benchmarks methods below.
976 
977     /**
978      * Baseline for the JMH timing overhead for production of an {@code int} value.
979      *
980      * @return the {@code int} value
981      */
982     @Benchmark
983     public int baselineInt() {
984         return value;
985     }
986 
987     /**
988      * Baseline for {@link Math#exp(double)}.
989      *
990      * @param mean the mean
991      * @return the value
992      */
993     @Benchmark
994     public double baselineExp(Means mean) {
995         return Math.exp(-mean.getMean());
996     }
997 
998     /**
999      * Run the sampler.
1000      *
1001      * @param sources Source of randomness.
1002      * @return the sample value
1003      */
1004     @Benchmark
1005     public int sample(Sources sources) {
1006         return sources.getSampler().sample();
1007     }
1008 
1009     /**
1010      * Run the sampler.
1011      *
1012      * @param sources Source of randomness.
1013      * @return the sample value
1014      */
1015     @Benchmark
1016     public int singleSample(Sources sources) {
1017         return sources.createSampler().sample();
1018     }
1019 }