1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.rng.examples.jmh.sampling.distribution; 19 20 import org.apache.commons.rng.UniformRandomProvider; 21 import org.apache.commons.rng.sampling.distribution.DiscreteSampler; 22 import org.apache.commons.rng.sampling.distribution.KempSmallMeanPoissonSampler; 23 import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler; 24 import org.apache.commons.rng.sampling.distribution.SmallMeanPoissonSampler; 25 import org.apache.commons.rng.simple.RandomSource; 26 27 import org.openjdk.jmh.annotations.Benchmark; 28 import org.openjdk.jmh.annotations.BenchmarkMode; 29 import org.openjdk.jmh.annotations.Fork; 30 import org.openjdk.jmh.annotations.Measurement; 31 import org.openjdk.jmh.annotations.Mode; 32 import org.openjdk.jmh.annotations.OutputTimeUnit; 33 import org.openjdk.jmh.annotations.Param; 34 import org.openjdk.jmh.annotations.Scope; 35 import org.openjdk.jmh.annotations.Setup; 36 import org.openjdk.jmh.annotations.State; 37 import org.openjdk.jmh.annotations.Warmup; 38 39 import java.util.concurrent.TimeUnit; 40 import java.util.function.Supplier; 41 42 /** 43 * Executes benchmark to compare the speed of generation of Poisson distributed random numbers. 44 */ 45 @BenchmarkMode(Mode.AverageTime) 46 @OutputTimeUnit(TimeUnit.NANOSECONDS) 47 @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 48 @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 49 @State(Scope.Benchmark) 50 @Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"}) 51 public class PoissonSamplersPerformance { 52 /** 53 * The value for the baseline generation of an {@code int} value. 54 * 55 * <p>This must NOT be final!</p> 56 */ 57 private int value; 58 59 /** 60 * The mean for the call to {@link Math#exp(double)}. 61 */ 62 @State(Scope.Benchmark) 63 public static class Means { 64 /** 65 * The Poisson mean. This is set at a level where the small mean sampler is to be used 66 * in preference to the large mean sampler. 67 */ 68 @Param({"0.25", 69 "0.5", 70 "1", 71 "2", 72 "4", 73 "8", 74 "16", 75 "32"}) 76 private double mean; 77 78 /** 79 * Gets the mean. 80 * 81 * @return the mean 82 */ 83 public double getMean() { 84 return mean; 85 } 86 } 87 88 /** 89 * The {@link DiscreteSampler} samplers to use for testing. Creates the sampler for each 90 * {@link RandomSource} in the default 91 * {@link org.apache.commons.rng.examples.jmh.RandomSources RandomSources}. 92 */ 93 @State(Scope.Benchmark) 94 public static class Sources { 95 /** 96 * RNG providers. 97 * 98 * <p>Use different speeds.</p> 99 * 100 * @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html"> 101 * Commons RNG user guide</a> 102 */ 103 @Param({"WELL_44497_B", 104 //"ISAAC", 105 "XO_RO_SHI_RO_128_PLUS"}) 106 private String randomSourceName; 107 108 /** 109 * The sampler type. 110 */ 111 @Param({"SmallMeanPoissonSampler", 112 "KempSmallMeanPoissonSampler", 113 "BoundedKempSmallMeanPoissonSampler", 114 "KempSmallMeanPoissonSamplerP50", 115 "KempSmallMeanPoissonSamplerBinarySearch", 116 "KempSmallMeanPoissonSamplerGuideTable", 117 "LargeMeanPoissonSampler", 118 "TinyMeanPoissonSampler"}) 119 private String samplerType; 120 121 /** 122 * The Poisson mean. This is set at a level where the small mean sampler is to be used 123 * in preference to the large mean sampler. 124 */ 125 @Param({"0.25", 126 "0.5", 127 "1", 128 "2", 129 "4", 130 "8", 131 "16", 132 "32", 133 "64"}) 134 private double mean; 135 136 /** RNG. */ 137 private UniformRandomProvider generator; 138 139 /** The factory. */ 140 private Supplier<DiscreteSampler> factory; 141 142 /** The sampler. */ 143 private DiscreteSampler sampler; 144 145 /** 146 * @return The RNG. 147 */ 148 public UniformRandomProvider getGenerator() { 149 return generator; 150 } 151 152 /** 153 * Gets the sampler. 154 * 155 * @return The sampler. 156 */ 157 public DiscreteSampler getSampler() { 158 return sampler; 159 } 160 161 /** Instantiates sampler. */ 162 @Setup 163 public void setup() { 164 final RandomSource randomSource = RandomSource.valueOf(randomSourceName); 165 generator = randomSource.create(); 166 if ("SmallMeanPoissonSampler".equals(samplerType)) { 167 factory = () -> SmallMeanPoissonSampler.of(generator, mean); 168 } else if ("KempSmallMeanPoissonSampler".equals(samplerType)) { 169 factory = () -> KempSmallMeanPoissonSampler.of(generator, mean); 170 } else if ("BoundedKempSmallMeanPoissonSampler".equals(samplerType)) { 171 factory = () -> new BoundedKempSmallMeanPoissonSampler(generator, mean); 172 } else if ("KempSmallMeanPoissonSamplerP50".equals(samplerType)) { 173 factory = () -> new KempSmallMeanPoissonSamplerP50(generator, mean); 174 } else if ("KempSmallMeanPoissonSamplerBinarySearch".equals(samplerType)) { 175 factory = () -> new KempSmallMeanPoissonSamplerBinarySearch(generator, mean); 176 } else if ("KempSmallMeanPoissonSamplerGuideTable".equals(samplerType)) { 177 factory = () -> new KempSmallMeanPoissonSamplerGuideTable(generator, mean); 178 } else if ("LargeMeanPoissonSampler".equals(samplerType)) { 179 factory = () -> LargeMeanPoissonSampler.of(generator, mean); 180 } else if ("TinyMeanPoissonSampler".equals(samplerType)) { 181 factory = () -> new TinyMeanPoissonSampler(generator, mean); 182 } 183 sampler = factory.get(); 184 } 185 186 /** 187 * Creates a new instance of the sampler. 188 * 189 * @return The sampler. 190 */ 191 public DiscreteSampler createSampler() { 192 return factory.get(); 193 } 194 } 195 196 /** 197 * Kemp sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 198 * distribution</a>. 199 * 200 * <ul> 201 * <li> 202 * For small means, a Poisson process is simulated using uniform deviates, as 203 * described in Kemp, A, W, (1981) Efficient Generation of Logarithmically Distributed 204 * Pseudo-Random Variables. Journal of the Royal Statistical Society. Vol. 30, No. 3, pp. 249-253. 205 * </li> 206 * </ul> 207 * 208 * <p>Note: This is similar to {@link KempSmallMeanPoissonSampler} but the sample is 209 * bounded by 1000 * mean.</p> 210 * 211 * @see <a href="https://www.jstor.org/stable/2346348">Kemp, A.W. (1981) JRSS Vol. 30, pp. 249-253</a> 212 */ 213 static class BoundedKempSmallMeanPoissonSampler 214 implements DiscreteSampler { 215 /** Underlying source of randomness. */ 216 private final UniformRandomProvider rng; 217 /** 218 * Pre-compute {@code Math.exp(-mean)}. 219 * Note: This is the probability of the Poisson sample {@code p(x=0)}. 220 */ 221 private final double p0; 222 /** Pre-compute {@code 1000 * mean} as the upper limit of the sample. */ 223 private final int limit; 224 /** 225 * The mean of the Poisson sample. 226 */ 227 private final double mean; 228 229 /** 230 * @param rng Generator of uniformly distributed random numbers. 231 * @param mean Mean. 232 * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 700}. 233 */ 234 BoundedKempSmallMeanPoissonSampler(UniformRandomProvider rng, 235 double mean) { 236 if (mean <= 0) { 237 throw new IllegalArgumentException(); 238 } 239 240 p0 = Math.exp(-mean); 241 if (p0 == 0) { 242 throw new IllegalArgumentException(); 243 } 244 // The returned sample is bounded by 1000 * mean 245 limit = (int) Math.ceil(1000 * mean); 246 this.rng = rng; 247 this.mean = mean; 248 } 249 250 /** {@inheritDoc} */ 251 @Override 252 public int sample() { 253 // Note on the algorithm: 254 // - X is the unknown sample deviate (the output of the algorithm) 255 // - x is the current value from the distribution 256 // - p is the probability of the current value x, p(X=x) 257 // - u is effectively the cumulative probability that the sample X 258 // is equal or above the current value x, p(X>=x) 259 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 260 double u = rng.nextDouble(); 261 int x = 0; 262 double p = p0; 263 while (u > p) { 264 u -= p; 265 // Compute the next probability using a recurrence relation. 266 // p(x+1) = p(x) * mean / (x+1) 267 p *= mean / ++x; 268 // The algorithm listed in Kemp (1981) does not check that the rolling probability 269 // is positive. This check is added to ensure a simple bounds in the event that 270 // p == 0 271 if (x == limit) { 272 return x; 273 } 274 } 275 return x; 276 } 277 } 278 279 /** 280 * Kemp sampler for the Poisson distribution. 281 * 282 * <p>Note: This is a modification of the original algorithm by Kemp. It implements a hedge 283 * on the cumulative probability set at 50%. This saves computation in half of the samples.</p> 284 */ 285 static class KempSmallMeanPoissonSamplerP50 286 implements DiscreteSampler { 287 /** The value of p=0.5. */ 288 private static final double ONE_HALF = 0.5; 289 /** 290 * The threshold that defines the cumulative probability for the long tail of the 291 * Poisson distribution. Above this threshold the recurrence relation that computes the 292 * next probability must check that the p-value is not zero. 293 */ 294 private static final double LONG_TAIL_THRESHOLD = 0.999; 295 296 /** Underlying source of randomness. */ 297 private final UniformRandomProvider rng; 298 /** 299 * Pre-compute {@code Math.exp(-mean)}. 300 * Note: This is the probability of the Poisson sample {@code p(x=0)}. 301 */ 302 private final double p0; 303 /** 304 * The mean of the Poisson sample. 305 */ 306 private final double mean; 307 /** 308 * Pre-compute the cumulative probability for all samples up to and including x. 309 * This is F(x) = sum of p(X<=x). 310 * 311 * <p>The value is computed at approximately 50% allowing the algorithm to choose to start 312 * at value (x+1) approximately half of the time. 313 */ 314 private final double fx; 315 /** 316 * Store the value (x+1) corresponding to the next value after the cumulative probability is 317 * above 50%. 318 */ 319 private final int x1; 320 /** 321 * Store the probability value p(x+1), allowing the algorithm to start from the point x+1. 322 */ 323 private final double px1; 324 325 /** 326 * Create a new instance. 327 * 328 * <p>This is valid for means as large as approximately {@code 744}.</p> 329 * 330 * @param rng Generator of uniformly distributed random numbers. 331 * @param mean Mean. 332 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}. 333 */ 334 KempSmallMeanPoissonSamplerP50(UniformRandomProvider rng, 335 double mean) { 336 if (mean <= 0) { 337 throw new IllegalArgumentException(); 338 } 339 340 this.rng = rng; 341 p0 = Math.exp(-mean); 342 this.mean = mean; 343 344 // Pre-compute a hedge value for the cumulative probability at approximately 50%. 345 // This is only done when p0 is less than the long tail threshold. 346 // The result is that the rolling probability computation should never hit the 347 // long tail where p reaches zero. 348 if (p0 <= LONG_TAIL_THRESHOLD) { 349 // Check the edge case for no probability 350 if (p0 == 0) { 351 throw new IllegalArgumentException(); 352 } 353 354 double p = p0; 355 int x = 0; 356 // Sum is cumulative probability F(x) = sum p(X<=x) 357 double sum = p; 358 while (sum < ONE_HALF) { 359 // Compute the next probability using a recurrence relation. 360 // p(x+1) = p(x) * mean / (x+1) 361 p *= mean / ++x; 362 sum += p; 363 } 364 fx = sum; 365 x1 = x + 1; 366 px1 = p * mean / x1; 367 } else { 368 // Always start at zero. 369 // Note: If NaN is input as the mean this path is executed and the sample is always zero. 370 fx = 0; 371 x1 = 0; 372 px1 = p0; 373 } 374 } 375 376 /** {@inheritDoc} */ 377 @Override 378 public int sample() { 379 // Note on the algorithm: 380 // - X is the unknown sample deviate (the output of the algorithm) 381 // - x is the current value from the distribution 382 // - p is the probability of the current value x, p(X=x) 383 // - u is effectively the cumulative probability that the sample X 384 // is equal or above the current value x, p(X>=x) 385 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 386 final double u = rng.nextDouble(); 387 388 if (u <= fx) { 389 // Sample from the lower half of the distribution starting at zero 390 return sampleBeforeLongTail(u, 0, p0); 391 } 392 393 // Sample from the upper half of the distribution starting at cumulative probability fx. 394 // This is reached when u > fx and sample X > x. 395 396 // If below the long tail threshold then omit the check on the asymptote of p -> zero 397 if (u <= LONG_TAIL_THRESHOLD) { 398 return sampleBeforeLongTail(u - fx, x1, px1); 399 } 400 401 return sampleWithinLongTail(u - fx, x1, px1); 402 } 403 404 /** 405 * Compute the sample assuming it is <strong>not</strong> in the long tail of the distribution. 406 * 407 * <p>This avoids a check on the next probability value assuming that the cumulative probability 408 * is at a level where the long tail of the Poisson distribution will not be reached. 409 * 410 * @param u the remaining cumulative probability (p(X>x)) 411 * @param x the current sample value X 412 * @param p the current probability of the sample (p(X=x)) 413 * @return the sample X 414 */ 415 private int sampleBeforeLongTail(double u, int x, double p) { 416 while (u > p) { 417 // Update the remaining cumulative probability 418 u -= p; 419 // Compute the next probability using a recurrence relation. 420 // p(x+1) = p(x) * mean / (x+1) 421 p *= mean / ++x; 422 // The algorithm listed in Kemp (1981) does not check that the rolling probability 423 // is positive (non-zero). This is omitted here on the assumption that the cumulative 424 // probability will not be in the long tail where the probability asymptotes to zero. 425 } 426 return x; 427 } 428 429 /** 430 * Compute the sample assuming it is in the long tail of the distribution. 431 * 432 * <p>This requires a check on the next probability value which is expected to asymptote to zero. 433 * 434 * @param u the remaining cumulative probability 435 * @param x the current sample value X 436 * @param p the current probability of the sample (p(X=x)) 437 * @return the sample X 438 */ 439 private int sampleWithinLongTail(double u, int x, double p) { 440 while (u > p) { 441 // Update the remaining cumulative probability 442 u -= p; 443 // Compute the next probability using a recurrence relation. 444 // p(x+1) = p(x) * mean / (x+1) 445 p *= mean / ++x; 446 // The algorithm listed in Kemp (1981) does not check that the rolling probability 447 // is positive. This check is added to ensure no errors when the limit of the summation 448 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when 449 // in the long tail of the distribution. 450 if (p == 0) { 451 return x; 452 } 453 } 454 return x; 455 } 456 } 457 458 /** 459 * Kemp sampler for the Poisson distribution. 460 * 461 * <p>Note: This is a modification of the original algorithm by Kemp. It stores the 462 * cumulative probability table for repeat use. The table is searched using a binary 463 * search algorithm.</p> 464 */ 465 static class KempSmallMeanPoissonSamplerBinarySearch 466 implements DiscreteSampler { 467 /** 468 * Store the cumulative probability table size for integer means so that 99.99% 469 * of the Poisson distribution is covered. This is done until the table size is 470 * 2 * mean. 471 * 472 * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt 473 * the conservative limit of 2 * mean is used. 474 */ 475 private static final int[] TABLE_SIZE = { 476 /* mean 1 to 10. */ 477 8, 10, 12, 14, 16, 18, 20, 22, 24, 25, 478 /* mean 11 to 20. */ 479 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 480 }; 481 482 /** Underlying source of randomness. */ 483 private final UniformRandomProvider rng; 484 /** 485 * The mean of the Poisson sample. 486 */ 487 private final double mean; 488 /** 489 * Store the cumulative probability for all samples up to and including x. 490 * This is F(x) = sum of p(X<=x). 491 * 492 * <p>This table is initialized to store cumulative probabilities for x up to 2 * mean 493 * or 99.99% (whichever is larger). 494 */ 495 private final double[] fx; 496 /** 497 * Store the value x corresponding to the last stored cumulative probability. 498 */ 499 private int lastX; 500 /** 501 * Store the probability value p(x) corresponding to last stored cumulative probability, 502 * allowing the algorithm to start from the point x. 503 */ 504 private double px; 505 506 /** 507 * Create a new instance. 508 * 509 * <p>This is valid for means as large as approximately {@code 744}.</p> 510 * 511 * @param rng Generator of uniformly distributed random numbers. 512 * @param mean Mean. 513 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) == 0}. 514 */ 515 KempSmallMeanPoissonSamplerBinarySearch(UniformRandomProvider rng, 516 double mean) { 517 if (mean <= 0) { 518 throw new IllegalArgumentException(); 519 } 520 521 px = Math.exp(-mean); 522 if (px > 0) { 523 this.rng = rng; 524 this.mean = mean; 525 526 // Initialise the cumulative probability table. 527 // The supported mean where p(x=0) > 0 sets a limit of around 744 so this will always be 528 // possible. 529 final int upperMean = (int) Math.ceil(mean); 530 fx = new double[(upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2]; 531 fx[0] = px; 532 } else { 533 // This will catch NaN mean values 534 throw new IllegalArgumentException(); 535 } 536 } 537 538 /** {@inheritDoc} */ 539 @Override 540 public int sample() { 541 // Note on the algorithm: 542 // - X is the unknown sample deviate (the output of the algorithm) 543 // - x is the current value from the distribution 544 // - p is the probability of the current value x, p(X=x) 545 // - u is effectively the cumulative probability that the sample X 546 // is equal or above the current value x, p(X>=x) 547 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 548 final double u = rng.nextDouble(); 549 550 if (u <= fx[lastX]) { 551 // Binary search within known cumulative probability table. 552 // Find x so that u > f[x-1] and u <= f[x]. 553 // This is a looser search than Arrays.binarySearch: 554 // - The output is x = upper. 555 // - The pre-condition check ensures u <= f[upper] at the start. 556 // - The table stores probabilities where f[0] is non-zero. 557 // - u should be >= 0 (or the random generator is broken). 558 // - It avoids comparisons using Double.doubleToLongBits. 559 // - It avoids the low likelihood of equality between two doubles so uses 560 // only 1 compare per loop. 561 // - It uses a weighted middle anticipating that the cumulative distribution 562 // is skewed as the expected use case is a small mean. 563 int lower = 0; 564 int upper = lastX; 565 while (lower < upper) { 566 // Weighted middle is 1/4 of the range between lower and upper 567 final int mid = (3 * lower + upper) >>> 2; 568 final double midVal = fx[mid]; 569 if (u > midVal) { 570 // Change lower such that 571 // u > f[lower - 1] 572 lower = mid + 1; 573 } else { 574 // Change upper such that 575 // u <= f[upper] 576 upper = mid; 577 } 578 } 579 return upper; 580 } 581 582 // The sample is above x 583 int x1 = lastX + 1; 584 585 // Fill the cumulative probability table if possible 586 while (x1 < fx.length) { 587 // Compute the next probability using a recurrence relation. 588 // p(x+1) = p(x) * mean / (x+1) 589 px = nextProbability(px, x1); 590 // Compute the next cumulative probability f(x+1) and update 591 final double sum = fx[lastX] + px; 592 fx[++lastX] = sum; 593 // Check if this is the correct sample 594 if (u <= sum) { 595 return lastX; 596 } 597 x1 = lastX + 1; 598 } 599 600 // The sample is above the range of the cumulative probability table. 601 // Compute using the Kemp method. 602 // This requires the remaining cumulative probability and the probability for x+1. 603 return sampleWithinLongTail(u - fx[lastX], x1, nextProbability(px, x1)); 604 } 605 606 /** 607 * Compute the next probability using a recurrence relation. 608 * 609 * <pre> 610 * p(x + 1) = p(x) * mean / (x + 1) 611 * </pre> 612 * 613 * @param p the probability of x 614 * @param x1 the value of x+1 615 * @return the probability of x+1 616 */ 617 private double nextProbability(double p, int x1) { 618 return p * mean / x1; 619 } 620 621 /** 622 * Compute the sample assuming it is in the long tail of the distribution. 623 * 624 * <p>This requires a check on the next probability value which is expected to asymptote to zero. 625 * 626 * @param u the remaining cumulative probability 627 * @param x the current sample value X 628 * @param p the current probability of the sample (p(X=x)) 629 * @return the sample X 630 */ 631 private int sampleWithinLongTail(double u, int x, double p) { 632 while (u > p) { 633 // Update the remaining cumulative probability 634 u -= p; 635 p = nextProbability(p, ++x); 636 // The algorithm listed in Kemp (1981) does not check that the rolling probability 637 // is positive. This check is added to ensure no errors when the limit of the summation 638 // 1 - sum(p(x)) is above 0 due to cumulative error in floating point arithmetic when 639 // in the long tail of the distribution. 640 if (p == 0) { 641 return x; 642 } 643 } 644 return x; 645 } 646 } 647 /** 648 * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson 649 * distribution</a>. 650 * 651 * <p>Note: This is a modification of the original algorithm by Kemp. It stores the 652 * cumulative probability table for repeat use. The table is computed dynamically and 653 * searched using a guide table.</p> 654 */ 655 static class KempSmallMeanPoissonSamplerGuideTable implements DiscreteSampler { 656 /** 657 * Store the cumulative probability table size for integer means so that 99.99% of the 658 * Poisson distribution is covered. This is done until the table size is 2 * mean. 659 * 660 * <p>At higher mean the expected range is mean + 4 * sqrt(mean). To avoid the sqrt 661 * the conservative limit of 2 * mean is used. 662 */ 663 private static final int[] TABLE_SIZE = { 664 /* mean 1 to 10. */ 665 8, 10, 12, 14, 16, 18, 20, 22, 24, 25, 666 /* mean 11 to 20. */ 667 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, }; 668 669 /** Underlying source of randomness. */ 670 private final UniformRandomProvider rng; 671 /** 672 * The mean of the Poisson sample. 673 */ 674 private final double mean; 675 /** 676 * Store the cumulative probability for all samples up to and including x. This is 677 * F(x) = sum of p(X<=x). 678 * 679 * <p>This table is initialized to store cumulative probabilities for x up to 2 * mean 680 * or 99.99% (whichever is larger). 681 */ 682 private final double[] cumulativeProbability; 683 /** 684 * Store the value x corresponding to the last stored cumulative probability. 685 */ 686 private int tabulatedX; 687 /** 688 * Store the probability value p(x), allowing the algorithm to start from the point x. 689 */ 690 private double probabilityX; 691 /** 692 * The inverse cumulative probability guide table. This is a map between the cumulative 693 * probability (f(x)) and the value x. It is used to set the initial point for search 694 * of the cumulative probability table. 695 * 696 * <p>The index into the table is obtained using {@code f(x) * guideTable.length}. The value 697 * stored at the index is value {@code x+1} such that it is the exclusive upper bound 698 * on the sample value for searching the cumulative probability table. It requires the 699 * table search is towards zero.</p> 700 * 701 * <p>Note: Using x+1 ensures that the table can be zero filled upon initialisation and 702 * any index with zero has yet to be allocated.</p> 703 * 704 * <p>The guide table should never be used when the input f(x) is above the current range of 705 * the cumulative probability table. This would create an index that has not been 706 * allocated a mapping. 707 */ 708 private final int[] guideTable; 709 710 /** 711 * Create a new instance. 712 * 713 * <p>This is valid for means as large as approximately {@code 744}.</p> 714 * 715 * @param rng Generator of uniformly distributed random numbers. 716 * @param mean Mean. 717 * @throws IllegalArgumentException if {@code mean <= 0} or 718 * {@code Math.exp(-mean) == 0}. 719 */ 720 KempSmallMeanPoissonSamplerGuideTable(UniformRandomProvider rng, double mean) { 721 if (mean <= 0) { 722 throw new IllegalArgumentException("mean is not strictly positive: " + mean); 723 } 724 725 probabilityX = Math.exp(-mean); 726 if (probabilityX > 0) { 727 this.rng = rng; 728 this.mean = mean; 729 730 // Initialise the cumulative probability table. 731 // The supported mean where p(x=0) > 0 sets a limit of around 744 so the cast to int 732 // will always be possible. 733 final int upperMean = (int) Math.ceil(mean); 734 final int size = (upperMean < TABLE_SIZE.length) ? TABLE_SIZE[upperMean] : upperMean * 2; 735 cumulativeProbability = new double[size]; 736 cumulativeProbability[0] = probabilityX; 737 738 guideTable = new int[cumulativeProbability.length + 1]; 739 initializeGuideTable(probabilityX); 740 } else { 741 // This will catch NaN mean values 742 throw new IllegalArgumentException("No probability for mean " + mean); 743 } 744 } 745 746 /** 747 * Initialise the cumulative probability guide table. All guide indices at or below the 748 * index corresponding to the given probability will be set to 1. 749 * 750 * @param p0 the probability for x=0 751 */ 752 private void initializeGuideTable(double p0) { 753 for (int index = getGuideTableIndex(p0); index >= 0; index--) { 754 guideTable[index] = 1; 755 } 756 } 757 758 /** 759 * Fill the inverse cumulative probability guide table. Set the index corresponding to the 760 * given probability to x+1 establishing an exclusive upper bound on x for the probability. 761 * All unused guide indices below the index will also be set to x+1. 762 * 763 * @param p the cumulative probability 764 * @param x the sample value x 765 */ 766 private void updateGuideTable(double p, int x) { 767 // Always set the current index as the guide table is the exclusive upper bound 768 // for searching the cumulative probability table for any value p. 769 // Then fill any lower positions that are not allocated. 770 final int x1 = x + 1; 771 int index = getGuideTableIndex(p); 772 do { 773 guideTable[index--] = x1; 774 } while (index > 0 && guideTable[index] == 0); 775 } 776 777 /** 778 * Gets the guide table index for the probability. This is obtained using 779 * {@code p * (guideTable.length - 1)} so is inside the length of the table. 780 * 781 * @param p the cumulative probability 782 * @return the guide table index 783 */ 784 private int getGuideTableIndex(double p) { 785 // Note: This is only ever called when p is in the range of the cumulative 786 // probability table. So assume 0 <= p <= 1. 787 return (int) (p * (guideTable.length - 1)); 788 } 789 790 /** {@inheritDoc} */ 791 @Override 792 public int sample() { 793 // Note on the algorithm: 794 // 1. Compute a cumulative probability with a uniform deviate (u). 795 // 2. If the probability lies within the tabulated cumulative probabilities 796 // then find the sample value. 797 // 3. If possible expand the tabulated cumulative probabilities up to the value u. 798 // 4. If value u exceeds the capacity for the tabulated cumulative probabilities 799 // then compute the sample value dynamically without storing the probabilities. 800 801 // Compute a probability 802 final double u = rng.nextDouble(); 803 804 // Check the tabulated cumulative probability table 805 if (u <= cumulativeProbability[tabulatedX]) { 806 // Initialise the search using a guide table to find an initial guess. 807 // The table provides an upper bound on the sample for a known cumulative probability. 808 int sample = guideTable[getGuideTableIndex(u)] - 1; 809 // If u is above the sample probability (this occurs due to truncation) 810 // then return the next value up. 811 if (u > cumulativeProbability[sample]) { 812 return sample + 1; 813 } 814 // Search down 815 while (sample != 0 && u <= cumulativeProbability[sample - 1]) { 816 sample--; 817 } 818 return sample; 819 } 820 821 // The sample is above the tabulated cumulative probability for x 822 int x1 = tabulatedX + 1; 823 824 // Fill the cumulative probability table if possible 825 while (x1 < cumulativeProbability.length) { 826 probabilityX = nextProbability(probabilityX, x1); 827 // Compute the next cumulative probability f(x+1) and update 828 final double sum = cumulativeProbability[tabulatedX] + probabilityX; 829 cumulativeProbability[++tabulatedX] = sum; 830 updateGuideTable(sum, tabulatedX); 831 // Check if this is the correct sample 832 if (u <= sum) { 833 return tabulatedX; 834 } 835 x1 = tabulatedX + 1; 836 } 837 838 // The sample is above the range of the cumulative probability table. 839 // Compute using the Kemp method. 840 // This requires the remaining cumulative probability and the probability for x+1. 841 return sampleWithinLongTail(u - cumulativeProbability[tabulatedX], x1, nextProbability(probabilityX, x1)); 842 } 843 844 /** 845 * Compute the next probability using a recurrence relation. 846 * 847 * <pre> 848 * p(x + 1) = p(x) * mean / (x + 1) 849 * </pre> 850 * 851 * @param px the probability of x 852 * @param x1 the value of x+1 853 * @return the probability of x+1 854 */ 855 private double nextProbability(double px, int x1) { 856 return px * mean / x1; 857 } 858 859 /** 860 * Compute the sample assuming it is in the long tail of the distribution. 861 * 862 * <p>This requires a check on the next probability value which is expected to 863 * asymptote to zero. 864 * 865 * @param u the remaining cumulative probability 866 * @param x the current sample value X 867 * @param p the current probability of the sample (p(X=x)) 868 * @return the sample X 869 */ 870 private int sampleWithinLongTail(double u, int x, double p) { 871 // Note on the algorithm: 872 // - X is the unknown sample deviate (the output of the algorithm) 873 // - x is the current value from the distribution 874 // - p is the probability of the current value x, p(X=x) 875 // - u is effectively the cumulative probability that the sample X 876 // is equal or above the current value x, p(X>=x) 877 // So if p(X>=x) > p(X=x) the sample must be above x, otherwise it is x 878 while (u > p) { 879 // Update the remaining cumulative probability 880 u -= p; 881 p = nextProbability(p, ++x); 882 // The algorithm listed in Kemp (1981) does not check that the rolling 883 // probability is positive. This check is added to ensure no errors when the 884 // limit of the summation 1 - sum(p(x)) is above 0 due to cumulative error in 885 // floating point arithmetic when in the long tail of the distribution. 886 if (p == 0) { 887 break; 888 } 889 } 890 return x; 891 } 892 } 893 894 /** 895 * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>. 896 * 897 * <ul> 898 * <li> 899 * For small means, a Poisson process is simulated using uniform deviates, as 900 * described in Knuth (1969). Seminumerical Algorithms. The Art of Computer Programming, 901 * Volume 2. Addison Wesley. 902 * </li> 903 * </ul> 904 * 905 * <p>This sampler is suitable for {@code mean < 20}.</p> 906 * 907 * <p>Sampling uses {@link UniformRandomProvider#nextInt()} and 32-bit integer arithmetic.</p> 908 * 909 * @see <a href="https://en.wikipedia.org/wiki/Poisson_distribution#Generating_Poisson-distributed_random_variables"> 910 * Poisson random variables</a> 911 */ 912 static class TinyMeanPoissonSampler 913 implements DiscreteSampler { 914 /** Pre-compute Poisson probability p(n=0) mapped to the range of a 32-bit unsigned fraction. */ 915 private final long p0; 916 /** Underlying source of randomness. */ 917 private final UniformRandomProvider rng; 918 919 /** 920 * @param rng Generator of uniformly distributed random numbers. 921 * @param mean Mean. 922 * @throws IllegalArgumentException if {@code mean <= 0} or {@code Math.exp(-mean) * (1L << 32)} 923 * is not positive. 924 */ 925 TinyMeanPoissonSampler(UniformRandomProvider rng, 926 double mean) { 927 this.rng = rng; 928 if (mean <= 0) { 929 throw new IllegalArgumentException(); 930 } 931 // Math.exp(-mean) is the probability of a Poisson distribution for n=0 (p(n=0)). 932 // This is mapped to a 32-bit range as the numerator of a 32-bit fraction 933 // for use in optimised 32-bit arithmetic. 934 p0 = (long) (Math.exp(-mean) * 0x100000000L); 935 if (p0 == 0) { 936 throw new IllegalArgumentException("No p(x=0) probability for mean: " + mean); 937 } 938 } 939 940 /** {@inheritDoc} */ 941 @Override 942 public int sample() { 943 int n = 0; 944 // The unsigned 32-bit sample represents the fraction x / 2^32 where x is [0,2^32-1]. 945 // The upper bound is exclusive hence the fraction is a uniform deviate from [0,1). 946 long r = nextUnsigned32(); 947 // The limit is the probability p(n=0) converted to an unsigned fraction. 948 while (r >= p0) { 949 // Compute the fraction: 950 // r [0,2^32) 2^32 951 // ---- * -------- / ---- 952 // 2^32 2^32 2^32 953 // This rounds down the fraction numerator when the lower 32-bits are discarded. 954 r = (r * nextUnsigned32()) >>> 32; 955 n++; 956 } 957 // Ensure the value is positive in the worst case scenario of a broken 958 // generator that returns 0xffffffff for each sample. This will cause 959 // the integer counter to overflow 2^31-1 but not exceed 2^32. The fraction 960 // multiplication effectively turns r into a counter decrementing from 2^32-1 961 // to zero. 962 return (n >= 0) ? n : Integer.MAX_VALUE; 963 } 964 965 /** 966 * Get the next unsigned 32-bit random integer. 967 * 968 * @return the random integer 969 */ 970 private long nextUnsigned32() { 971 return rng.nextInt() & 0xffffffffL; 972 } 973 } 974 975 // Benchmarks methods below. 976 977 /** 978 * Baseline for the JMH timing overhead for production of an {@code int} value. 979 * 980 * @return the {@code int} value 981 */ 982 @Benchmark 983 public int baselineInt() { 984 return value; 985 } 986 987 /** 988 * Baseline for {@link Math#exp(double)}. 989 * 990 * @param mean the mean 991 * @return the value 992 */ 993 @Benchmark 994 public double baselineExp(Means mean) { 995 return Math.exp(-mean.getMean()); 996 } 997 998 /** 999 * Run the sampler. 1000 * 1001 * @param sources Source of randomness. 1002 * @return the sample value 1003 */ 1004 @Benchmark 1005 public int sample(Sources sources) { 1006 return sources.getSampler().sample(); 1007 } 1008 1009 /** 1010 * Run the sampler. 1011 * 1012 * @param sources Source of randomness. 1013 * @return the sample value 1014 */ 1015 @Benchmark 1016 public int singleSample(Sources sources) { 1017 return sources.createSampler().sample(); 1018 } 1019 }