1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.numbers.examples.jmh.core; 19 20 import java.util.concurrent.TimeUnit; 21 import java.util.function.DoubleBinaryOperator; 22 23 import org.apache.commons.rng.UniformRandomProvider; 24 import org.apache.commons.rng.simple.RandomSource; 25 import org.openjdk.jmh.annotations.Benchmark; 26 import org.openjdk.jmh.annotations.BenchmarkMode; 27 import org.openjdk.jmh.annotations.Fork; 28 import org.openjdk.jmh.annotations.Measurement; 29 import org.openjdk.jmh.annotations.Mode; 30 import org.openjdk.jmh.annotations.OutputTimeUnit; 31 import org.openjdk.jmh.annotations.Param; 32 import org.openjdk.jmh.annotations.Scope; 33 import org.openjdk.jmh.annotations.Setup; 34 import org.openjdk.jmh.annotations.State; 35 import org.openjdk.jmh.annotations.Warmup; 36 import org.openjdk.jmh.infra.Blackhole; 37 38 /** 39 * Executes a benchmark to measure the speed of operations in the {@link LinearCombination} class. 40 * Benchmarks focus on the sticky summation of two double values. 41 * 42 * <p>Details of the sticky bit can be found in: 43 * <blockquote> 44 * Coonen, J.T., "An Implementation Guide to a Proposed Standard for Floating Point 45 * Arithmetic", Computer, Vol. 13, No. 1, Jan. 1980, pp 68-79. 46 * </blockquote> 47 */ 48 @BenchmarkMode(Mode.AverageTime) 49 @OutputTimeUnit(TimeUnit.NANOSECONDS) 50 @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 51 @Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 52 @State(Scope.Benchmark) 53 @Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx512M"}) 54 public class StickySumPerformance { 55 /** The mask for the sign bit and the mantissa. */ 56 private static final long SIGN_MATISSA_MASK = 0x800f_ffff_ffff_ffffL; 57 58 /** Constant for no method. */ 59 private static final String NONE = "none"; 60 /** Constant for branched method. */ 61 private static final String BRANCHED = "branched"; 62 /** Constant for branchless method. */ 63 private static final String BRANCHLESS = "branchless"; 64 /** Constant for single branch method based on the low bit of the high part. */ 65 private static final String BRANCH_ON_HI = "branch_on_hi"; 66 /** Constant for single branch method based on the low part. */ 67 private static final String BRANCH_ON_LO = "branch_on_lo"; 68 69 /** 70 * The factors to sum. 71 */ 72 @State(Scope.Benchmark) 73 public static class BiFactors { 74 /** The exponent for small numbers. */ 75 private static final long EXP = Double.doubleToRawLongBits(1.0); 76 77 /** 78 * The count of sums. 79 */ 80 @Param({"10000"}) 81 private int size; 82 83 /** 84 * The fraction of numbers that have a zero round-off. 85 */ 86 @Param({"0", "0.1"}) 87 private double zeroRoundoff; 88 89 /** Factors. */ 90 private double[] a; 91 92 /** 93 * Gets the factors to be summed as pairs. The array length will be even. 94 * 95 * @return Factors. 96 */ 97 public double[] getFactors() { 98 return a; 99 } 100 101 /** 102 * Create the factors. 103 */ 104 @Setup 105 public void setup() { 106 final UniformRandomProvider rng = RandomSource.XO_RO_SHI_RO_1024_PP.create(); 107 a = new double[size * 2]; 108 // Report on the dataset 109 int nonZero = 0; 110 int unsetSticky = 0; 111 for (int i = 0; i < a.length; i += 2) { 112 // Create data similar to the summation of an expansion. 113 // E.g. Generate 2 numbers with large differences in exponents. 114 // Their sum should have a round-off term containing many bits. 115 final double x = nextDouble(rng); 116 // The round-off can conditionally be forced to zero. 117 final double y = (rng.nextDouble() < zeroRoundoff) ? 118 0.0 : 119 nextDouble(rng) * 0x1.0p52; 120 121 // Initial expansion 122 double e1 = x + y; 123 double e0 = DoublePrecision.twoSumLow(x, y, e1); 124 125 // Validate methods 126 final double expected = fastSumWithStickyBitBranched(e0, e1); 127 assertEqual(expected, fastSumWithStickyBitBranchless(e0, e1), BRANCHLESS); 128 assertEqual(expected, fastSumWithStickyBitBranchedOnHigh(e0, e1), BRANCH_ON_HI); 129 assertEqual(expected, fastSumWithStickyBitBranchedOnLow(e0, e1), BRANCH_ON_LO); 130 131 // Lower parts of expansion for use in sticky sum 132 a[i] = e0; 133 a[i + 1] = e1; 134 135 // Check the sum and round-off 136 final double sum = e1 + e0; 137 final double r = e0 - (sum - e1); 138 if (r != 0) { 139 nonZero++; 140 } 141 if ((Double.doubleToRawLongBits(sum) & 0x1) == 0) { 142 unsetSticky++; 143 } 144 } 145 // CHECKSTYLE: stop Regexp 146 System.out.printf("%n%nNon-zero %d/%d (%.3f) : Unset sticky %d/%d (%.3f)%n%n", 147 nonZero, size, 148 (double) nonZero / size, unsetSticky, size, (double) unsetSticky / size); 149 // CHECKSTYLE: resume Regexp 150 } 151 152 /** 153 * Create the next double in the range [1, 2). All mantissa bits have an equal 154 * probability of being set. 155 * 156 * @param rng Generator of random numbers. 157 * @return the double 158 */ 159 private static double nextDouble(UniformRandomProvider rng) { 160 final long bits = rng.nextLong() & SIGN_MATISSA_MASK; 161 return Double.longBitsToDouble(bits | EXP); 162 } 163 164 /** 165 * Assert the two values are equal. 166 * 167 * @param expected Expected value 168 * @param actual Actual value 169 * @param msg Error message 170 */ 171 private static void assertEqual(double expected, double actual, String msg) { 172 if (Double.compare(expected, actual) != 0) { 173 throw new IllegalStateException("Methods do not match: " + msg); 174 } 175 } 176 } 177 178 /** 179 * The summation method. 180 */ 181 @State(Scope.Benchmark) 182 public static class SumMethod { 183 /** 184 * The name of the method. 185 */ 186 @Param({NONE, BRANCHED, BRANCHLESS, BRANCH_ON_HI, BRANCH_ON_LO}) 187 private String name; 188 189 /** The function. */ 190 private DoubleBinaryOperator fun; 191 192 /** 193 * Gets the function. 194 * 195 * @return the function 196 */ 197 public DoubleBinaryOperator getFunction() { 198 return fun; 199 } 200 201 /** 202 * Create the function. 203 */ 204 @Setup 205 public void setup() { 206 if (NONE.equals(name)) { 207 fun = (a, b) -> a + b; 208 } else if (BRANCHED.equals(name)) { 209 fun = StickySumPerformance::fastSumWithStickyBitBranched; 210 } else if (BRANCHLESS.equals(name)) { 211 fun = StickySumPerformance::fastSumWithStickyBitBranchless; 212 } else if (BRANCH_ON_HI.equals(name)) { 213 fun = StickySumPerformance::fastSumWithStickyBitBranchedOnHigh; 214 } else if (BRANCH_ON_LO.equals(name)) { 215 fun = StickySumPerformance::fastSumWithStickyBitBranchedOnLow; 216 } else { 217 throw new IllegalStateException("Unknown sum method: " + name); 218 } 219 } 220 } 221 222 /** 223 * Compute the sum of two numbers {@code a} and {@code b} using 224 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude 225 * {@code |a| >= |b|}. The result is adjusted to set the lowest bit as a sticky 226 * bit that summarises the magnitude of the round-off that were lost. The 227 * result is not the correctly rounded result; it is intended the result is to 228 * be used in an addition with a value with a greater magnitude exponent. This 229 * addition will have exact round-to-nearest, ties-to-even rounding taking account 230 * of bits lots in the previous sum. 231 * 232 * @param a First part of sum. 233 * @param b Second part of sum. 234 * @return <code>b - (sum - a)</code> 235 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 236 * Shewchuk (1997) Theorum 6</a> 237 */ 238 private static double fastSumWithStickyBitBranched(double a, double b) { 239 double sum = a + b; 240 // bVirtual = sum - a 241 // b - bVirtual == b round-off 242 final double r = b - (sum - a); 243 244 if (r != 0) { 245 // Bits will be lost. 246 // In floating-point arithmetic the sticky bit is the bit-wise OR 247 // of the rest of the binary bits that cannot be stored in the 248 // preliminary representation of the result: 249 // 250 // sgn | exp | V | N | .............. | L | G | R | S 251 // 252 // sgn : sign bit 253 // exp : exponent 254 // V : overflow for significand field 255 // N and L : most and least significant bits of the result 256 // G and R : the two bits beyond 257 // S : sticky bit, bitwise OR of all bits thereafter 258 // 259 // The sticky bit is a flag indicating if there is more magnitude beyond 260 // the last bits. Here the round-off is signed so we have to consider the 261 // sign of the sum and round-off together and either add the sticky or 262 // remove it. The final bit is thus used to push up the next addition using 263 // the sum to a higher value, or down to a lower value, when tie breaking for 264 // the correct round-to-nearest, ties-to-even result. 265 long hi = Double.doubleToRawLongBits(sum); 266 // Can only set a sticky bit if the bit is not set. 267 if ((hi & 0x1) == 0) { 268 // In a standard extended precision result for (a+b) the bits are extra 269 // magnitude lost and the sticky bit is positive. 270 // Here the round-off magnitude (r) can be negative so the sticky 271 // bit should be added (same sign) or subtracted (different sign). 272 if (sum > 0) { 273 hi += (r > 0) ? 1 : -1; 274 } else { 275 hi += (r < 0) ? 1 : -1; 276 } 277 sum = Double.longBitsToDouble(hi); 278 } 279 } 280 return sum; 281 } 282 283 /** 284 * Compute the sum of two numbers {@code a} and {@code b} using 285 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude 286 * {@code |a| >= |b|}. The result is adjusted to set the lowest bit as a sticky 287 * bit that summarises the magnitude of the round-off that were lost. The 288 * result is not the correctly rounded result; it is intended the result is to 289 * be used in an addition with a value with a greater magnitude exponent. This 290 * addition will have exact round-to-nearest, ties-to-even rounding taking account 291 * of bits lots in the previous sum. 292 * 293 * @param a First part of sum. 294 * @param b Second part of sum. 295 * @return <code>b - (sum - a)</code> 296 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 297 * Shewchuk (1997) Theorum 6</a> 298 */ 299 private static double fastSumWithStickyBitBranchless(double a, double b) { 300 final double sum = a + b; 301 // bVirtual = sum - a 302 // b - bVirtual == b round-off 303 final double r = b - (sum - a); 304 305 // In floating-point arithmetic the sticky bit is the bit-wise OR 306 // of the rest of the binary bits that cannot be stored in the 307 // preliminary representation of the result: 308 // 309 // sgn | exp | V | N | .............. | L | G | R | S 310 // 311 // sgn : sign bit 312 // exp : exponent 313 // V : overflow for significand field 314 // N and L : most and least significant bits of the result 315 // G and R : the two bits beyond 316 // S : sticky bit, bitwise OR of all bits thereafter 317 // 318 // The sticky bit is a flag indicating if there is more magnitude beyond 319 // the last bits. 320 // 321 // Here the round-off is signed so we have to consider the 322 // sign of the sum and round-off together and either add the sticky or 323 // remove it. The final bit is thus used to push up the next addition using 324 // the sum to a higher value, or down to a lower value, when tie breaking for 325 // the correct round-to-nearest, ties-to-even result. 326 // 327 // One extra consideration: sum is already rounded. Since we are using the 328 // last bit to store a sticky bit then if the final bit is 1 then this was 329 // not created by a ties-to-even rounding and is already a sticky bit. 330 331 // Compute the sticky bit addition: 332 // sign sum last bit sum sign r magnitude r sticky 333 // x 1 x x +0 334 // 335 // 1 0 1 1 +1 336 // 1 0 1 0 +0 337 // 1 0 0 1 -1 338 // 1 0 0 0 +0 339 // 0 0 1 1 -1 340 // 0 0 1 0 +0 341 // 0 0 0 1 +1 342 // 0 0 0 0 +0 343 // 344 // Magnitude of r is computed by bitwise OR of the 63-bits from exponent+mantissa 345 // Sign of sum and r is the sign-bit of sum or r 346 347 final long hi = Double.doubleToRawLongBits(sum); 348 349 // Note: >50% of the time all code below here is redundant 350 //if ((hi & 0x1) == 0x1) { 351 // // Already sticky 352 // return sum; 353 //} 354 355 final long lo = Double.doubleToRawLongBits(r); 356 357 // OR compress least significant 63-bits into lowest bit 358 long sticky = lo; 359 sticky |= sticky >>> 31; // Discard sign bit 360 sticky |= sticky >>> 16; 361 sticky |= sticky >>> 8; 362 sticky |= sticky >>> 4; 363 sticky |= sticky >>> 2; 364 sticky |= sticky >>> 1; // final sticky bit is in position 0 365 366 // AND with the inverse of the trailing bit from hi to set it to zero 367 // if the last bit in hi is 1 (already sticky). 368 sticky = sticky & ~hi; 369 370 // Clear the rest. Sticky is now 0 if r was 0.0; or 1 if r was non-zero. 371 sticky = sticky & 0x1; 372 373 // The sign bit is created as + or - using the XOR of hi and lo. 374 // Signed shift will create a flag: -1 to negate, else 0. 375 final long fNegate = (hi ^ lo) >> 63; 376 377 // Conditionally negate a value without branching: 378 // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate 379 // (Logic updated since fNegate is already negative.) 380 // (1 ^ 0) - 0 = 1 - 0 = 1 381 // (0 ^ 0) - 0 = 0 - 0 = 0 382 // (1 ^ -1) - -1 = -2 - -1 = -1 383 // (0 ^ -1) - -1 = -1 - -1 = 0 384 sticky = (sticky ^ fNegate) - fNegate; 385 386 return Double.longBitsToDouble(hi + sticky); 387 } 388 389 /** 390 * Compute the sum of two numbers {@code a} and {@code b} using 391 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude 392 * {@code |a| >= |b|}. The result is adjusted to set the lowest bit as a sticky 393 * bit that summarises the magnitude of the round-off that were lost. The 394 * result is not the correctly rounded result; it is intended the result is to 395 * be used in an addition with a value with a greater magnitude exponent. This 396 * addition will have exact round-to-nearest, ties-to-even rounding taking account 397 * of bits lots in the previous sum. 398 * 399 * @param a First part of sum. 400 * @param b Second part of sum. 401 * @return <code>b - (sum - a)</code> 402 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 403 * Shewchuk (1997) Theorum 6</a> 404 */ 405 private static double fastSumWithStickyBitBranchedOnHigh(double a, double b) { 406 final double sum = a + b; 407 // bVirtual = sum - a 408 // b - bVirtual == b round-off 409 final double r = b - (sum - a); 410 411 // In floating-point arithmetic the sticky bit is the bit-wise OR 412 // of the rest of the binary bits that cannot be stored in the 413 // preliminary representation of the result: 414 // 415 // sgn | exp | V | N | .............. | L | G | R | S 416 // 417 // sgn : sign bit 418 // exp : exponent 419 // V : overflow for significand field 420 // N and L : most and least significant bits of the result 421 // G and R : the two bits beyond 422 // S : sticky bit, bitwise OR of all bits thereafter 423 // 424 // The sticky bit is a flag indicating if there is more magnitude beyond 425 // the last bits. 426 // 427 // Here the round-off is signed so we have to consider the 428 // sign of the sum and round-off together and either add the sticky or 429 // remove it. The final bit is thus used to push up the next addition using 430 // the sum to a higher value, or down to a lower value, when tie breaking for 431 // the correct round-to-nearest, ties-to-even result. 432 // 433 // One extra consideration: sum is already rounded. Since we are using the 434 // last bit to store a sticky bit then if the final bit is 1 then this was 435 // not created by a ties-to-even rounding and is already a sticky bit. 436 437 // Compute the sticky bit addition: 438 // sign sum last bit sum sign r magnitude r sticky 439 // x 1 x x +0 440 // 441 // 1 0 1 1 +1 442 // 1 0 1 0 +0 443 // 1 0 0 1 -1 444 // 1 0 0 0 +0 445 // 0 0 1 1 -1 446 // 0 0 1 0 +0 447 // 0 0 0 1 +1 448 // 0 0 0 0 +0 449 // 450 // Magnitude of r is computed by bitwise OR of the 63-bits from exponent+mantissa 451 // Sign of sum and r is the sign-bit of sum or r 452 453 final long hi = Double.doubleToRawLongBits(sum); 454 455 if ((hi & 0x1) == 0x1) { 456 // Already sticky 457 return sum; 458 } 459 460 final long lo = Double.doubleToRawLongBits(r); 461 462 // OR compress least significant 63-bits into lowest bit 463 long sticky = lo; 464 sticky |= sticky >>> 31; // Discard sign bit 465 sticky |= sticky >>> 16; 466 sticky |= sticky >>> 8; 467 sticky |= sticky >>> 4; 468 sticky |= sticky >>> 2; 469 sticky |= sticky >>> 1; // final sticky bit is in position 0 470 471 // No requirement for AND with the inverse of the trailing bit from hi as we 472 // have eliminated that condition. 473 474 // Clear the rest. Sticky is now 0 if r was 0.0; or 1 if r was non-zero. 475 sticky = sticky & 0x1; 476 477 // The sign bit is created as + or - using the XOR of hi and lo. 478 // Signed shift will create a flag: -1 to negate, else 0. 479 final long fNegate = (hi ^ lo) >> 63; 480 481 // Conditionally negate a value without branching: 482 // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate 483 // (Logic updated since fNegate is already negative.) 484 // (1 ^ 0) - 0 = 1 - 0 = 1 485 // (0 ^ 0) - 0 = 0 - 0 = 0 486 // (1 ^ -1) - -1 = -2 - -1 = -1 487 // (0 ^ -1) - -1 = -1 - -1 = 0 488 sticky = (sticky ^ fNegate) - fNegate; 489 490 return Double.longBitsToDouble(hi + sticky); 491 492 } 493 494 /** 495 * Compute the sum of two numbers {@code a} and {@code b} using 496 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude 497 * {@code |a| >= |b|}. The result is adjusted to set the lowest bit as a sticky 498 * bit that summarises the magnitude of the round-off that were lost. The 499 * result is not the correctly rounded result; it is intended the result is to 500 * be used in an addition with a value with a greater magnitude exponent. This 501 * addition will have exact round-to-nearest, ties-to-even rounding taking account 502 * of bits lots in the previous sum. 503 * 504 * @param a First part of sum. 505 * @param b Second part of sum. 506 * @return <code>b - (sum - a)</code> 507 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 508 * Shewchuk (1997) Theorum 6</a> 509 */ 510 private static double fastSumWithStickyBitBranchedOnLow(double a, double b) { 511 final double sum = a + b; 512 // bVirtual = sum - a 513 // b - bVirtual == b round-off 514 final double r = b - (sum - a); 515 516 // In floating-point arithmetic the sticky bit is the bit-wise OR 517 // of the rest of the binary bits that cannot be stored in the 518 // preliminary representation of the result: 519 // 520 // sgn | exp | V | N | .............. | L | G | R | S 521 // 522 // sgn : sign bit 523 // exp : exponent 524 // V : overflow for significand field 525 // N and L : most and least significant bits of the result 526 // G and R : the two bits beyond 527 // S : sticky bit, bitwise OR of all bits thereafter 528 // 529 // The sticky bit is a flag indicating if there is more magnitude beyond 530 // the last bits. 531 // 532 // Here the round-off is signed so we have to consider the 533 // sign of the sum and round-off together and either add the sticky or 534 // remove it. The final bit is thus used to push up the next addition using 535 // the sum to a higher value, or down to a lower value, when tie breaking for 536 // the correct round-to-nearest, ties-to-even result. 537 // 538 // One extra consideration: sum is already rounded. Since we are using the 539 // last bit to store a sticky bit then if the final bit is 1 then this was 540 // not created by a ties-to-even rounding and is already a sticky bit. 541 542 // Compute the sticky bit addition. 543 // This is only done when r is non-zero: 544 // sign sum last bit sum sign r sticky 545 // x 1 x +0 546 // 547 // 1 0 1 +1 548 // 1 0 0 -1 549 // 0 0 1 -1 550 // 0 0 0 +1 551 // 552 // Sign of sum and r is the sign-bit of sum or r 553 554 // In the majority of cases there is some round-off. 555 // Testing for non-zero allows the branch to assume the sticky bit magnitude 556 // is 1 (unless the final bit of hi is already set). 557 if (r != 0) { 558 // Bits will be lost. 559 final long hi = Double.doubleToRawLongBits(sum); 560 final long lo = Double.doubleToRawLongBits(r); 561 562 // Can only set a sticky bit if the bit is not already set. 563 // Flip the bits and extract the lowest bit. This is 1 if 564 // the sticky bit is not currently set. If already set then 565 // 'sticky' is zero and the rest of this execution path has 566 // no effect but we have eliminated the requirement to check the 567 // 50/50 branch: 568 // if ((hi & 0x1) == 0) { 569 // // set sticky ... 570 // } 571 int sticky = ~((int) hi) & 0x1; 572 573 // The sign bit is created as + or - using the XOR of hi and lo. 574 // Signed shift will create a flag: -1 to negate, else 0. 575 final int fNegate = (int) ((hi ^ lo) >> 63); 576 577 // Conditionally negate a value without branching: 578 // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate 579 // (Logic updated since fNegate is already negative.) 580 // (1 ^ 0) - 0 = 1 - 0 = 1 581 // (0 ^ 0) - 0 = 0 - 0 = 0 582 // (1 ^ -1) - -1 = -2 - -1 = -1 583 // (0 ^ -1) - -1 = -1 - -1 = 0 584 sticky = (sticky ^ fNegate) - fNegate; 585 586 return Double.longBitsToDouble(hi + sticky); 587 } 588 589 return sum; 590 } 591 592 // Benchmark methods. 593 594 /** 595 * Benchmark the sticky summation of two numbers. 596 * 597 * @param factors Factors. 598 * @param bh Data sink. 599 * @param method Summation method. 600 */ 601 @Benchmark 602 public void stickySum(BiFactors factors, Blackhole bh, SumMethod method) { 603 final DoubleBinaryOperator fun = method.getFunction(); 604 final double[] a = factors.getFactors(); 605 for (int i = 0; i < a.length; i += 2) { 606 bh.consume(fun.applyAsDouble(a[i], a[i + 1])); 607 } 608 } 609 }