1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.numbers.examples.jmh.core; 18 19 /** 20 * Computes double-length precision floating-point operations. 21 * 22 * <p>It is based on the 1971 paper 23 * <a href="https://doi.org/10.1007/BF01397083"> 24 * Dekker (1971) A floating-point technique for extending the available precision</a>. 25 */ 26 final class DoublePrecision { 27 /* 28 * Caveat: 29 * 30 * The code below uses many additions/subtractions that may 31 * appear redundant. However, they should NOT be simplified, as they 32 * do use IEEE754 floating point arithmetic rounding properties. 33 * 34 * Algorithms are based on computing the product or sum of two values x and y in 35 * extended precision. The standard result is stored using a double (high part z) and 36 * the round-off error (or low part zz) is stored in a second double, e.g: 37 * x * y = (z, zz); z + zz = x * y 38 * x + y = (z, zz); z + zz = x + y 39 * 40 * To sum multiple (z, zz) results ideally the parts are sorted in order of 41 * non-decreasing magnitude and summed. This is exact if each number's most significant 42 * bit is below the least significant bit of the next (i.e. does not 43 * overlap). Creating non-overlapping parts requires a rebalancing 44 * of adjacent pairs using a summation z + zz = (z1, zz1) iteratively through the parts 45 * (see Shewchuk (1997) Grow-Expansion and Expansion-Sum [1]). 46 * 47 * [1] Shewchuk (1997): Arbitrary Precision Floating-Point Arithmetic 48 * http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps 49 */ 50 51 /** 52 * The multiplier used to split the double value into high and low parts. From 53 * Dekker (1971): "The constant should be chosen equal to 2^(p - p/2) + 1, 54 * where p is the number of binary digits in the mantissa". Here p is 53 55 * and the multiplier is {@code 2^27 + 1}. 56 */ 57 private static final double MULTIPLIER = 1.34217729E8; 58 59 /** The upper limit above which a number may overflow during the split into a high part. 60 * Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe 61 * limit is a value with an exponent of (1023 - 27) = 2^996. */ 62 private static final double SAFE_UPPER = 0x1.0p996; 63 64 /** The scale to use when down-scaling during a split into a high part. 65 * This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */ 66 private static final double DOWN_SCALE = 0x1.0p-30; 67 68 /** The scale to use when re-scaling during a split into a high part. 69 * This is the inverse of {@link #DOWN_SCALE}. */ 70 private static final double UP_SCALE = 0x1.0p30; 71 72 /** The mask to zero the lower 27-bits of a long . */ 73 private static final long ZERO_LOWER_27_BITS = 0xffff_ffff_f800_0000L; 74 75 /** The mask to extract the raw 11-bit exponent. 76 * The value must be shifted 52-bits to remove the mantissa bits. */ 77 private static final int EXP_MASK = 0x7ff; 78 79 /** The value 2046 converted for use if using {@link Integer#compareUnsigned(int, int)}. 80 * This requires adding {@link Integer#MIN_VALUE} to 2046. */ 81 private static final int CMP_UNSIGNED_2046 = Integer.MIN_VALUE + 2046; 82 83 /** The value -1 converted for use if using {@link Integer#compareUnsigned(int, int)}. 84 * This requires adding {@link Integer#MIN_VALUE} to -1. */ 85 private static final int CMP_UNSIGNED_MINUS_1 = Integer.MIN_VALUE - 1; 86 87 /** 88 * Represents a floating-point number with twice the precision of a {@code double}. 89 */ 90 static final class Quad { 91 // This is treated as a simple struct. 92 // CHECKSTYLE: stop VisibilityModifier 93 /** The high part of the number. */ 94 double hi; 95 /** The low part of the number. */ 96 double lo; 97 // CHECKSTYLE: resume VisibilityModifier 98 } 99 100 /** Private constructor. */ 101 private DoublePrecision() { 102 // intentionally empty. 103 } 104 105 /** 106 * Multiply the values {@code x} and {@code y} into a double-precision result {@code z}. 107 * It is assumed the numbers are normalized so no over/underflow will occurs. 108 * 109 * <p>Implements Dekker's mul12 method to split the numbers and multiply them 110 * in extended precision. 111 * 112 * <p>Note: The quad satisfies the condition {@code x * y == z.hi + z.lo}. The high 113 * part may be different from {@code x * y} by 1 ulp due to rounding. 114 * 115 * @param x First value 116 * @param y Second value 117 * @param z Result 118 */ 119 static void multiplyUnscaled(double x, double y, Quad z) { 120 // Note: The original mul12 algorithm avoids x * y and saves 1 multiplication. 121 double p; 122 p = x * MULTIPLIER; 123 final double hx = x - p + p; 124 final double lx = x - hx; 125 p = y * MULTIPLIER; 126 final double hy = y - p + p; 127 final double ly = y - hy; 128 p = hx * hy; 129 final double q = hx * ly + lx * hy; 130 z.hi = p + q; 131 z.lo = p - z.hi + q + lx * ly; 132 } 133 134 /** 135 * Multiply the values {@code x} and {@code y} into a double-precision result {@code c}. 136 * Scaling is performed on the numbers to protect against intermediate over/underflow. 137 * 138 * <p>The quadruple precision result has the standard double precision result 139 * {@code x * y} in the high part and the round-off in the low part, 140 * 141 * <p>Special cases: 142 * 143 * <ul> 144 * <li>If {@code x * y} is sub-normal or zero then the low part is 0.0. 145 * <li>If {@code x * y} is infinite or NaN then the low part is NaN. 146 * </ul> 147 * 148 * <p>Note: This does not represent the low part of infinity with zero. This is because the 149 * method is intended to be used for extended precision computations. The NaN low part 150 * signals that an extended precision computation using the result is invalid (i.e. the 151 * result of summation/multiplication of the parts will not be finite). 152 * 153 * @param x First value 154 * @param y Second value 155 * @param c Result 156 * @see DoublePrecision#productLowUnscaled(double, double, double) 157 */ 158 static void multiply(double x, double y, Quad c) { 159 // Special cases. Check the product. 160 final double xy = x * y; 161 if (isNotNormal(xy)) { 162 c.hi = xy; 163 // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan 164 c.lo = xy - xy; 165 return; 166 } 167 // Extract biased exponent and normalise. 168 // Sub-normals are scaled by 2^54 and the exponent adjusted. 169 // This is equivalent to the c function frexp which decomposes given floating 170 // point value arg into a normalized fraction and an integral power of two. 171 // Here we use a biased exponent as it is later adjusted when re-scaling. 172 long xb = Double.doubleToRawLongBits(x); 173 int xe = getBiasedExponent(xb); 174 double xs; 175 if (xe == 0) { 176 // Sub-normal. Scale up and extract again 177 xs = x * 0x1.0p54; 178 xb = Double.doubleToRawLongBits(xs); 179 xe = getBiasedExponent(xb) - 54; 180 } 181 xs = getNormalisedFraction(xb); 182 183 long yb = Double.doubleToRawLongBits(y); 184 int ye = getBiasedExponent(yb); 185 double ys; 186 if (ye == 0) { 187 // Sub-normal. Scale up and extract again 188 ys = y * 0x1.0p54; 189 yb = Double.doubleToRawLongBits(ys); 190 ye = getBiasedExponent(yb) - 54; 191 } 192 ys = getNormalisedFraction(yb); 193 194 // Compute hi as x*y. 195 // Thus if the standard precision result is finite (as verified in the initial test 196 // on x * y) then the extended precision result will be. 197 double z = xs * ys; 198 double zz = productLowUnscaled(xs, ys, z); 199 200 // Re-scale. The result is currently in the range [0.25, 1) so no checks for 201 // 0, nan, inf (the result exponent will be -2 or -1). 202 // Both exponents are currently biased so subtract 1023 to get the biased scale. 203 int scale = xe + ye - 1023; 204 // Compute scaling by multiplication so we can scale both together. 205 // If a single multiplication to a normal number then handle here. 206 if (scale <= 2046 && scale > 0) { 207 // Convert to a normalised power of 2 208 final double d = Double.longBitsToDouble(((long) scale) << 52); 209 z *= d; 210 zz *= d; 211 } else { 212 // Delegate to java.util.Math 213 // We have to adjust the biased scale to unbiased using the exponent offset 1023. 214 scale -= 1023; 215 z = Math.scalb(z, scale); 216 zz = Math.scalb(zz, scale); 217 } 218 219 // Final result. The hi part should be same as the IEEE754 result. 220 // assert z == xy; 221 c.hi = z; 222 c.lo = zz; 223 } 224 225 /** 226 * Checks if the number is not normal. This is functionally equivalent to: 227 * <pre> 228 * final double abs = Math.abs(a); 229 * return (abs <= Double.MIN_NORMAL || !(absXy <= Double.MAX_VALUE)); 230 * </pre> 231 * 232 * @param a The value. 233 * @return true if the value is not normal 234 */ 235 static boolean isNotNormal(double a) { 236 // Sub-normal numbers have a biased exponent of 0. 237 // Inf/NaN numbers have a biased exponent of 2047. 238 // Catch both cases by extracting the raw exponent, subtracting 1 239 // and compare unsigned (so 0 underflows to a large value). 240 final int baisedExponent = ((int) (Double.doubleToRawLongBits(a) >>> 52)) & EXP_MASK; 241 // Pre-compute the additions used by Integer.compareUnsigned 242 return baisedExponent + CMP_UNSIGNED_MINUS_1 >= CMP_UNSIGNED_2046; 243 } 244 245 /** 246 * Gets the exponent. 247 * 248 * @param bits the bits 249 * @return the exponent 250 */ 251 private static int getBiasedExponent(long bits) { 252 return (int)(bits >>> 52) & 0x7ff; 253 } 254 255 /** 256 * Gets the normalised fraction in the range [0.5, 1). 257 * 258 * @param bits the bits 259 * @return the exponent 260 */ 261 private static double getNormalisedFraction(long bits) { 262 // Mask out the exponent and set it to 1022. 263 return Double.longBitsToDouble((bits & 0x800f_ffff_ffff_ffffL) | 0x3fe0_0000_0000_0000L); 264 } 265 266 /** 267 * Implement Dekker's method to split a value into two parts. Multiplying by (2^s + 1) creates 268 * a big value from which to derive the two split parts. 269 * <pre> 270 * c = (2^s + 1) * a 271 * a_big = c - a 272 * a_hi = c - a_big 273 * a_lo = a - a_hi 274 * a = a_hi + a_lo 275 * </pre> 276 * 277 * <p>The multiplicand allows a p-bit value to be split into 278 * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}. 279 * Combined they have (p-1) bits of significand but the sign bit of {@code a_lo} 280 * contains a bit of information. The constant is chosen so that s is ceil(p/2) where 281 * the precision p for a double is 53-bits (1-bit of the mantissa is assumed to be 282 * 1 for a non sub-normal number) and s is 27. 283 * 284 * <p>This conversion uses scaling to avoid overflow in intermediate computations. 285 * 286 * <p>Splitting a NaN or infinite value will return NaN. Any finite value will return 287 * a finite value. 288 * 289 * @param value Value. 290 * @return the high part of the value. 291 */ 292 static double highPart(double value) { 293 // Avoid overflow 294 if (Math.abs(value) >= SAFE_UPPER) { 295 // Do scaling. 296 final double hi = highPartUnscaled(value * DOWN_SCALE) * UP_SCALE; 297 if (Double.isInfinite(hi)) { 298 // Number is too large. 299 // This occurs if value is infinite or close to Double.MAX_VALUE. 300 // Note that Dekker's split creates an approximating 26-bit number which may 301 // have an exponent 1 greater than the input value. This will overflow if the 302 // exponent is already +1023. Revert to the raw upper 26 bits of the 53-bit 303 // mantissa (including the assumed leading 1 bit). This conversion will result in 304 // the low part being a 27-bit significand and the potential loss of bits during 305 // addition and multiplication. (Contrast to the Dekker split which creates two 306 // 26-bit numbers with a bit of information moved to the sign of low.) 307 // The conversion will maintain Infinite in the high part where the resulting 308 // low part a_lo = a - a_hi = inf - inf = NaN. 309 return highPartSplit(value); 310 } 311 return hi; 312 } 313 // normal conversion 314 return highPartUnscaled(value); 315 } 316 317 /** 318 * Implement Dekker's method to split a value into two parts (see {@link #highPart(double)}). 319 * 320 * <p>This conversion does not use scaling and the result of overflow is NaN. Overflow 321 * may occur when the exponent of the input value is above 996. 322 * 323 * <p>Splitting a NaN or infinite value will return NaN. 324 * 325 * @param value Value. 326 * @return the high part of the value. 327 * @see Math#getExponent(double) 328 */ 329 static double highPartUnscaled(double value) { 330 final double c = MULTIPLIER * value; 331 return c - (c - value); 332 } 333 334 /** 335 * Implement a split using the upper and lower raw bits from the value. 336 * 337 * <p>Note: This method will not work for very small sub-normal numbers 338 * ({@code <= 27} bits) as the high part will be zero and the low part will 339 * have all the information. Methods that assume {@code hi > lo} will have 340 * undefined behaviour. 341 * 342 * <p>Splitting a NaN value will return NaN or infinite. Splitting an infinite 343 * value will return infinite. Any finite value will return a finite value. 344 * 345 * @param value Value. 346 * @return the high part of the value. 347 */ 348 static double highPartSplit(double value) { 349 return Double.longBitsToDouble(Double.doubleToRawLongBits(value) & ZERO_LOWER_27_BITS); 350 } 351 352 /** 353 * Compute the low part of the double length number {@code (z,zz)} for the exact 354 * product of {@code x} and {@code y}. This is equivalent to computing a {@code double} 355 * containing the magnitude of the rounding error when converting the exact 106-bit 356 * significand of the multiplication result to a 53-bit significand. 357 * 358 * <p>The method is written to be functionally similar to using a fused multiply add (FMA) 359 * operation to compute the low part, for example JDK 9's Math.fma function (note the sign 360 * change in the input argument for the product): 361 * <pre> 362 * double x = ...; 363 * double y = ...; 364 * double xy = x * y; 365 * double low1 = Math.fma(x, y, -xy); 366 * double low2 = DoublePrecision.productLow(x, y, xy); 367 * </pre> 368 * 369 * <p>Special cases: 370 * 371 * <ul> 372 * <li>If {@code x * y} is sub-normal or zero then the result is 0.0. 373 * <li>If {@code x * y} is infinite or NaN then the result is NaN. 374 * </ul> 375 * 376 * @param x First factor. 377 * @param y Second factor. 378 * @param xy Product of the factors (x * y). 379 * @return the low part of the product double length number 380 * @see #highPart(double) 381 * @see #productLow(double, double, double, double, double) 382 */ 383 static double productLow(double x, double y, double xy) { 384 // Verify the input. This must be NaN safe. 385 //assert Double.compare(x * y, xy) == 0 386 387 // If the number is sub-normal, inf or nan there is no round-off. 388 if (isNotNormal(xy)) { 389 // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan: 390 return xy - xy; 391 } 392 393 // The result xy is finite and normal. 394 // Use Dekker's mul12 algorithm that splits the values into high and low parts. 395 // Dekker's split using multiplication will overflow if the value is within 2^27 396 // of double max value. It can also produce 26-bit approximations that are larger 397 // than the input numbers for the high part causing overflow in hx * hy when 398 // x * y does not overflow. So we must scale down big numbers. 399 // We only have to scale the largest number as we know the product does not overflow 400 // (if one is too big then the other cannot be). 401 // We also scale if the product is close to overflow to avoid intermediate overflow. 402 // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4) 403 // but is included here to have a single low probability branch condition. 404 405 // Add the absolute inputs for a single comparison. The sum will not be more than 406 // 3-fold higher than any component. 407 final double a = Math.abs(x); 408 final double b = Math.abs(y); 409 if (a + b + Math.abs(xy) >= SAFE_UPPER) { 410 // Only required to scale the largest number as x*y does not overflow. 411 if (a > b) { 412 return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE; 413 } 414 return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE; 415 } 416 417 // No scaling required 418 return productLowUnscaled(x, y, xy); 419 } 420 421 /** 422 * Compute the low part of the double length number {@code (z,zz)} for the exact 423 * product of {@code x} and {@code y}. This is equivalent to computing a {@code double} 424 * containing the magnitude of the rounding error when converting the exact 106-bit 425 * significand of the multiplication result to a 53-bit significand. 426 * 427 * <p>Special cases: 428 * 429 * <ul> 430 * <li>If {@code x * y} is sub-normal or zero then the result is 0.0. 431 * <li>If {@code x * y} is infinite, and {@code x} and {@code y} are finite then the 432 * result is the opposite infinity. 433 * <li>If {@code x} or {@code y} are infinite then the result is NaN. 434 * <li>If {@code x * y} is NaN then the result is NaN. 435 * </ul> 436 * 437 * @param x First factor. 438 * @param y Second factor. 439 * @param xy Product of the factors (x * y). 440 * @return the low part of the product double length number 441 * @see #highPart(double) 442 * @see #productLow(double, double, double, double, double) 443 */ 444 static double productLow1(double x, double y, double xy) { 445 // Verify the input. This must be NaN safe. 446 //assert Double.compare(x * y, xy) == 0 447 448 // Logic as per productLow but with no check for sub-normal or NaN. 449 final double a = Math.abs(x); 450 final double b = Math.abs(y); 451 if (a + b + Math.abs(xy) >= SAFE_UPPER) { 452 // Only required to scale the largest number as x*y does not overflow. 453 if (a > b) { 454 return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE; 455 } 456 return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE; 457 } 458 459 // No scaling required 460 return productLowUnscaled(x, y, xy); 461 } 462 /** 463 * Compute the low part of the double length number {@code (z,zz)} for the exact 464 * product of {@code x} and {@code y}. This is equivalent to computing a {@code double} 465 * containing the magnitude of the rounding error when converting the exact 106-bit 466 * significand of the multiplication result to a 53-bit significand. 467 * 468 * <p>The method is written to be functionally similar to using a fused multiply add (FMA) 469 * operation to compute the low part, for example JDK 9's Math.fma function (note the sign 470 * change in the input argument for the product): 471 * <pre> 472 * double x = ...; 473 * double y = ...; 474 * double xy = x * y; 475 * double low1 = Math.fma(x, y, -xy); 476 * double low2 = DoublePrecision.productLow(x, y, xy); 477 * </pre> 478 * 479 * <p>Special cases: 480 * 481 * <ul> 482 * <li>If {@code x * y} is sub-normal or zero then the result is 0.0. 483 * <li>If {@code x * y} is infinite or NaN then the result is NaN. 484 * </ul> 485 * 486 * @param x First factor. 487 * @param y Second factor. 488 * @param xy Product of the factors (x * y). 489 * @return the low part of the product double length number 490 * @see #highPart(double) 491 * @see #productLow(double, double, double, double, double) 492 */ 493 static double productLow2(double x, double y, double xy) { 494 // Verify the input. This must be NaN safe. 495 //assert Double.compare(x * y, xy) == 0 496 497 // If the number is sub-normal, inf or nan there is no round-off. 498 if (isNotNormal(xy)) { 499 // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan: 500 return xy - xy; 501 } 502 503 // The result xy is finite and normal. 504 // Use Dekker's mul12 algorithm that splits the values into high and low parts. 505 // Dekker's split using multiplication will overflow if the value is within 2^27 506 // of double max value. It can also produce 26-bit approximations that are larger 507 // than the input numbers for the high part causing overflow in hx * hy when 508 // x * y does not overflow. So we must scale down big numbers. 509 // We only have to scale the largest number as we know the product does not overflow 510 // (if one is too big then the other cannot be). 511 // Also scale if the product is close to max value. 512 513 if (Math.abs(x) >= SAFE_UPPER) { 514 return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE; 515 } 516 if (Math.abs(y) >= SAFE_UPPER || Math.abs(xy) >= Double.MAX_VALUE / 4) { 517 return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE; 518 } 519 // No scaling required 520 return productLowUnscaled(x, y, xy); 521 } 522 523 /** 524 * Compute the low part of the double length number {@code (z,zz)} for the exact 525 * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard 526 * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y} 527 * are split into high and low parts using Dekker's algorithm. 528 * 529 * <p>This method performs scaling in Dekker's split for large finite numbers to avoid 530 * overflow when generating the high part of the number. 531 * 532 * <p>Warning: Dekker's split can produce high parts that are larger in magnitude than 533 * the input number as the high part is a 26-bit approximation of the number. Thus it is 534 * possible that the standard product {@code x * y} does not overflow but the extended 535 * precision sub-product {@code hx * hy} does overflow. This method should not be 536 * considered safe for all combinations where {@code Double.isFinite(x * y)} is true. 537 * The method is used for benchmarking. 538 * 539 * @param x First factor. 540 * @param y Second factor. 541 * @param xy Product of the factors (x * y). 542 * @return the low part of the product double length number 543 * @see #highPart(double) 544 * @see #productLow(double, double, double, double, double) 545 */ 546 static double productLow3(double x, double y, double xy) { 547 // Split the numbers using Dekker's algorithm 548 final double hx = highPart(x); 549 final double lx = x - hx; 550 551 final double hy = highPart(y); 552 final double ly = y - hy; 553 554 return productLow(hx, lx, hy, ly, xy); 555 } 556 557 /** 558 * Compute the low part of the double length number {@code (z,zz)} for the 559 * product of {@code x} and {@code y} using a Dekker's mult12 algorithm. The 560 * standard precision product {@code x*y} must be provided. The numbers 561 * {@code x} and {@code y} are split into high and low parts by zeroing the 562 * lower 27-bits of the mantissa to create the high part. This may lose 1 bit of 563 * precision in the resulting low part computed by subtraction. The intermediate 564 * computations will not overflow as the split results are always smaller in 565 * magnitude than the input numbers. 566 * 567 * <p>The method is used for benchmarking as results may not be exact due to 568 * loss of a bit during splitting of the input factors. 569 * 570 * @param x First factor. 571 * @param y Second factor. 572 * @param xy Product of the factors (x * y). 573 * @return the low part of the product double length number 574 * @see #highPart(double) 575 * @see #productLow(double, double, double, double, double) 576 */ 577 static double productLowSplit(double x, double y, double xy) { 578 // Split the numbers using Dekker's algorithm 579 final double hx = highPartSplit(x); 580 final double lx = x - hx; 581 582 final double hy = highPartSplit(y); 583 final double ly = y - hy; 584 585 return productLow(hx, lx, hy, ly, xy); 586 } 587 588 /** 589 * Compute the low part of the double length number {@code (z,zz)} for the exact 590 * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard 591 * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y} 592 * are split into high and low parts using Dekker's algorithm. 593 * 594 * <p>Warning: This method does not perform scaling in Dekker's split and large 595 * finite numbers can create NaN results. 596 * 597 * @param x First factor. 598 * @param y Second factor. 599 * @param xy Product of the factors (x * y). 600 * @return the low part of the product double length number 601 * @see #highPartUnscaled(double) 602 * @see #productLow(double, double, double, double, double) 603 */ 604 static double productLowUnscaled(double x, double y, double xy) { 605 // Split the numbers using Dekker's algorithm without scaling 606 final double hx = highPartUnscaled(x); 607 final double lx = x - hx; 608 609 final double hy = highPartUnscaled(y); 610 final double ly = y - hy; 611 612 return productLow(hx, lx, hy, ly, xy); 613 } 614 615 /** 616 * Compute the low part of the double length number {@code (z,zz)} for the exact 617 * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard 618 * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y} 619 * should already be split into low and high parts. 620 * 621 * <p>Note: This uses the high part of the result {@code (z,zz)} as {@code x * y} and not 622 * {@code hx * hy + hx * ty + tx * hy} as specified in Dekker's original paper. 623 * See Shewchuk (1997) for working examples. 624 * 625 * @param hx High part of first factor. 626 * @param lx Low part of first factor. 627 * @param hy High part of second factor. 628 * @param ly Low part of second factor. 629 * @param xy Product of the factors. 630 * @return <code>lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly)</code> 631 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 632 * Shewchuk (1997) Theorum 18</a> 633 */ 634 static double productLow(double hx, double lx, double hy, double ly, double xy) { 635 // Compute the multiply low part: 636 // err1 = xy - hx * hy 637 // err2 = err1 - lx * hy 638 // err3 = err2 - hx * ly 639 // low = lx * ly - err3 640 return lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly); 641 } 642 643 /** 644 * Compute the round-off {@code s} from the sum of two split numbers {@code (x, xx)} 645 * and {@code (y, yy)} using Dekker's add2 algorithm. The values are not required to be 646 * ordered by magnitude as an absolute comparison is made to determine the summation order. 647 * The sum of the high parts {@code r} must be provided. 648 * 649 * <p>The result {@code (r, s)} must be re-balanced to create the split result {@code (z, zz)}: 650 * <pre> 651 * z = r + s 652 * zz = r - z + s 653 * </pre> 654 * 655 * @param x High part of first number. 656 * @param xx Low part of first number. 657 * @param y High part of second number. 658 * @param yy Low part of second number. 659 * @param r Sum of the parts (x + y) = r 660 * @return The round-off from the sum (x + y) = s 661 */ 662 static double sumLow(double x, double xx, double y, double yy, double r) { 663 return Math.abs(x) > Math.abs(y) ? 664 x - r + y + yy + xx : 665 y - r + x + xx + yy; 666 } 667 668 /** 669 * Compute the round-off from the sum of two numbers {@code a} and {@code b} using 670 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude 671 * {@code |a| >= |b|}. The standard precision sum must be provided. 672 * 673 * @param a First part of sum. 674 * @param b Second part of sum. 675 * @param sum Sum of the parts (a + b). 676 * @return <code>b - (sum - a)</code> 677 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 678 * Shewchuk (1997) Theorum 6</a> 679 */ 680 static double fastTwoSumLow(double a, double b, double sum) { 681 // bVirtual = sum - a 682 // b - bVirtual == b round-off 683 return b - (sum - a); 684 } 685 686 /** 687 * Compute the round-off from the sum of two numbers {@code a} and {@code b} using 688 * Knuth's two-sum algorithm. The values are not required to be ordered by magnitude. 689 * The standard precision sum must be provided. 690 * 691 * @param a First part of sum. 692 * @param b Second part of sum. 693 * @param sum Sum of the parts (a + b). 694 * @return <code>(b - (sum - (sum - b))) + (a - (sum - b))</code> 695 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 696 * Shewchuk (1997) Theorum 7</a> 697 */ 698 static double twoSumLow(double a, double b, double sum) { 699 final double bVirtual = sum - a; 700 // sum - bVirtual == aVirtual. 701 // a - aVirtual == a round-off 702 // b - bVirtual == b round-off 703 return (a - (sum - bVirtual)) + (b - bVirtual); 704 } 705 }