1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 // (C) Copyright John Maddock 2006. 19 // Use, modification and distribution are subject to the 20 // Boost Software License, Version 1.0. (See accompanying file 21 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 22 23 package org.apache.commons.numbers.gamma; 24 25 /** 26 * Implementation of the <a href="http://mathworld.wolfram.com/Erf.html">error function</a> and 27 * its inverse. 28 * 29 * <p>This code has been adapted from the <a href="https://www.boost.org/">Boost</a> 30 * {@code c++} implementation {@code <boost/math/special_functions/erf.hpp>}. 31 * The erf/erfc functions and their inverses are copyright John Maddock 2006 and subject to 32 * the Boost Software License. 33 * 34 * <p>Additions made to support the erfcx function are original work under the Apache software 35 * license. 36 * 37 * @see 38 * <a href="https://www.boost.org/doc/libs/1_77_0/libs/math/doc/html/math_toolkit/sf_erf/error_function.html"> 39 * Boost C++ Error functions</a> 40 */ 41 final class BoostErf { 42 /** 43 * The multiplier used to split the double value into high and low parts. From 44 * Dekker (1971): "The constant should be chosen equal to 2^(p - p/2) + 1, 45 * where p is the number of binary digits in the mantissa". Here p is 53 46 * and the multiplier is {@code 2^27 + 1}. 47 */ 48 private static final double MULTIPLIER = 1.0 + 0x1.0p27; 49 /** 1 / sqrt(pi). Used for the scaled complementary error function erfcx. */ 50 private static final double ONE_OVER_ROOT_PI = 0.5641895835477562869480794515607725858; 51 /** Threshold for the scaled complementary error function erfcx 52 * where the approximation {@code (1 / sqrt(pi)) / x} can be used. */ 53 private static final double ERFCX_APPROX = 6.71e7; 54 /** Threshold for the erf implementation for |x| where the computation 55 * uses {@code erf(x)}; otherwise {@code erfc(x)} is computed. The final result is 56 * achieved by suitable application of symmetry. */ 57 private static final double COMPUTE_ERF = 0.5; 58 /** Threshold for the scaled complementary error function erfcx for negative x 59 * where {@code 2 * exp(x*x)} will overflow. Value is 26.62873571375149. */ 60 private static final double ERFCX_NEG_X_MAX = Math.sqrt(Math.log(Double.MAX_VALUE / 2)); 61 /** Threshold for the scaled complementary error function erfcx for x 62 * where {@code exp(x*x) == 1; x <= t}. Value is (1 + 5/16) * 2^-27 = 9.778887033462524E-9. 63 * <p>Note: This is used for performance. If set to 0 then the result is computed 64 * using expm1(x*x) with the same final result. */ 65 private static final double EXP_XX_1 = 0x1.5p-27; 66 67 /** Private constructor. */ 68 private BoostErf() { 69 // intentionally empty. 70 } 71 72 // Code ported from Boost 1.77.0 73 // 74 // boost/math/special_functions/erf.hpp 75 // boost/math/special_functions/detail/erf_inv.hpp 76 // 77 // Original code comments, including measured deviations, are preserved. 78 // 79 // Changes to the Boost implementation: 80 // - Update method names to replace underscores with camel case 81 // - Explicitly inline the polynomial function evaluation 82 // using Horner's method (https://en.wikipedia.org/wiki/Horner%27s_method) 83 // - Support odd function for f(0.0) = -f(-0.0) 84 // - Support the scaled complementary error function erfcx 85 // Erf: 86 // - Change extended precision z*z to compute the square round-off 87 // using Dekker's method 88 // - Change extended precision exp(-z*z) to compute using a 89 // round-off addition to the standard exp result (see NUMBERS-177) 90 // - Change the erf threshold for z when erf(z)=1 from 91 // z > 5.8f to z > 5.930664 92 // - Change the erfc threshold for z when erfc(z)=0 from 93 // z < 28 to z < 27.3 94 // - Change rational function approximation for z > 4 to a function 95 // suitable for erfcx (see NUMBERS-177) 96 // Inverse erf: 97 // - Change inverse erf edge case detection to include NaN 98 // - Change edge case detection for integer z 99 // 100 // Note: 101 // Constants using the 'f' suffix are machine 102 // representable as a float, e.g. 103 // assert 0.0891314744949340820313f == 0.0891314744949340820313; 104 // The values are unchanged from the Boost reference. 105 106 /** 107 * Returns the complementary error function. 108 * 109 * @param x the value. 110 * @return the complementary error function. 111 */ 112 static double erfc(double x) { 113 return erfImp(x, true, false); 114 } 115 116 /** 117 * Returns the error function. 118 * 119 * @param x the value. 120 * @return the error function. 121 */ 122 static double erf(double x) { 123 return erfImp(x, false, false); 124 } 125 126 /** 127 * 53-bit implementation for the error function. 128 * 129 * <p>Note: The {@code scaled} flag only applies when 130 * {@code z >= 0.5} and {@code invert == true}. 131 * This functionality is used to compute erfcx(z) for positive z. 132 * 133 * @param z Point to evaluate 134 * @param invert true to invert the result (for the complementary error function) 135 * @param scaled true to compute the scaled complementary error function 136 * @return the error function result 137 */ 138 private static double erfImp(double z, boolean invert, boolean scaled) { 139 if (Double.isNaN(z)) { 140 return Double.NaN; 141 } 142 143 if (z < 0) { 144 // Here the scaled flag is ignored. 145 if (!invert) { 146 return -erfImp(-z, invert, false); 147 } else if (z < -0.5) { 148 return 2 - erfImp(-z, invert, false); 149 } else { 150 return 1 + erfImp(-z, false, false); 151 } 152 } 153 154 double result; 155 156 // 157 // Big bunch of selection statements now to pick 158 // which implementation to use, 159 // try to put most likely options first: 160 // 161 if (z < COMPUTE_ERF) { 162 // 163 // We're going to calculate erf: 164 // 165 // Here the scaled flag is ignored. 166 if (z < 1e-10) { 167 if (z == 0) { 168 result = z; 169 } else { 170 final double c = 0.003379167095512573896158903121545171688; 171 result = z * 1.125f + z * c; 172 } 173 } else { 174 // Maximum Deviation Found: 1.561e-17 175 // Expected Error Term: 1.561e-17 176 // Maximum Relative Change in Control Points: 1.155e-04 177 // Max Error found at double precision = 2.961182e-17 178 179 final double Y = 1.044948577880859375f; 180 final double zz = z * z; 181 double P; 182 P = -0.000322780120964605683831; 183 P = -0.00772758345802133288487 + P * zz; 184 P = -0.0509990735146777432841 + P * zz; 185 P = -0.338165134459360935041 + P * zz; 186 P = 0.0834305892146531832907 + P * zz; 187 double Q; 188 Q = 0.000370900071787748000569; 189 Q = 0.00858571925074406212772 + Q * zz; 190 Q = 0.0875222600142252549554 + Q * zz; 191 Q = 0.455004033050794024546 + Q * zz; 192 Q = 1.0 + Q * zz; 193 result = z * (Y + P / Q); 194 } 195 // Note: Boost threshold of 5.8f has been raised to approximately 5.93 (6073 / 1024); 196 // threshold of 28 has been lowered to approximately 27.3 (6989/256) where exp(-z*z) = 0. 197 } else if (scaled || (invert ? (z < 27.300781f) : (z < 5.9306640625f))) { 198 // 199 // We'll be calculating erfc: 200 // 201 // Here the scaled flag is used. 202 invert = !invert; 203 if (z < 1.5f) { 204 // Maximum Deviation Found: 3.702e-17 205 // Expected Error Term: 3.702e-17 206 // Maximum Relative Change in Control Points: 2.845e-04 207 // Max Error found at double precision = 4.841816e-17 208 final double Y = 0.405935764312744140625f; 209 final double zm = z - 0.5; 210 double P; 211 P = 0.00180424538297014223957; 212 P = 0.0195049001251218801359 + P * zm; 213 P = 0.0888900368967884466578 + P * zm; 214 P = 0.191003695796775433986 + P * zm; 215 P = 0.178114665841120341155 + P * zm; 216 P = -0.098090592216281240205 + P * zm; 217 double Q; 218 Q = 0.337511472483094676155e-5; 219 Q = 0.0113385233577001411017 + Q * zm; 220 Q = 0.12385097467900864233 + Q * zm; 221 Q = 0.578052804889902404909 + Q * zm; 222 Q = 1.42628004845511324508 + Q * zm; 223 Q = 1.84759070983002217845 + Q * zm; 224 Q = 1.0 + Q * zm; 225 result = Y + P / Q; 226 if (scaled) { 227 result /= z; 228 } else { 229 result *= expmxx(z) / z; 230 } 231 } else if (z < 2.5f) { 232 // Max Error found at double precision = 6.599585e-18 233 // Maximum Deviation Found: 3.909e-18 234 // Expected Error Term: 3.909e-18 235 // Maximum Relative Change in Control Points: 9.886e-05 236 final double Y = 0.50672817230224609375f; 237 final double zm = z - 1.5; 238 double P; 239 P = 0.000235839115596880717416; 240 P = 0.00323962406290842133584 + P * zm; 241 P = 0.0175679436311802092299 + P * zm; 242 P = 0.04394818964209516296 + P * zm; 243 P = 0.0386540375035707201728 + P * zm; 244 P = -0.0243500476207698441272 + P * zm; 245 double Q; 246 Q = 0.00410369723978904575884; 247 Q = 0.0563921837420478160373 + Q * zm; 248 Q = 0.325732924782444448493 + Q * zm; 249 Q = 0.982403709157920235114 + Q * zm; 250 Q = 1.53991494948552447182 + Q * zm; 251 Q = 1.0 + Q * zm; 252 result = Y + P / Q; 253 if (scaled) { 254 result /= z; 255 } else { 256 result *= expmxx(z) / z; 257 } 258 // Lowered Boost threshold from 4.5 to 4.0 as this is the limit 259 // for the Cody erfc approximation 260 } else if (z < 4.0f) { 261 // Maximum Deviation Found: 1.512e-17 262 // Expected Error Term: 1.512e-17 263 // Maximum Relative Change in Control Points: 2.222e-04 264 // Max Error found at double precision = 2.062515e-17 265 final double Y = 0.5405750274658203125f; 266 final double zm = z - 3.5; 267 double P; 268 P = 0.113212406648847561139e-4; 269 P = 0.000250269961544794627958 + P * zm; 270 P = 0.00212825620914618649141 + P * zm; 271 P = 0.00840807615555585383007 + P * zm; 272 P = 0.0137384425896355332126 + P * zm; 273 P = 0.00295276716530971662634 + P * zm; 274 double Q; 275 Q = 0.000479411269521714493907; 276 Q = 0.0105982906484876531489 + Q * zm; 277 Q = 0.0958492726301061423444 + Q * zm; 278 Q = 0.442597659481563127003 + Q * zm; 279 Q = 1.04217814166938418171 + Q * zm; 280 Q = 1.0 + Q * zm; 281 result = Y + P / Q; 282 if (scaled) { 283 result /= z; 284 } else { 285 result *= expmxx(z) / z; 286 } 287 } else { 288 // Rational function approximation for erfc(x > 4.0) 289 // 290 // This approximation is not the Boost implementation. 291 // The Boost function is suitable for [4.5 < z < 28]. 292 // 293 // This function is suitable for erfcx(z) as it asymptotes 294 // to (1 / sqrt(pi)) / z at large z. 295 // 296 // Taken from "Rational Chebyshev approximations for the error function" 297 // by W. J. Cody, Math. Comp., 1969, PP. 631-638. 298 // 299 // See NUMBERS-177. 300 301 final double izz = 1 / (z * z); 302 double p; 303 p = 1.63153871373020978498e-2; 304 p = 3.05326634961232344035e-1 + p * izz; 305 p = 3.60344899949804439429e-1 + p * izz; 306 p = 1.25781726111229246204e-1 + p * izz; 307 p = 1.60837851487422766278e-2 + p * izz; 308 p = 6.58749161529837803157e-4 + p * izz; 309 double q; 310 q = 1; 311 q = 2.56852019228982242072e00 + q * izz; 312 q = 1.87295284992346047209e00 + q * izz; 313 q = 5.27905102951428412248e-1 + q * izz; 314 q = 6.05183413124413191178e-2 + q * izz; 315 q = 2.33520497626869185443e-3 + q * izz; 316 317 result = izz * p / q; 318 result = (ONE_OVER_ROOT_PI - result) / z; 319 320 if (!scaled) { 321 // exp(-z*z) can be sub-normal so 322 // multiply by any sub-normal after divide by z 323 result *= expmxx(z); 324 } 325 } 326 } else { 327 // 328 // Any value of z larger than 27.3 will underflow to zero: 329 // 330 result = 0; 331 invert = !invert; 332 } 333 334 if (invert) { 335 // Note: If 0.5 <= z < 28 and the scaled flag is true then 336 // invert will have been flipped to false and the 337 // the result is unchanged as erfcx(z) 338 result = 1 - result; 339 } 340 341 return result; 342 } 343 344 /** 345 * Returns the scaled complementary error function. 346 * <pre> 347 * erfcx(x) = exp(x^2) * erfc(x) 348 * </pre> 349 * 350 * @param x the value. 351 * @return the scaled complementary error function. 352 */ 353 static double erfcx(double x) { 354 if (Double.isNaN(x)) { 355 return Double.NaN; 356 } 357 358 // For |z| < 0.5 erfc is computed using erf 359 final double ax = Math.abs(x); 360 if (ax < COMPUTE_ERF) { 361 // Use the erf(x) result. 362 // (1 - erf(x)) * exp(x*x) 363 364 final double erfx = erf(x); 365 if (ax < EXP_XX_1) { 366 // No exponential required 367 return 1 - erfx; 368 } 369 370 // exp(x*x) - exp(x*x) * erf(x) 371 // Avoid use of exp(x*x) with expm1: 372 // exp(x*x) - 1 - (erf(x) * (exp(x*x) - 1)) - erf(x) + 1 373 374 // Sum small to large: |erf(x)| > expm1(x*x) 375 // -erf(x) * expm1(x*x) + expm1(x*x) - erf(x) + 1 376 // Negative x: erf(x) < 0, summed terms are positive, no cancellation occurs. 377 // Positive x: erf(x) > 0 so cancellation can occur. 378 // When terms are ordered by absolute magnitude the magnitude of the next term 379 // is above the round-off from adding the previous term to the sum. Thus 380 // cancellation is negligible compared to errors in the largest computed term (erf(x)). 381 382 final double em1 = Math.expm1(x * x); 383 return -erfx * em1 + em1 - erfx + 1; 384 } 385 386 // Handle negative arguments 387 if (x < 0) { 388 // erfcx(x) = 2*exp(x*x) - erfcx(-x) 389 390 if (x < -ERFCX_NEG_X_MAX) { 391 // Overflow 392 return Double.POSITIVE_INFINITY; 393 } 394 395 final double e = expxx(x); 396 return e - erfImp(-x, true, true) + e; 397 } 398 399 // Approximation for large positive x 400 if (x > ERFCX_APPROX) { 401 return ONE_OVER_ROOT_PI / x; 402 } 403 404 // Compute erfc scaled 405 return erfImp(x, true, true); 406 } 407 408 /** 409 * Returns the inverse complementary error function. 410 * 411 * @param z Value (in {@code [0, 2]}). 412 * @return t such that {@code z = erfc(t)} 413 */ 414 static double erfcInv(double z) { 415 // 416 // Begin by testing for domain errors, and other special cases: 417 // 418 if (z < 0 || z > 2 || Double.isNaN(z)) { 419 // Argument outside range [0,2] in inverse erfc function 420 return Double.NaN; 421 } 422 // Domain bounds must be detected as the implementation computes NaN. 423 // (log(q=0) creates infinity and the rational number is 424 // infinity / infinity) 425 if (z == (int) z) { 426 // z return 427 // 2 -inf 428 // 1 0 429 // 0 inf 430 return z == 1 ? 0 : (1 - z) * Double.POSITIVE_INFINITY; 431 } 432 433 // 434 // Normalise the input, so it's in the range [0,1], we will 435 // negate the result if z is outside that range. This is a simple 436 // application of the erfc reflection formula: erfc(-z) = 2 - erfc(z) 437 // 438 double p; 439 double q; 440 double s; 441 if (z > 1) { 442 q = 2 - z; 443 p = 1 - q; 444 s = -1; 445 } else { 446 p = 1 - z; 447 q = z; 448 s = 1; 449 } 450 451 // 452 // And get the result, negating where required: 453 // 454 return s * erfInvImp(p, q); 455 } 456 457 /** 458 * Returns the inverse error function. 459 * 460 * @param z Value (in {@code [-1, 1]}). 461 * @return t such that {@code z = erf(t)} 462 */ 463 static double erfInv(double z) { 464 // 465 // Begin by testing for domain errors, and other special cases: 466 // 467 if (z < -1 || z > 1 || Double.isNaN(z)) { 468 // Argument outside range [-1, 1] in inverse erf function 469 return Double.NaN; 470 } 471 // Domain bounds must be detected as the implementation computes NaN. 472 // (log(q=0) creates infinity and the rational number is 473 // infinity / infinity) 474 if (z == (int) z) { 475 // z return 476 // -1 -inf 477 // -0 -0 478 // 0 0 479 // 1 inf 480 return z == 0 ? z : z * Double.POSITIVE_INFINITY; 481 } 482 483 // 484 // Normalise the input, so it's in the range [0,1], we will 485 // negate the result if z is outside that range. This is a simple 486 // application of the erf reflection formula: erf(-z) = -erf(z) 487 // 488 double p; 489 double q; 490 double s; 491 if (z < 0) { 492 p = -z; 493 q = 1 - p; 494 s = -1; 495 } else { 496 p = z; 497 q = 1 - z; 498 s = 1; 499 } 500 // 501 // And get the result, negating where required: 502 // 503 return s * erfInvImp(p, q); 504 } 505 506 /** 507 * Common implementation for inverse erf and erfc functions. 508 * 509 * @param p P-value 510 * @param q Q-value (1-p) 511 * @return the inverse 512 */ 513 private static double erfInvImp(double p, double q) { 514 double result = 0; 515 516 if (p <= 0.5) { 517 // 518 // Evaluate inverse erf using the rational approximation: 519 // 520 // x = p(p+10)(Y+R(p)) 521 // 522 // Where Y is a constant, and R(p) is optimised for a low 523 // absolute error compared to |Y|. 524 // 525 // double: Max error found: 2.001849e-18 526 // long double: Max error found: 1.017064e-20 527 // Maximum Deviation Found (actual error term at infinite precision) 8.030e-21 528 // 529 final float Y = 0.0891314744949340820313f; 530 double P; 531 P = -0.00538772965071242932965; 532 P = 0.00822687874676915743155 + P * p; 533 P = 0.0219878681111168899165 + P * p; 534 P = -0.0365637971411762664006 + P * p; 535 P = -0.0126926147662974029034 + P * p; 536 P = 0.0334806625409744615033 + P * p; 537 P = -0.00836874819741736770379 + P * p; 538 P = -0.000508781949658280665617 + P * p; 539 double Q; 540 Q = 0.000886216390456424707504; 541 Q = -0.00233393759374190016776 + Q * p; 542 Q = 0.0795283687341571680018 + Q * p; 543 Q = -0.0527396382340099713954 + Q * p; 544 Q = -0.71228902341542847553 + Q * p; 545 Q = 0.662328840472002992063 + Q * p; 546 Q = 1.56221558398423026363 + Q * p; 547 Q = -1.56574558234175846809 + Q * p; 548 Q = -0.970005043303290640362 + Q * p; 549 Q = 1.0 + Q * p; 550 final double g = p * (p + 10); 551 final double r = P / Q; 552 result = g * Y + g * r; 553 } else if (q >= 0.25) { 554 // 555 // Rational approximation for 0.5 > q >= 0.25 556 // 557 // x = sqrt(-2*log(q)) / (Y + R(q)) 558 // 559 // Where Y is a constant, and R(q) is optimised for a low 560 // absolute error compared to Y. 561 // 562 // double : Max error found: 7.403372e-17 563 // long double : Max error found: 6.084616e-20 564 // Maximum Deviation Found (error term) 4.811e-20 565 // 566 final float Y = 2.249481201171875f; 567 final double xs = q - 0.25f; 568 double P; 569 P = -3.67192254707729348546; 570 P = 21.1294655448340526258 + P * xs; 571 P = 17.445385985570866523 + P * xs; 572 P = -44.6382324441786960818 + P * xs; 573 P = -18.8510648058714251895 + P * xs; 574 P = 17.6447298408374015486 + P * xs; 575 P = 8.37050328343119927838 + P * xs; 576 P = 0.105264680699391713268 + P * xs; 577 P = -0.202433508355938759655 + P * xs; 578 double Q; 579 Q = 1.72114765761200282724; 580 Q = -22.6436933413139721736 + Q * xs; 581 Q = 10.8268667355460159008 + Q * xs; 582 Q = 48.5609213108739935468 + Q * xs; 583 Q = -20.1432634680485188801 + Q * xs; 584 Q = -28.6608180499800029974 + Q * xs; 585 Q = 3.9713437953343869095 + Q * xs; 586 Q = 6.24264124854247537712 + Q * xs; 587 Q = 1.0 + Q * xs; 588 final double g = Math.sqrt(-2 * Math.log(q)); 589 final double r = P / Q; 590 result = g / (Y + r); 591 } else { 592 // 593 // For q < 0.25 we have a series of rational approximations all 594 // of the general form: 595 // 596 // let: x = sqrt(-log(q)) 597 // 598 // Then the result is given by: 599 // 600 // x(Y+R(x-B)) 601 // 602 // where Y is a constant, B is the lowest value of x for which 603 // the approximation is valid, and R(x-B) is optimised for a low 604 // absolute error compared to Y. 605 // 606 // Note that almost all code will really go through the first 607 // or maybe second approximation. After than we're dealing with very 608 // small input values indeed. 609 // 610 // Limit for a double: Math.sqrt(-Math.log(Double.MIN_VALUE)) = 27.28... 611 // Branches for x >= 44 (supporting 80 and 128 bit long double) have been removed. 612 final double x = Math.sqrt(-Math.log(q)); 613 if (x < 3) { 614 // Max error found: 1.089051e-20 615 final float Y = 0.807220458984375f; 616 final double xs = x - 1.125f; 617 double P; 618 P = -0.681149956853776992068e-9; 619 P = 0.285225331782217055858e-7 + P * xs; 620 P = -0.679465575181126350155e-6 + P * xs; 621 P = 0.00214558995388805277169 + P * xs; 622 P = 0.0290157910005329060432 + P * xs; 623 P = 0.142869534408157156766 + P * xs; 624 P = 0.337785538912035898924 + P * xs; 625 P = 0.387079738972604337464 + P * xs; 626 P = 0.117030156341995252019 + P * xs; 627 P = -0.163794047193317060787 + P * xs; 628 P = -0.131102781679951906451 + P * xs; 629 double Q; 630 Q = 0.01105924229346489121; 631 Q = 0.152264338295331783612 + Q * xs; 632 Q = 0.848854343457902036425 + Q * xs; 633 Q = 2.59301921623620271374 + Q * xs; 634 Q = 4.77846592945843778382 + Q * xs; 635 Q = 5.38168345707006855425 + Q * xs; 636 Q = 3.46625407242567245975 + Q * xs; 637 Q = 1.0 + Q * xs; 638 final double R = P / Q; 639 result = Y * x + R * x; 640 } else if (x < 6) { 641 // Max error found: 8.389174e-21 642 final float Y = 0.93995571136474609375f; 643 final double xs = x - 3; 644 double P; 645 P = 0.266339227425782031962e-11; 646 P = -0.230404776911882601748e-9 + P * xs; 647 P = 0.460469890584317994083e-5 + P * xs; 648 P = 0.000157544617424960554631 + P * xs; 649 P = 0.00187123492819559223345 + P * xs; 650 P = 0.00950804701325919603619 + P * xs; 651 P = 0.0185573306514231072324 + P * xs; 652 P = -0.00222426529213447927281 + P * xs; 653 P = -0.0350353787183177984712 + P * xs; 654 double Q; 655 Q = 0.764675292302794483503e-4; 656 Q = 0.00263861676657015992959 + Q * xs; 657 Q = 0.0341589143670947727934 + Q * xs; 658 Q = 0.220091105764131249824 + Q * xs; 659 Q = 0.762059164553623404043 + Q * xs; 660 Q = 1.3653349817554063097 + Q * xs; 661 Q = 1.0 + Q * xs; 662 final double R = P / Q; 663 result = Y * x + R * x; 664 } else if (x < 18) { 665 // Max error found: 1.481312e-19 666 final float Y = 0.98362827301025390625f; 667 final double xs = x - 6; 668 double P; 669 P = 0.99055709973310326855e-16; 670 P = -0.281128735628831791805e-13 + P * xs; 671 P = 0.462596163522878599135e-8 + P * xs; 672 P = 0.449696789927706453732e-6 + P * xs; 673 P = 0.149624783758342370182e-4 + P * xs; 674 P = 0.000209386317487588078668 + P * xs; 675 P = 0.00105628862152492910091 + P * xs; 676 P = -0.00112951438745580278863 + P * xs; 677 P = -0.0167431005076633737133 + P * xs; 678 double Q; 679 Q = 0.282243172016108031869e-6; 680 Q = 0.275335474764726041141e-4 + Q * xs; 681 Q = 0.000964011807005165528527 + Q * xs; 682 Q = 0.0160746087093676504695 + Q * xs; 683 Q = 0.138151865749083321638 + Q * xs; 684 Q = 0.591429344886417493481 + Q * xs; 685 Q = 1.0 + Q * xs; 686 final double R = P / Q; 687 result = Y * x + R * x; 688 } else { 689 // x < 44 690 // Max error found: 5.697761e-20 691 final float Y = 0.99714565277099609375f; 692 final double xs = x - 18; 693 double P; 694 P = -0.116765012397184275695e-17; 695 P = 0.145596286718675035587e-11 + P * xs; 696 P = 0.411632831190944208473e-9 + P * xs; 697 P = 0.396341011304801168516e-7 + P * xs; 698 P = 0.162397777342510920873e-5 + P * xs; 699 P = 0.254723037413027451751e-4 + P * xs; 700 P = -0.779190719229053954292e-5 + P * xs; 701 P = -0.0024978212791898131227 + P * xs; 702 double Q; 703 Q = 0.509761276599778486139e-9; 704 Q = 0.144437756628144157666e-6 + Q * xs; 705 Q = 0.145007359818232637924e-4 + Q * xs; 706 Q = 0.000690538265622684595676 + Q * xs; 707 Q = 0.0169410838120975906478 + Q * xs; 708 Q = 0.207123112214422517181 + Q * xs; 709 Q = 1.0 + Q * xs; 710 final double R = P / Q; 711 result = Y * x + R * x; 712 } 713 } 714 return result; 715 } 716 717 /** 718 * Compute {@code exp(x*x)} with high accuracy. This is performed using 719 * information in the round-off from {@code x*x}. 720 * 721 * <p>This is accurate at large x to 1 ulp. 722 * 723 * <p>At small x the accuracy cannot be improved over using exp(x*x). 724 * This occurs at {@code x <= 1}. 725 * 726 * <p>Warning: This has no checks for overflow. The method is never called 727 * when {@code x*x > log(MAX_VALUE/2)}. 728 * 729 * @param x Value 730 * @return exp(x*x) 731 */ 732 static double expxx(double x) { 733 // Note: If exp(a) overflows this can create NaN if the 734 // round-off b is negative or zero: 735 // exp(a) * exp1m(b) + exp(a) 736 // inf * 0 + inf or inf * -b + inf 737 final double a = x * x; 738 final double b = squareLowUnscaled(x, a); 739 return expxx(a, b); 740 } 741 742 /** 743 * Compute {@code exp(-x*x)} with high accuracy. This is performed using 744 * information in the round-off from {@code x*x}. 745 * 746 * <p>This is accurate at large x to 1 ulp until exp(-x*x) is close to 747 * sub-normal. For very small exp(-x*x) the adjustment is sub-normal and 748 * bits can be lost in the adjustment for a max observed error of {@code < 2} ulp. 749 * 750 * <p>At small x the accuracy cannot be improved over using exp(-x*x). 751 * This occurs at {@code x <= 1}. 752 * 753 * @param x Value 754 * @return exp(-x*x) 755 */ 756 static double expmxx(double x) { 757 final double a = x * x; 758 final double b = squareLowUnscaled(x, a); 759 return expxx(-a, -b); 760 } 761 762 /** 763 * Compute {@code exp(a+b)} with high accuracy assuming {@code a+b = a}. 764 * 765 * <p>This is accurate at large positive a to 1 ulp. If a is negative and exp(a) is 766 * close to sub-normal a bit of precision may be lost when adjusting result 767 * as the adjustment is sub-normal (max observed error {@code < 2} ulp). 768 * For the use case of multiplication of a number less than 1 by exp(-x*x), a = -x*x, 769 * the result will be sub-normal and the rounding error is lost. 770 * 771 * <p>At small |a| the accuracy cannot be improved over using exp(a) as the 772 * round-off is too small to create terms that can adjust the standard result by 773 * more than 0.5 ulp. This occurs at {@code |a| <= 1}. 774 * 775 * @param a High bits of a split number 776 * @param b Low bits of a split number 777 * @return exp(a+b) 778 */ 779 private static double expxx(double a, double b) { 780 // exp(a+b) = exp(a) * exp(b) 781 // = exp(a) * (exp(b) - 1) + exp(a) 782 // Assuming: 783 // 1. -746 < a < 710 for no under/overflow of exp(a) 784 // 2. a+b = a 785 // As b -> 0 then exp(b) -> 1; expm1(b) -> b 786 // The round-off b is limited to ~ 0.5 * ulp(746) ~ 5.68e-14 787 // and we can use an approximation for expm1 (x/1! + x^2/2! + ...) 788 // The second term is required for the expm1 result but the 789 // bits are not significant to change the product with exp(a) 790 791 final double ea = Math.exp(a); 792 // b ~ expm1(b) 793 return ea * b + ea; 794 } 795 796 // Extended precision multiplication specialised for the square adapted from: 797 // org.apache.commons.numbers.core.ExtendedPrecision 798 799 /** 800 * Compute the low part of the double length number {@code (z,zz)} for the exact 801 * square of {@code x} using Dekker's mult12 algorithm. The standard precision product 802 * {@code x*x} must be provided. The number {@code x} is split into high and low parts 803 * using Dekker's algorithm. 804 * 805 * <p>Warning: This method does not perform scaling in Dekker's split and large 806 * finite numbers can create NaN results. 807 * 808 * @param x Number to square 809 * @param xx Standard precision product {@code x*x} 810 * @return the low part of the square double length number 811 */ 812 private static double squareLowUnscaled(double x, double xx) { 813 // Split the numbers using Dekker's algorithm without scaling 814 final double hx = highPartUnscaled(x); 815 final double lx = x - hx; 816 817 return squareLow(hx, lx, xx); 818 } 819 820 /** 821 * Implement Dekker's method to split a value into two parts. Multiplying by (2^s + 1) creates 822 * a big value from which to derive the two split parts. 823 * <pre> 824 * c = (2^s + 1) * a 825 * a_big = c - a 826 * a_hi = c - a_big 827 * a_lo = a - a_hi 828 * a = a_hi + a_lo 829 * </pre> 830 * 831 * <p>The multiplicand allows a p-bit value to be split into 832 * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}. 833 * Combined they have (p-1) bits of significand but the sign bit of {@code a_lo} 834 * contains a bit of information. The constant is chosen so that s is ceil(p/2) where 835 * the precision p for a double is 53-bits (1-bit of the mantissa is assumed to be 836 * 1 for a non sub-normal number) and s is 27. 837 * 838 * <p>This conversion does not use scaling and the result of overflow is NaN. Overflow 839 * may occur when the exponent of the input value is above 996. 840 * 841 * <p>Splitting a NaN or infinite value will return NaN. 842 * 843 * @param value Value. 844 * @return the high part of the value. 845 * @see Math#getExponent(double) 846 */ 847 private static double highPartUnscaled(double value) { 848 final double c = MULTIPLIER * value; 849 return c - (c - value); 850 } 851 852 /** 853 * Compute the low part of the double length number {@code (z,zz)} for the exact 854 * square of {@code x} using Dekker's mult12 algorithm. The standard 855 * precision product {@code x*x} must be provided. The number {@code x} 856 * should already be split into low and high parts. 857 * 858 * <p>Note: This uses the high part of the result {@code (z,zz)} as {@code x * x} and not 859 * {@code hx * hx + hx * lx + lx * hx} as specified in Dekker's original paper. 860 * See Shewchuk (1997) for working examples. 861 * 862 * @param hx High part of factor. 863 * @param lx Low part of factor. 864 * @param xx Square of the factor. 865 * @return <code>lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly)</code> 866 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> 867 * Shewchuk (1997) Theorum 18</a> 868 */ 869 private static double squareLow(double hx, double lx, double xx) { 870 // Compute the multiply low part: 871 // err1 = xy - hx * hy 872 // err2 = err1 - lx * hy 873 // err3 = err2 - hx * ly 874 // low = lx * ly - err3 875 return lx * lx - ((xx - hx * hx) - 2 * lx * hx); 876 } 877 } 878