View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  //  (C) Copyright John Maddock 2006.
19  //  Use, modification and distribution are subject to the
20  //  Boost Software License, Version 1.0. (See accompanying file
21  //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
22  
23  package org.apache.commons.numbers.gamma;
24  
25  /**
26   * Implementation of the <a href="http://mathworld.wolfram.com/Erf.html">error function</a> and
27   * its inverse.
28   *
29   * <p>This code has been adapted from the <a href="https://www.boost.org/">Boost</a>
30   * {@code c++} implementation {@code <boost/math/special_functions/erf.hpp>}.
31   * The erf/erfc functions and their inverses are copyright John Maddock 2006 and subject to
32   * the Boost Software License.
33   *
34   * <p>Additions made to support the erfcx function are original work under the Apache software
35   * license.
36   *
37   * @see
38   * <a href="https://www.boost.org/doc/libs/1_77_0/libs/math/doc/html/math_toolkit/sf_erf/error_function.html">
39   * Boost C++ Error functions</a>
40   */
41  final class BoostErf {
42      /**
43       * The multiplier used to split the double value into high and low parts. From
44       * Dekker (1971): "The constant should be chosen equal to 2^(p - p/2) + 1,
45       * where p is the number of binary digits in the mantissa". Here p is 53
46       * and the multiplier is {@code 2^27 + 1}.
47       */
48      private static final double MULTIPLIER = 1.0 + 0x1.0p27;
49      /** 1 / sqrt(pi). Used for the scaled complementary error function erfcx. */
50      private static final double ONE_OVER_ROOT_PI = 0.5641895835477562869480794515607725858;
51      /** Threshold for the scaled complementary error function erfcx
52       * where the approximation {@code (1 / sqrt(pi)) / x} can be used. */
53      private static final double ERFCX_APPROX = 6.71e7;
54      /** Threshold for the erf implementation for |x| where the computation
55       * uses {@code erf(x)}; otherwise {@code erfc(x)} is computed. The final result is
56       * achieved by suitable application of symmetry. */
57      private static final double COMPUTE_ERF = 0.5;
58      /** Threshold for the scaled complementary error function erfcx for negative x
59       * where {@code 2 * exp(x*x)} will overflow. Value is 26.62873571375149. */
60      private static final double ERFCX_NEG_X_MAX = Math.sqrt(Math.log(Double.MAX_VALUE / 2));
61      /** Threshold for the scaled complementary error function erfcx for x
62       * where {@code exp(x*x) == 1; x <= t}. Value is (1 + 5/16) * 2^-27 = 9.778887033462524E-9.
63       * <p>Note: This is used for performance. If set to 0 then the result is computed
64       * using expm1(x*x) with the same final result. */
65      private static final double EXP_XX_1 = 0x1.5p-27;
66  
67      /** Private constructor. */
68      private BoostErf() {
69          // intentionally empty.
70      }
71  
72      // Code ported from Boost 1.77.0
73      //
74      // boost/math/special_functions/erf.hpp
75      // boost/math/special_functions/detail/erf_inv.hpp
76      //
77      // Original code comments, including measured deviations, are preserved.
78      //
79      // Changes to the Boost implementation:
80      // - Update method names to replace underscores with camel case
81      // - Explicitly inline the polynomial function evaluation
82      //   using Horner's method (https://en.wikipedia.org/wiki/Horner%27s_method)
83      // - Support odd function for f(0.0) = -f(-0.0)
84      // - Support the scaled complementary error function erfcx
85      // Erf:
86      // - Change extended precision z*z to compute the square round-off
87      //   using Dekker's method
88      // - Change extended precision exp(-z*z) to compute using a
89      //   round-off addition to the standard exp result (see NUMBERS-177)
90      // - Change the erf threshold for z when erf(z)=1 from
91      //   z > 5.8f to z > 5.930664
92      // - Change the erfc threshold for z when erfc(z)=0 from
93      //   z < 28 to z < 27.3
94      // - Change rational function approximation for z > 4 to a function
95      //   suitable for erfcx (see NUMBERS-177)
96      // Inverse erf:
97      // - Change inverse erf edge case detection to include NaN
98      // - Change edge case detection for integer z
99      //
100     // Note:
101     // Constants using the 'f' suffix are machine
102     // representable as a float, e.g.
103     // assert 0.0891314744949340820313f == 0.0891314744949340820313;
104     // The values are unchanged from the Boost reference.
105 
106     /**
107      * Returns the complementary error function.
108      *
109      * @param x the value.
110      * @return the complementary error function.
111      */
112     static double erfc(double x) {
113         return erfImp(x, true, false);
114     }
115 
116     /**
117      * Returns the error function.
118      *
119      * @param x the value.
120      * @return the error function.
121      */
122     static double erf(double x) {
123         return erfImp(x, false, false);
124     }
125 
126     /**
127      * 53-bit implementation for the error function.
128      *
129      * <p>Note: The {@code scaled} flag only applies when
130      * {@code z >= 0.5} and {@code invert == true}.
131      * This functionality is used to compute erfcx(z) for positive z.
132      *
133      * @param z Point to evaluate
134      * @param invert true to invert the result (for the complementary error function)
135      * @param scaled true to compute the scaled complementary error function
136      * @return the error function result
137      */
138     private static double erfImp(double z, boolean invert, boolean scaled) {
139         if (Double.isNaN(z)) {
140             return Double.NaN;
141         }
142 
143         if (z < 0) {
144             // Here the scaled flag is ignored.
145             if (!invert) {
146                 return -erfImp(-z, invert, false);
147             } else if (z < -0.5) {
148                 return 2 - erfImp(-z, invert, false);
149             } else {
150                 return 1 + erfImp(-z, false, false);
151             }
152         }
153 
154         double result;
155 
156         //
157         // Big bunch of selection statements now to pick
158         // which implementation to use,
159         // try to put most likely options first:
160         //
161         if (z < COMPUTE_ERF) {
162             //
163             // We're going to calculate erf:
164             //
165             // Here the scaled flag is ignored.
166             if (z < 1e-10) {
167                 if (z == 0) {
168                     result = z;
169                 } else {
170                     final double c = 0.003379167095512573896158903121545171688;
171                     result = z * 1.125f + z * c;
172                 }
173             } else {
174                 // Maximum Deviation Found:                      1.561e-17
175                 // Expected Error Term:                          1.561e-17
176                 // Maximum Relative Change in Control Points:    1.155e-04
177                 // Max Error found at double precision =         2.961182e-17
178 
179                 final double Y = 1.044948577880859375f;
180                 final double zz = z * z;
181                 double P;
182                 P = -0.000322780120964605683831;
183                 P =  -0.00772758345802133288487 + P * zz;
184                 P =   -0.0509990735146777432841 + P * zz;
185                 P =    -0.338165134459360935041 + P * zz;
186                 P =    0.0834305892146531832907 + P * zz;
187                 double Q;
188                 Q = 0.000370900071787748000569;
189                 Q =  0.00858571925074406212772 + Q * zz;
190                 Q =   0.0875222600142252549554 + Q * zz;
191                 Q =    0.455004033050794024546 + Q * zz;
192                 Q =                        1.0 + Q * zz;
193                 result = z * (Y + P / Q);
194             }
195         // Note: Boost threshold of 5.8f has been raised to approximately 5.93 (6073 / 1024);
196         // threshold of 28 has been lowered to approximately 27.3 (6989/256) where exp(-z*z) = 0.
197         } else if (scaled || (invert ? (z < 27.300781f) : (z < 5.9306640625f))) {
198             //
199             // We'll be calculating erfc:
200             //
201             // Here the scaled flag is used.
202             invert = !invert;
203             if (z < 1.5f) {
204                 // Maximum Deviation Found:                     3.702e-17
205                 // Expected Error Term:                         3.702e-17
206                 // Maximum Relative Change in Control Points:   2.845e-04
207                 // Max Error found at double precision =        4.841816e-17
208                 final double Y = 0.405935764312744140625f;
209                 final double zm = z - 0.5;
210                 double P;
211                 P = 0.00180424538297014223957;
212                 P =  0.0195049001251218801359 + P * zm;
213                 P =  0.0888900368967884466578 + P * zm;
214                 P =   0.191003695796775433986 + P * zm;
215                 P =   0.178114665841120341155 + P * zm;
216                 P =  -0.098090592216281240205 + P * zm;
217                 double Q;
218                 Q = 0.337511472483094676155e-5;
219                 Q =   0.0113385233577001411017 + Q * zm;
220                 Q =     0.12385097467900864233 + Q * zm;
221                 Q =    0.578052804889902404909 + Q * zm;
222                 Q =     1.42628004845511324508 + Q * zm;
223                 Q =     1.84759070983002217845 + Q * zm;
224                 Q =                        1.0 + Q * zm;
225                 result = Y + P / Q;
226                 if (scaled) {
227                     result /= z;
228                 } else {
229                     result *= expmxx(z) / z;
230                 }
231             } else if (z < 2.5f) {
232                 // Max Error found at double precision =        6.599585e-18
233                 // Maximum Deviation Found:                     3.909e-18
234                 // Expected Error Term:                         3.909e-18
235                 // Maximum Relative Change in Control Points:   9.886e-05
236                 final double Y = 0.50672817230224609375f;
237                 final double zm = z - 1.5;
238                 double P;
239                 P = 0.000235839115596880717416;
240                 P =  0.00323962406290842133584 + P * zm;
241                 P =   0.0175679436311802092299 + P * zm;
242                 P =     0.04394818964209516296 + P * zm;
243                 P =   0.0386540375035707201728 + P * zm;
244                 P =  -0.0243500476207698441272 + P * zm;
245                 double Q;
246                 Q = 0.00410369723978904575884;
247                 Q =  0.0563921837420478160373 + Q * zm;
248                 Q =   0.325732924782444448493 + Q * zm;
249                 Q =   0.982403709157920235114 + Q * zm;
250                 Q =    1.53991494948552447182 + Q * zm;
251                 Q =                       1.0 + Q * zm;
252                 result = Y + P / Q;
253                 if (scaled) {
254                     result /= z;
255                 } else {
256                     result *= expmxx(z) / z;
257                 }
258             // Lowered Boost threshold from 4.5 to 4.0 as this is the limit
259             // for the Cody erfc approximation
260             } else if (z < 4.0f) {
261                 // Maximum Deviation Found:                     1.512e-17
262                 // Expected Error Term:                         1.512e-17
263                 // Maximum Relative Change in Control Points:   2.222e-04
264                 // Max Error found at double precision =        2.062515e-17
265                 final double Y = 0.5405750274658203125f;
266                 final double zm = z - 3.5;
267                 double P;
268                 P = 0.113212406648847561139e-4;
269                 P = 0.000250269961544794627958 + P * zm;
270                 P =  0.00212825620914618649141 + P * zm;
271                 P =  0.00840807615555585383007 + P * zm;
272                 P =   0.0137384425896355332126 + P * zm;
273                 P =  0.00295276716530971662634 + P * zm;
274                 double Q;
275                 Q = 0.000479411269521714493907;
276                 Q =   0.0105982906484876531489 + Q * zm;
277                 Q =   0.0958492726301061423444 + Q * zm;
278                 Q =    0.442597659481563127003 + Q * zm;
279                 Q =     1.04217814166938418171 + Q * zm;
280                 Q =                        1.0 + Q * zm;
281                 result = Y + P / Q;
282                 if (scaled) {
283                     result /= z;
284                 } else {
285                     result *= expmxx(z) / z;
286                 }
287             } else {
288                 // Rational function approximation for erfc(x > 4.0)
289                 //
290                 // This approximation is not the Boost implementation.
291                 // The Boost function is suitable for [4.5 < z < 28].
292                 //
293                 // This function is suitable for erfcx(z) as it asymptotes
294                 // to (1 / sqrt(pi)) / z at large z.
295                 //
296                 // Taken from "Rational Chebyshev approximations for the error function"
297                 // by W. J. Cody, Math. Comp., 1969, PP. 631-638.
298                 //
299                 // See NUMBERS-177.
300 
301                 final double izz = 1 / (z * z);
302                 double p;
303                 p = 1.63153871373020978498e-2;
304                 p = 3.05326634961232344035e-1 + p * izz;
305                 p = 3.60344899949804439429e-1 + p * izz;
306                 p = 1.25781726111229246204e-1 + p * izz;
307                 p = 1.60837851487422766278e-2 + p * izz;
308                 p = 6.58749161529837803157e-4 + p * izz;
309                 double q;
310                 q = 1;
311                 q = 2.56852019228982242072e00 + q * izz;
312                 q = 1.87295284992346047209e00 + q * izz;
313                 q = 5.27905102951428412248e-1 + q * izz;
314                 q = 6.05183413124413191178e-2 + q * izz;
315                 q = 2.33520497626869185443e-3 + q * izz;
316 
317                 result = izz * p / q;
318                 result = (ONE_OVER_ROOT_PI - result) / z;
319 
320                 if (!scaled) {
321                     // exp(-z*z) can be sub-normal so
322                     // multiply by any sub-normal after divide by z
323                     result *= expmxx(z);
324                 }
325             }
326         } else {
327             //
328             // Any value of z larger than 27.3 will underflow to zero:
329             //
330             result = 0;
331             invert = !invert;
332         }
333 
334         if (invert) {
335             // Note: If 0.5 <= z < 28 and the scaled flag is true then
336             // invert will have been flipped to false and the
337             // the result is unchanged as erfcx(z)
338             result = 1 - result;
339         }
340 
341         return result;
342     }
343 
344     /**
345      * Returns the scaled complementary error function.
346      * <pre>
347      * erfcx(x) = exp(x^2) * erfc(x)
348      * </pre>
349      *
350      * @param x the value.
351      * @return the scaled complementary error function.
352      */
353     static double erfcx(double x) {
354         if (Double.isNaN(x)) {
355             return Double.NaN;
356         }
357 
358         // For |z| < 0.5 erfc is computed using erf
359         final double ax = Math.abs(x);
360         if (ax < COMPUTE_ERF) {
361             // Use the erf(x) result.
362             // (1 - erf(x)) * exp(x*x)
363 
364             final double erfx = erf(x);
365             if (ax < EXP_XX_1) {
366                 // No exponential required
367                 return 1 - erfx;
368             }
369 
370             // exp(x*x) - exp(x*x) * erf(x)
371             // Avoid use of exp(x*x) with expm1:
372             // exp(x*x) - 1 - (erf(x) * (exp(x*x) - 1)) - erf(x) + 1
373 
374             // Sum small to large: |erf(x)| > expm1(x*x)
375             // -erf(x) * expm1(x*x) + expm1(x*x) - erf(x) + 1
376             // Negative x: erf(x) < 0, summed terms are positive, no cancellation occurs.
377             // Positive x: erf(x) > 0 so cancellation can occur.
378             // When terms are ordered by absolute magnitude the magnitude of the next term
379             // is above the round-off from adding the previous term to the sum. Thus
380             // cancellation is negligible compared to errors in the largest computed term (erf(x)).
381 
382             final double em1 = Math.expm1(x * x);
383             return -erfx * em1 + em1 - erfx + 1;
384         }
385 
386         // Handle negative arguments
387         if (x < 0) {
388             // erfcx(x) = 2*exp(x*x) - erfcx(-x)
389 
390             if (x < -ERFCX_NEG_X_MAX) {
391                 // Overflow
392                 return Double.POSITIVE_INFINITY;
393             }
394 
395             final double e = expxx(x);
396             return e - erfImp(-x, true, true) + e;
397         }
398 
399         // Approximation for large positive x
400         if (x > ERFCX_APPROX) {
401             return ONE_OVER_ROOT_PI / x;
402         }
403 
404         // Compute erfc scaled
405         return erfImp(x, true, true);
406     }
407 
408     /**
409      * Returns the inverse complementary error function.
410      *
411      * @param z Value (in {@code [0, 2]}).
412      * @return t such that {@code z = erfc(t)}
413      */
414     static double erfcInv(double z) {
415         //
416         // Begin by testing for domain errors, and other special cases:
417         //
418         if (z < 0 || z > 2 || Double.isNaN(z)) {
419             // Argument outside range [0,2] in inverse erfc function
420             return Double.NaN;
421         }
422         // Domain bounds must be detected as the implementation computes NaN.
423         // (log(q=0) creates infinity and the rational number is
424         // infinity / infinity)
425         if (z == (int) z) {
426             // z   return
427             // 2   -inf
428             // 1   0
429             // 0   inf
430             return z == 1 ? 0 : (1 - z) * Double.POSITIVE_INFINITY;
431         }
432 
433         //
434         // Normalise the input, so it's in the range [0,1], we will
435         // negate the result if z is outside that range. This is a simple
436         // application of the erfc reflection formula: erfc(-z) = 2 - erfc(z)
437         //
438         double p;
439         double q;
440         double s;
441         if (z > 1) {
442             q = 2 - z;
443             p = 1 - q;
444             s = -1;
445         } else {
446             p = 1 - z;
447             q = z;
448             s = 1;
449         }
450 
451         //
452         // And get the result, negating where required:
453         //
454         return s * erfInvImp(p, q);
455     }
456 
457     /**
458      * Returns the inverse error function.
459      *
460      * @param z Value (in {@code [-1, 1]}).
461      * @return t such that {@code z = erf(t)}
462      */
463     static double erfInv(double z) {
464         //
465         // Begin by testing for domain errors, and other special cases:
466         //
467         if (z < -1 || z > 1 || Double.isNaN(z)) {
468             // Argument outside range [-1, 1] in inverse erf function
469             return Double.NaN;
470         }
471         // Domain bounds must be detected as the implementation computes NaN.
472         // (log(q=0) creates infinity and the rational number is
473         // infinity / infinity)
474         if (z == (int) z) {
475             // z   return
476             // -1  -inf
477             // -0  -0
478             // 0   0
479             // 1   inf
480             return z == 0 ? z : z * Double.POSITIVE_INFINITY;
481         }
482 
483         //
484         // Normalise the input, so it's in the range [0,1], we will
485         // negate the result if z is outside that range. This is a simple
486         // application of the erf reflection formula: erf(-z) = -erf(z)
487         //
488         double p;
489         double q;
490         double s;
491         if (z < 0) {
492             p = -z;
493             q = 1 - p;
494             s = -1;
495         } else {
496             p = z;
497             q = 1 - z;
498             s = 1;
499         }
500         //
501         // And get the result, negating where required:
502         //
503         return s * erfInvImp(p, q);
504     }
505 
506     /**
507      * Common implementation for inverse erf and erfc functions.
508      *
509      * @param p P-value
510      * @param q Q-value (1-p)
511      * @return the inverse
512      */
513     private static double erfInvImp(double p, double q) {
514         double result = 0;
515 
516         if (p <= 0.5) {
517             //
518             // Evaluate inverse erf using the rational approximation:
519             //
520             // x = p(p+10)(Y+R(p))
521             //
522             // Where Y is a constant, and R(p) is optimised for a low
523             // absolute error compared to |Y|.
524             //
525             // double: Max error found: 2.001849e-18
526             // long double: Max error found: 1.017064e-20
527             // Maximum Deviation Found (actual error term at infinite precision) 8.030e-21
528             //
529             final float Y = 0.0891314744949340820313f;
530             double P;
531             P =  -0.00538772965071242932965;
532             P =   0.00822687874676915743155 + P * p;
533             P =    0.0219878681111168899165 + P * p;
534             P =   -0.0365637971411762664006 + P * p;
535             P =   -0.0126926147662974029034 + P * p;
536             P =    0.0334806625409744615033 + P * p;
537             P =  -0.00836874819741736770379 + P * p;
538             P = -0.000508781949658280665617 + P * p;
539             double Q;
540             Q = 0.000886216390456424707504;
541             Q = -0.00233393759374190016776 + Q * p;
542             Q =   0.0795283687341571680018 + Q * p;
543             Q =  -0.0527396382340099713954 + Q * p;
544             Q =    -0.71228902341542847553 + Q * p;
545             Q =    0.662328840472002992063 + Q * p;
546             Q =     1.56221558398423026363 + Q * p;
547             Q =    -1.56574558234175846809 + Q * p;
548             Q =   -0.970005043303290640362 + Q * p;
549             Q =                        1.0 + Q * p;
550             final double g = p * (p + 10);
551             final double r = P / Q;
552             result = g * Y + g * r;
553         } else if (q >= 0.25) {
554             //
555             // Rational approximation for 0.5 > q >= 0.25
556             //
557             // x = sqrt(-2*log(q)) / (Y + R(q))
558             //
559             // Where Y is a constant, and R(q) is optimised for a low
560             // absolute error compared to Y.
561             //
562             // double : Max error found: 7.403372e-17
563             // long double : Max error found: 6.084616e-20
564             // Maximum Deviation Found (error term) 4.811e-20
565             //
566             final float Y = 2.249481201171875f;
567             final double xs = q - 0.25f;
568             double P;
569             P =  -3.67192254707729348546;
570             P =   21.1294655448340526258 + P * xs;
571             P =    17.445385985570866523 + P * xs;
572             P =  -44.6382324441786960818 + P * xs;
573             P =  -18.8510648058714251895 + P * xs;
574             P =   17.6447298408374015486 + P * xs;
575             P =   8.37050328343119927838 + P * xs;
576             P =  0.105264680699391713268 + P * xs;
577             P = -0.202433508355938759655 + P * xs;
578             double Q;
579             Q =  1.72114765761200282724;
580             Q = -22.6436933413139721736 + Q * xs;
581             Q =  10.8268667355460159008 + Q * xs;
582             Q =  48.5609213108739935468 + Q * xs;
583             Q = -20.1432634680485188801 + Q * xs;
584             Q = -28.6608180499800029974 + Q * xs;
585             Q =   3.9713437953343869095 + Q * xs;
586             Q =  6.24264124854247537712 + Q * xs;
587             Q =                     1.0 + Q * xs;
588             final double g = Math.sqrt(-2 * Math.log(q));
589             final double r = P / Q;
590             result = g / (Y + r);
591         } else {
592             //
593             // For q < 0.25 we have a series of rational approximations all
594             // of the general form:
595             //
596             // let: x = sqrt(-log(q))
597             //
598             // Then the result is given by:
599             //
600             // x(Y+R(x-B))
601             //
602             // where Y is a constant, B is the lowest value of x for which
603             // the approximation is valid, and R(x-B) is optimised for a low
604             // absolute error compared to Y.
605             //
606             // Note that almost all code will really go through the first
607             // or maybe second approximation. After than we're dealing with very
608             // small input values indeed.
609             //
610             // Limit for a double: Math.sqrt(-Math.log(Double.MIN_VALUE)) = 27.28...
611             // Branches for x >= 44 (supporting 80 and 128 bit long double) have been removed.
612             final double x = Math.sqrt(-Math.log(q));
613             if (x < 3) {
614                 // Max error found: 1.089051e-20
615                 final float Y = 0.807220458984375f;
616                 final double xs = x - 1.125f;
617                 double P;
618                 P = -0.681149956853776992068e-9;
619                 P =  0.285225331782217055858e-7 + P * xs;
620                 P = -0.679465575181126350155e-6 + P * xs;
621                 P =   0.00214558995388805277169 + P * xs;
622                 P =    0.0290157910005329060432 + P * xs;
623                 P =     0.142869534408157156766 + P * xs;
624                 P =     0.337785538912035898924 + P * xs;
625                 P =     0.387079738972604337464 + P * xs;
626                 P =     0.117030156341995252019 + P * xs;
627                 P =    -0.163794047193317060787 + P * xs;
628                 P =    -0.131102781679951906451 + P * xs;
629                 double Q;
630                 Q =  0.01105924229346489121;
631                 Q = 0.152264338295331783612 + Q * xs;
632                 Q = 0.848854343457902036425 + Q * xs;
633                 Q =  2.59301921623620271374 + Q * xs;
634                 Q =  4.77846592945843778382 + Q * xs;
635                 Q =  5.38168345707006855425 + Q * xs;
636                 Q =  3.46625407242567245975 + Q * xs;
637                 Q =                     1.0 + Q * xs;
638                 final double R = P / Q;
639                 result = Y * x + R * x;
640             } else if (x < 6) {
641                 // Max error found: 8.389174e-21
642                 final float Y = 0.93995571136474609375f;
643                 final double xs = x - 3;
644                 double P;
645                 P = 0.266339227425782031962e-11;
646                 P = -0.230404776911882601748e-9 + P * xs;
647                 P =  0.460469890584317994083e-5 + P * xs;
648                 P =  0.000157544617424960554631 + P * xs;
649                 P =   0.00187123492819559223345 + P * xs;
650                 P =   0.00950804701325919603619 + P * xs;
651                 P =    0.0185573306514231072324 + P * xs;
652                 P =  -0.00222426529213447927281 + P * xs;
653                 P =   -0.0350353787183177984712 + P * xs;
654                 double Q;
655                 Q = 0.764675292302794483503e-4;
656                 Q =  0.00263861676657015992959 + Q * xs;
657                 Q =   0.0341589143670947727934 + Q * xs;
658                 Q =    0.220091105764131249824 + Q * xs;
659                 Q =    0.762059164553623404043 + Q * xs;
660                 Q =      1.3653349817554063097 + Q * xs;
661                 Q =                        1.0 + Q * xs;
662                 final double R = P / Q;
663                 result = Y * x + R * x;
664             } else if (x < 18) {
665                 // Max error found: 1.481312e-19
666                 final float Y = 0.98362827301025390625f;
667                 final double xs = x - 6;
668                 double P;
669                 P =   0.99055709973310326855e-16;
670                 P = -0.281128735628831791805e-13 + P * xs;
671                 P =   0.462596163522878599135e-8 + P * xs;
672                 P =   0.449696789927706453732e-6 + P * xs;
673                 P =   0.149624783758342370182e-4 + P * xs;
674                 P =   0.000209386317487588078668 + P * xs;
675                 P =    0.00105628862152492910091 + P * xs;
676                 P =   -0.00112951438745580278863 + P * xs;
677                 P =    -0.0167431005076633737133 + P * xs;
678                 double Q;
679                 Q = 0.282243172016108031869e-6;
680                 Q = 0.275335474764726041141e-4 + Q * xs;
681                 Q = 0.000964011807005165528527 + Q * xs;
682                 Q =   0.0160746087093676504695 + Q * xs;
683                 Q =    0.138151865749083321638 + Q * xs;
684                 Q =    0.591429344886417493481 + Q * xs;
685                 Q =                        1.0 + Q * xs;
686                 final double R = P / Q;
687                 result = Y * x + R * x;
688             } else {
689                 // x < 44
690                 // Max error found: 5.697761e-20
691                 final float Y = 0.99714565277099609375f;
692                 final double xs = x - 18;
693                 double P;
694                 P = -0.116765012397184275695e-17;
695                 P =  0.145596286718675035587e-11 + P * xs;
696                 P =   0.411632831190944208473e-9 + P * xs;
697                 P =   0.396341011304801168516e-7 + P * xs;
698                 P =   0.162397777342510920873e-5 + P * xs;
699                 P =   0.254723037413027451751e-4 + P * xs;
700                 P =  -0.779190719229053954292e-5 + P * xs;
701                 P =    -0.0024978212791898131227 + P * xs;
702                 double Q;
703                 Q = 0.509761276599778486139e-9;
704                 Q = 0.144437756628144157666e-6 + Q * xs;
705                 Q = 0.145007359818232637924e-4 + Q * xs;
706                 Q = 0.000690538265622684595676 + Q * xs;
707                 Q =   0.0169410838120975906478 + Q * xs;
708                 Q =    0.207123112214422517181 + Q * xs;
709                 Q =                        1.0 + Q * xs;
710                 final double R = P / Q;
711                 result = Y * x + R * x;
712             }
713         }
714         return result;
715     }
716 
717     /**
718      * Compute {@code exp(x*x)} with high accuracy. This is performed using
719      * information in the round-off from {@code x*x}.
720      *
721      * <p>This is accurate at large x to 1 ulp.
722      *
723      * <p>At small x the accuracy cannot be improved over using exp(x*x).
724      * This occurs at {@code x <= 1}.
725      *
726      * <p>Warning: This has no checks for overflow. The method is never called
727      * when {@code x*x > log(MAX_VALUE/2)}.
728      *
729      * @param x Value
730      * @return exp(x*x)
731      */
732     static double expxx(double x) {
733         // Note: If exp(a) overflows this can create NaN if the
734         // round-off b is negative or zero:
735         // exp(a) * exp1m(b) + exp(a)
736         // inf * 0 + inf   or   inf * -b  + inf
737         final double a = x * x;
738         final double b = squareLowUnscaled(x, a);
739         return expxx(a, b);
740     }
741 
742     /**
743      * Compute {@code exp(-x*x)} with high accuracy. This is performed using
744      * information in the round-off from {@code x*x}.
745      *
746      * <p>This is accurate at large x to 1 ulp until exp(-x*x) is close to
747      * sub-normal. For very small exp(-x*x) the adjustment is sub-normal and
748      * bits can be lost in the adjustment for a max observed error of {@code < 2} ulp.
749      *
750      * <p>At small x the accuracy cannot be improved over using exp(-x*x).
751      * This occurs at {@code x <= 1}.
752      *
753      * @param x Value
754      * @return exp(-x*x)
755      */
756     static double expmxx(double x) {
757         final double a = x * x;
758         final double b = squareLowUnscaled(x, a);
759         return expxx(-a, -b);
760     }
761 
762     /**
763      * Compute {@code exp(a+b)} with high accuracy assuming {@code a+b = a}.
764      *
765      * <p>This is accurate at large positive a to 1 ulp. If a is negative and exp(a) is
766      * close to sub-normal a bit of precision may be lost when adjusting result
767      * as the adjustment is sub-normal (max observed error {@code < 2} ulp).
768      * For the use case of multiplication of a number less than 1 by exp(-x*x), a = -x*x,
769      * the result will be sub-normal and the rounding error is lost.
770      *
771      * <p>At small |a| the accuracy cannot be improved over using exp(a) as the
772      * round-off is too small to create terms that can adjust the standard result by
773      * more than 0.5 ulp. This occurs at {@code |a| <= 1}.
774      *
775      * @param a High bits of a split number
776      * @param b Low bits of a split number
777      * @return exp(a+b)
778      */
779     private static double expxx(double a, double b) {
780         // exp(a+b) = exp(a) * exp(b)
781         //          = exp(a) * (exp(b) - 1) + exp(a)
782         // Assuming:
783         // 1. -746 < a < 710 for no under/overflow of exp(a)
784         // 2. a+b = a
785         // As b -> 0 then exp(b) -> 1; expm1(b) -> b
786         // The round-off b is limited to ~ 0.5 * ulp(746) ~ 5.68e-14
787         // and we can use an approximation for expm1 (x/1! + x^2/2! + ...)
788         // The second term is required for the expm1 result but the
789         // bits are not significant to change the product with exp(a)
790 
791         final double ea = Math.exp(a);
792         // b ~ expm1(b)
793         return ea * b + ea;
794     }
795 
796     // Extended precision multiplication specialised for the square adapted from:
797     // org.apache.commons.numbers.core.ExtendedPrecision
798 
799     /**
800      * Compute the low part of the double length number {@code (z,zz)} for the exact
801      * square of {@code x} using Dekker's mult12 algorithm. The standard precision product
802      * {@code x*x} must be provided. The number {@code x} is split into high and low parts
803      * using Dekker's algorithm.
804      *
805      * <p>Warning: This method does not perform scaling in Dekker's split and large
806      * finite numbers can create NaN results.
807      *
808      * @param x Number to square
809      * @param xx Standard precision product {@code x*x}
810      * @return the low part of the square double length number
811      */
812     private static double squareLowUnscaled(double x, double xx) {
813         // Split the numbers using Dekker's algorithm without scaling
814         final double hx = highPartUnscaled(x);
815         final double lx = x - hx;
816 
817         return squareLow(hx, lx, xx);
818     }
819 
820     /**
821      * Implement Dekker's method to split a value into two parts. Multiplying by (2^s + 1) creates
822      * a big value from which to derive the two split parts.
823      * <pre>
824      * c = (2^s + 1) * a
825      * a_big = c - a
826      * a_hi = c - a_big
827      * a_lo = a - a_hi
828      * a = a_hi + a_lo
829      * </pre>
830      *
831      * <p>The multiplicand allows a p-bit value to be split into
832      * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}.
833      * Combined they have (p-1) bits of significand but the sign bit of {@code a_lo}
834      * contains a bit of information. The constant is chosen so that s is ceil(p/2) where
835      * the precision p for a double is 53-bits (1-bit of the mantissa is assumed to be
836      * 1 for a non sub-normal number) and s is 27.
837      *
838      * <p>This conversion does not use scaling and the result of overflow is NaN. Overflow
839      * may occur when the exponent of the input value is above 996.
840      *
841      * <p>Splitting a NaN or infinite value will return NaN.
842      *
843      * @param value Value.
844      * @return the high part of the value.
845      * @see Math#getExponent(double)
846      */
847     private static double highPartUnscaled(double value) {
848         final double c = MULTIPLIER * value;
849         return c - (c - value);
850     }
851 
852     /**
853      * Compute the low part of the double length number {@code (z,zz)} for the exact
854      * square of {@code x} using Dekker's mult12 algorithm. The standard
855      * precision product {@code x*x} must be provided. The number {@code x}
856      * should already be split into low and high parts.
857      *
858      * <p>Note: This uses the high part of the result {@code (z,zz)} as {@code x * x} and not
859      * {@code hx * hx + hx * lx + lx * hx} as specified in Dekker's original paper.
860      * See Shewchuk (1997) for working examples.
861      *
862      * @param hx High part of factor.
863      * @param lx Low part of factor.
864      * @param xx Square of the factor.
865      * @return <code>lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly)</code>
866      * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
867      * Shewchuk (1997) Theorum 18</a>
868      */
869     private static double squareLow(double hx, double lx, double xx) {
870         // Compute the multiply low part:
871         // err1 = xy - hx * hy
872         // err2 = err1 - lx * hy
873         // err3 = err2 - hx * ly
874         // low = lx * ly - err3
875         return lx * lx - ((xx - hx * hx) - 2 * lx * hx);
876     }
877 }
878