ExtendedPrecision.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.statistics.distribution;

/**
 * Computes extended precision floating-point operations.
 *
 * <p>It is based on the 1971 paper
 * <a href="https://doi.org/10.1007/BF01397083">
 * Dekker (1971) A floating-point technique for extending the available precision</a>
 * Numer. Math. 18, 224-242.
 *
 * <p>Adapted from {@code org.apache.commons.numbers.core.ExtendedPrecision}.
 */
final class ExtendedPrecision {
    /** sqrt(2 pi) as a double. Computed to 64-digits precision and converted to double. */
    static final double SQRT2PI = 2.5066282746310007;
    /** Round-off from sqrt(2 pi) as a double.
     * Computed from the value sqrt(2 pi) to 64-digits precision minus {@link #SQRT2PI}. */
    static final double SQRT2PI_R = -1.8328579980459167e-16;

    /**
     * The multiplier used to split the double value into high and low parts. From
     * Dekker (1971): "The constant should be chosen equal to 2^(p - p/2) + 1,
     * where p is the number of binary digits in the mantissa". Here p is 53
     * and the multiplier is {@code 2^27 + 1}.
     */
    private static final double MULTIPLIER = 1.0 + 0x1.0p27;
    /** Threshold for a big number that may overflow when squared. 2^500. */
    private static final double BIG = 0x1.0p500;
    /** Threshold for a small number that may underflow when squared. 2^-500. */
    private static final double SMALL = 0x1.0p-500;
    /** Scale up by 2^600. */
    private static final double SCALE_UP = 0x1.0p600;
    /** Scale down by 2^600. */
    private static final double SCALE_DOWN = 0x1.0p-600;
    /** Upper bits of sqrt(2 pi). */
    private static final double SQRT2PI_H;
    /** Lower bits of sqrt(2 pi). */
    private static final double SQRT2PI_L;
    /** X squared value where {@code exp(-0.5*x*x)} cannot increase accuracy using the round-off
     * from x squared. */
    private static final int EXP_M_HALF_XX_MIN_VALUE = 2;
    /** Approximate x squared value where {@code exp(-0.5*x*x) == 0}. This is above
     * {@code -2 * ln(2^-1074)} due to rounding performed within the exp function. */
    private static final int EXP_M_HALF_XX_MAX_VALUE = 1491;

    static {
        // Initialise constants

        // Split the upper 53-bits for extended precision multiplication
        SQRT2PI_H = highPartUnscaled(SQRT2PI);
        SQRT2PI_L = SQRT2PI - SQRT2PI_H;
    }

    /** No instances. */
    private ExtendedPrecision() {}

    /**
     * Multiply the term by sqrt(2 pi).
     *
     * @param x Value (assumed to be positive)
     * @return x * sqrt(2 pi)
     */
    static double xsqrt2pi(double x) {
        // Note: Do not convert x to absolute for this use case
        if (x > BIG) {
            if (x == Double.POSITIVE_INFINITY) {
                return Double.POSITIVE_INFINITY;
            }
            return computeXsqrt2pi(x * SCALE_DOWN) * SCALE_UP;
        } else if (x < SMALL) {
            // Note: Ignore possible zero for this use case
            return computeXsqrt2pi(x * SCALE_UP) * SCALE_DOWN;
        } else {
            return computeXsqrt2pi(x);
        }
    }

    /**
     * Compute {@code a * sqrt(2 * pi)}.
     *
     * @param a Value
     * @return the result
     */
    private static double computeXsqrt2pi(double a) {
        // Split the number
        final double ha = highPartUnscaled(a);
        final double la = a - ha;

        // Extended precision product with sqrt(2 * pi)
        final double x = a * SQRT2PI;
        final double xx = productLow(ha, la, SQRT2PI_H, SQRT2PI_L, x);

        // Add the term a multiplied by the round-off from sqrt(2 * pi)
        // result = a * (SQRT2PI + SQRT2PI_R)
        // Sum from small to high
        return a * SQRT2PI_R + xx + x;
    }

    /**
     * Compute {@code sqrt(2 * x * x)}.
     *
     * <p>The result is computed using a high precision computation of
     * {@code sqrt(2 * x * x)} avoiding underflow or overflow of {@code x}
     * squared.
     *
     * @param x Value (assumed to be positive)
     * @return {@code sqrt(2 * x * x)}
     */
    static double sqrt2xx(double x) {
        // Note: Do not convert x to absolute for this use case
        if (x > BIG) {
            if (x == Double.POSITIVE_INFINITY) {
                return Double.POSITIVE_INFINITY;
            }
            return computeSqrt2aa(x * SCALE_DOWN) * SCALE_UP;
        } else if (x < SMALL) {
            // Note: Ignore possible zero for this use case
            return computeSqrt2aa(x * SCALE_UP) * SCALE_DOWN;
        } else {
            return computeSqrt2aa(x);
        }
    }

    /**
     * Compute {@code sqrt(2 * a * a)}.
     *
     * @param a Value
     * @return the result
     */
    private static double computeSqrt2aa(double a) {
        // Split the number
        final double ha = highPartUnscaled(a);
        final double la = a - ha;

        // Extended precision product
        final double x = 2 * a * a;
        final double xx = productLow(ha, la, 2 * ha, 2 * la, x);

        // Standard sqrt
        final double c = Math.sqrt(x);

        // Edge case.
        // Occurs if a has limited precision in the mantissa including
        // the special cases of 0 and 1.
        if (xx == 0) {
            return c;
        }

        // Dekker's double precision sqrt2 algorithm.
        // See Dekker, 1971, pp 242.
        final double hc = highPartUnscaled(c);
        final double lc = c - hc;
        final double u = c * c;
        final double uu = productLow(hc, lc, hc, lc, u);
        final double cc = (x - u - uu + xx) * 0.5 / c;

        // Extended precision result:
        // y = c + cc
        // yy = c - y + cc
        // Return only y
        return c + cc;
    }

    /**
     * Compute {@code exp(-0.5*x*x)} with high accuracy. This is performed using information in the
     * round-off from {@code x*x}.
     *
     * <p>This is accurate at large x to 1 ulp until exp(-0.5*x*x) is close to sub-normal. For very
     * small exp(-0.5*x*x) the adjustment is sub-normal and bits can be lost in the adjustment for a
     * max observed error of {@code < 2} ulp.
     *
     * <p>At small x the accuracy cannot be improved over using exp(-0.5*x*x). This occurs at
     * {@code x <= sqrt(2)}.
     *
     * @param x Value
     * @return exp(-0.5*x*x)
     * @see <a href="https://issues.apache.org/jira/browse/STATISTICS-52">STATISTICS-52</a>
     */
    static double expmhxx(double x) {
        final double z = x * x;
        if (z <= EXP_M_HALF_XX_MIN_VALUE) {
            return Math.exp(-0.5 * z);
        } else if (z >= EXP_M_HALF_XX_MAX_VALUE) {
            // exp(-745.5) == 0
            return 0;
        }
        // Split the number
        final double hx = highPartUnscaled(x);
        final double lx = x - hx;
        // Compute the round-off
        final double zz = squareLow(hx, lx, z);
        return expxx(-0.5 * z, -0.5 * zz);
    }

    /**
     * Compute {@code exp(a+b)} with high accuracy assuming {@code a+b = a}.
     *
     * <p>This is accurate at large positive a to 1 ulp. If a is negative and exp(a) is close to
     * sub-normal a bit of precision may be lost when adjusting result as the adjustment is sub-normal
     * (max observed error {@code < 2} ulp). For the use case of multiplication of a number less than
     * 1 by exp(-x*x), a = -x*x, the result will be sub-normal and the rounding error is lost.
     *
     * <p>At small |a| the accuracy cannot be improved over using exp(a) as the round-off is too small
     * to create terms that can adjust the standard result by more than 0.5 ulp. This occurs at
     * {@code |a| <= 1}.
     *
     * @param a High bits of a split number
     * @param b Low bits of a split number
     * @return exp(a+b)
     * @see <a href="https://issues.apache.org/jira/projects/NUMBERS/issues/NUMBERS-177">
     * Numbers-177: Accurate scaling by exp(z*z)</a>
     */
    private static double expxx(double a, double b) {
        // exp(a+b) = exp(a) * exp(b)
        // = exp(a) * (exp(b) - 1) + exp(a)
        // Assuming:
        // 1. -746 < a < 710 for no under/overflow of exp(a)
        // 2. a+b = a
        // As b -> 0 then exp(b) -> 1; expm1(b) -> b
        // The round-off b is limited to ~ 0.5 * ulp(746) ~ 5.68e-14
        // and we can use an approximation for expm1 (x/1! + x^2/2! + ...)
        // The second term is required for the expm1 result but the
        // bits are not significant to change the product with exp(a)

        final double ea = Math.exp(a);
        // b ~ expm1(b)
        return ea * b + ea;
    }

    /**
     * Implement Dekker's method to split a value into two parts. Multiplying by (2^s + 1) creates
     * a big value from which to derive the two split parts.
     * <pre>
     * c = (2^s + 1) * a
     * a_big = c - a
     * a_hi = c - a_big
     * a_lo = a - a_hi
     * a = a_hi + a_lo
     * </pre>
     *
     * <p>The multiplicand allows a p-bit value to be split into
     * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}.
     * Combined they have (p-1) bits of significand but the sign bit of {@code a_lo}
     * contains a bit of information. The constant is chosen so that s is ceil(p/2) where
     * the precision p for a double is 53-bits (1-bit of the mantissa is assumed to be
     * 1 for a non sub-normal number) and s is 27.
     *
     * <p>This conversion does not use scaling and the result of overflow is NaN. Overflow
     * may occur when the exponent of the input value is above 996.
     *
     * <p>Splitting a NaN or infinite value will return NaN.
     *
     * @param value Value.
     * @return the high part of the value.
     * @see Math#getExponent(double)
     */
    private static double highPartUnscaled(double value) {
        final double c = MULTIPLIER * value;
        return c - (c - value);
    }

    /**
     * Compute the low part of the double length number {@code (z,zz)} for the exact
     * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard
     * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y}
     * should already be split into low and high parts.
     *
     * <p>Note: This uses the high part of the result {@code (z,zz)} as {@code x * y} and not
     * {@code hx * hy + hx * ty + tx * hy} as specified in Dekker's original paper.
     * See Shewchuk (1997) for working examples.
     *
     * @param hx High part of first factor.
     * @param lx Low part of first factor.
     * @param hy High part of second factor.
     * @param ly Low part of second factor.
     * @param xy Product of the factors.
     * @return <code>lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly)</code>
     * @see <a href="https://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
     * Shewchuk (1997) Theorum 18</a>
     */
    private static double productLow(double hx, double lx, double hy, double ly, double xy) {
        // Compute the multiply low part:
        // err1 = xy - hx * hy
        // err2 = err1 - lx * hy
        // err3 = err2 - hx * ly
        // low = lx * ly - err3
        return lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly);
    }

    /**
     * Compute the low part of the double length number {@code (z,zz)} for the exact
     * square of {@code x} using Dekker's mult12 algorithm. The standard precision product
     * {@code x*x} must be provided. The number {@code x} should already be split into low
     * and high parts.
     *
     * <p>Note: This is a specialisation of
     * {@link #productLow(double, double, double, double, double)}.
     *
     * @param hx High part of factor.
     * @param lx Low part of factor.
     * @param xx Square of the factor.
     * @return <code>lx * lx - (((xx - hx * hx) - lx * hx) - hx * lx)</code>
     */
    private static double squareLow(double hx, double lx, double xx) {
        return lx * lx - ((xx - hx * hx) - 2 * lx * hx);
    }
}