/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/compound/compoundf.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{dd_fmla, is_integerf};
use crate::double_double::DoubleDouble;
use crate::rounding::CpuRoundTiesEven;
use std::hint::black_box;

#[cold]
#[inline(never)]
fn as_compoundf_special(x: f32, y: f32) -> f32 {
    let nx = x.to_bits();
    let ny = y.to_bits();
    let ax: u32 = nx.wrapping_shl(1);
    let ay: u32 = ny.wrapping_shl(1);

    if ax == 0 || ay == 0 {
        // x or y is 0
        if ax == 0 {
            // compound(0,y) = 1 except for y = sNaN
            return if y.is_nan() { x + y } else { 1.0 };
        }

        if ay == 0 {
            // compound (x, 0)
            if x.is_nan() {
                return x + y;
            } // x = sNaN
            return if x < -1.0 {
                f32::NAN // rule (g)
            } else {
                1.0
            }; // rule (a)
        }
    }

    let mone = (-1.0f32).to_bits();
    if ay >= 0xffu32 << 24 {
        // y=Inf/NaN
        // the case x=0 was already checked above
        if ax > 0xffu32 << 24 {
            return x + y;
        } // x=NaN
        if ay == 0xffu32 << 24 {
            // y = +/-Inf
            if nx > mone {
                return f32::NAN;
            } // rule (g)
            let sy = ny >> 31; // sign bit of y
            if nx == mone {
                return if sy == 0 {
                    0.0 // Rule (c)
                } else {
                    f32::INFINITY // Rule (b)
                };
            }
            if x < 0.0 {
                return if sy == 0 { 0.0 } else { f32::INFINITY };
            }
            if x > 0.0 {
                return if sy != 0 { 0.0 } else { f32::INFINITY };
            }
            return 1.0;
        }
        return x + y; // case y=NaN
    }

    if nx >= mone || nx >= 0xffu32 << 23 {
        // x is Inf, NaN or <= -1
        if ax == 0xffu32 << 24 {
            // x is +Inf or -Inf
            if (nx >> 31) != 0 {
                return f32::NAN;
            } // x = -Inf, rule (g)
            // (1 + Inf)^y = +Inf for y > 0, +0 for y < 0
            return if (ny >> 31) != 0 { 1.0 / x } else { x };
        }
        if ax > 0xffu32 << 24 {
            return x + y;
        } // x is NaN
        if nx > mone {
            return f32::NAN; // x < -1.0: rule (g)
        }
        // now x = -1
        return if (ny >> 31) != 0 {
            // y < 0
            f32::INFINITY
        } else {
            // y > 0
            0.0
        };
    }
    0.0
}

#[inline]
pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 {
    // we include P[0] = 0 so that P[i] corresponds to degree i
    // this degree-8 polynomial generated by Sollya (cf p1.sollya)
    // has relative error < 2^-50.98
    const P: [u64; 8] = [
        0x0000000000000000,
        0x3ff71547652b82fe,
        0xbfe71547652b8d11,
        0x3fdec709dc3a5014,
        0xbfd715475b144983,
        0x3fd2776c3fda300e,
        0xbfcec990162358ce,
        0x3fca645337c29e27,
    ];

    let z2 = z * z;
    let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
    let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
    let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1]));
    let z4 = z2 * z2;
    c5 = dd_fmla(f64::from_bits(P[7]), z2, c5);
    c1 = dd_fmla(c3, z2, c1);
    c1 = dd_fmla(c5, z4, c1);
    z * c1
}

// for 0<=i<46, inv[i] approximates 1/t for 1/2+(i+13)/64 <= t < 1/2+(i+14)/64
pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [
    0x3ff6800000000000,
    0x3ff6000000000000,
    0x3ff5800000000000,
    0x3ff5000000000000,
    0x3ff4c00000000000,
    0x3ff4400000000000,
    0x3ff4000000000000,
    0x3ff3800000000000,
    0x3ff3400000000000,
    0x3ff2c00000000000,
    0x3ff2800000000000,
    0x3ff2000000000000,
    0x3ff1c00000000000,
    0x3ff1800000000000,
    0x3ff1400000000000,
    0x3ff1000000000000,
    0x3ff0c00000000000,
    0x3ff0800000000000,
    0x3ff0000000000000,
    0x3ff0000000000000,
    0x3fef400000000000,
    0x3feec00000000000,
    0x3fee400000000000,
    0x3fee000000000000,
    0x3fed800000000000,
    0x3fed000000000000,
    0x3feca00000000000,
    0x3fec400000000000,
    0x3febe00000000000,
    0x3feb800000000000,
    0x3feb200000000000,
    0x3feac00000000000,
    0x3fea800000000000,
    0x3fea200000000000,
    0x3fe9c00000000000,
    0x3fe9800000000000,
    0x3fe9200000000000,
    0x3fe8c00000000000,
    0x3fe8800000000000,
    0x3fe8400000000000,
    0x3fe8000000000000,
    0x3fe7c00000000000,
    0x3fe7600000000000,
    0x3fe7200000000000,
    0x3fe6e00000000000,
    0x3fe6a00000000000,
];

/* log2inv[i][0]+log2inv[i][1] is a double-double approximation of
-log2(inv[i]), with log2inv[i][0] having absolute error < 2^-54.462,
and log2inv[i][0]+log2inv[i][1] absolute error < 2^-109.101 */
pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [
    (0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf),
    (0x3c1c141e66faaaad, 0xbfdd6753e032ea0f),
    (0x3c76fae441c09d76, 0xbfdb47ebf73882a1),
    (0x3c72d352bea51e59, 0xbfd91bba891f1709),
    (0xbc69575b04fa6fbd, 0xbfd800a563161c54),
    (0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688),
    (0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b),
    (0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a),
    (0x3c6a7b47d2c352d9, 0xbfd11307dad30b76),
    (0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970),
    (0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94),
    (0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688),
    (0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a),
    (0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4),
    (0xbc58ecb169b9465f, 0xbfbbc84240adabba),
    (0xbc5f3314e0985116, 0xbfb663f6fac91316),
    (0x3c530c22d15199b8, 0xbfb0eb389fa29f9b),
    (0xbc389b03784b5be1, 0xbfa6bad3758efd87),
    (0x0000000000000000, 0x0000000000000000),
    (0x0000000000000000, 0x0000000000000000),
    (0x3c3491f06c085bc2, 0x3fa184b8e4c56af8),
    (0x3c0155660710eb2a, 0x3fad6ebd1f1febfe),
    (0x3c2c141e66faaaad, 0x3fb4c560fe68af88),
    (0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c),
    (0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56),
    (0xbc5696e2866c718e, 0x3fc22dadc2ab3497),
    (0xbc61562d61af73f8, 0x3fc494f863b8df35),
    (0xbc60798d1aa21694, 0x3fc7046031c79f85),
    (0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1),
    (0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc),
    (0xbc6086fce864a1f6, 0x3fce857d3d361368),
    (0xbc53d56efe4338fe, 0x3fd08bce0d95fa38),
    (0x3c7c8d43e017579b, 0x3fd169c05363f158),
    (0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec),
    (0xbc7c658d602e66b0, 0x3fd4106017c3eca3),
    (0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598),
    (0x3c7ac9080333c605, 0x3fd6552b49986277),
    (0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad),
    (0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32),
    (0x3c7bca36fd02def0, 0x3fd99b072a96c6b2),
    (0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e),
    (0xbc501d98c3531027, 0x3fdb877c57b1b070),
    (0x3c78a38b4175d665, 0x3fdcffae611ad12b),
    (0x3c438c8946414c6a, 0x3fddfdd89d586e2b),
    (0x3c76d261f1753e0b, 0x3fdefec61b011f85),
    (0xbc87398fe685f171, 0x3fe0014332be0033),
];

/* for |z| <= 2^-6, returns an approximation of 2^z
with absolute error < 2^-43.540  */
#[inline]
fn compoundf_expf_poly(z: f64) -> f64 {
    /* Q is a degree-4 polynomial generated by Sollya (cf q1.sollya)
    with absolute error < 2^-43.549 */
    const Q: [u64; 5] = [
        0x3ff0000000000000,
        0x3fe62e42fef6d01a,
        0x3fcebfbdff7feeba,
        0x3fac6b167e579bee,
        0x3f83b2b3428d06de,
    ];
    let z2 = z * z;
    let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
    let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
    let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
    dd_fmla(c2, z2, c0)
}

pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 {
    /* for x > 0, 1+x is exact when 2^-29 <=  x < 2^53
    for x < 0, 1+x is exact when -1 < x <= 2^-30 */

    //  double u = (x >= 0x1p53) ? x : 1.0 + x;
    let u = 1.0 + x;
    /* For x < 0x1p53, x + 1 is exact thus u = x+1.
    For x >= 2^53, we estimate log2(x) instead of log2(1+x),
    since log2(1+x) = log2(x) + log2(1+1/x),
    log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
    error is bounded by 2^-52.471/53 < 2^-58.198 */

    let mut v = u.to_bits();
    let m: u64 = v & 0xfffffffffffffu64;
    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
    // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
    v = v.wrapping_sub((e << 52) as u64);
    let t = f64::from_bits(v);
    // u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
    // thus log2(u) = e + log2(t)
    v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
    let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
    let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
    // we approximates log2(t) by -log2(r) + log2(r*t)
    let p = log2p1_polyeval_1(z);
    // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
    e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
}

pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [
    0xbfe0000000000000,
    0xbfde000000000000,
    0xbfdc000000000000,
    0xbfda000000000000,
    0xbfd8000000000000,
    0xbfd6000000000000,
    0xbfd4000000000000,
    0xbfd2000000000000,
    0xbfd0000000000000,
    0xbfcc000000000000,
    0xbfc8000000000000,
    0xbfc4000000000000,
    0xbfc0000000000000,
    0xbfb8000000000000,
    0xbfb0000000000000,
    0xbfa0000000000000,
    0x0000000000000000,
    0x3fa0000000000000,
    0x3fb0000000000000,
    0x3fb8000000000000,
    0x3fc0000000000000,
    0x3fc4000000000000,
    0x3fc8000000000000,
    0x3fcc000000000000,
    0x3fd0000000000000,
    0x3fd2000000000000,
    0x3fd4000000000000,
    0x3fd6000000000000,
    0x3fd8000000000000,
    0x3fda000000000000,
    0x3fdc000000000000,
    0x3fde000000000000,
    0x3fe0000000000000,
];

/* exp2_U[i] is a double-double approximation h+l of 2^exp2_T[i]
so that h approximates 2^exp2_T[i] with absolute error < 2^-53.092,
and h+l approximates 2^exp2_T[i] with absolute error < 2^-107.385 */
pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [
    (0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd),
    (0xbc716e4786887a99, 0x3fe71f75e8ec5f74),
    (0xbc741577ee04992f, 0x3fe7a11473eb0187),
    (0xbc8d4c1dd41532d8, 0x3fe82589994cce13),
    (0x3c86e9f156864b27, 0x3fe8ace5422aa0db),
    (0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5),
    (0x3c6c7c46b071f2be, 0x3fe9c49182a3f090),
    (0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d),
    (0x3c87a1cd345dcc81, 0x3feae89f995ad3ad),
    (0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47),
    (0x3c711065895048dd, 0x3fec199bdd85529c),
    (0x3c6503cbd1e949db, 0x3fecb720dcef9069),
    (0x3c72ed02d75b3707, 0x3fed5818dcfba487),
    (0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f),
    (0xbc8e9c23179c2893, 0x3feea4afa2a490da),
    (0x3c89d3e12dd8a18b, 0x3fef50765b6e4540),
    (0x0000000000000000, 0x3ff0000000000000),
    (0x3c8d73e2a475b465, 0x3ff059b0d3158574),
    (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
    (0xbc96c51039449b3a, 0x3ff11301d0125b51),
    (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
    (0x3c99b07eb6c70573, 0x3ff2387a6e756238),
    (0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
    (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
    (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
    (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
    (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
    (0xbc807abe1db13cad, 0x3ff5342b569d4f82),
    (0x3c96324c054647ad, 0x3ff5ab07dd485429),
    (0xbc9383c17e40b497, 0x3ff6247eb03a5585),
    (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
];

/* return the correct rounding of (1+x)^y, otherwise -1.0
where t is an approximation of y*log2(1+x) with absolute error < 2^-40.680,
assuming 0x1.7154759a0df53p-24 <= |t| <= 150
exact is non-zero iff (1+x)^y is exact or midpoint */
fn exp2_fast(t: f64) -> f64 {
    let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150
    let mut r = t - k; // |r| <= 1/2, exact
    let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
    // we add 2^-6 so that i is rounded to nearest
    let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
    // now |r| <= 2^-6
    // 2^t = 2^k * exp2_U[i][0] * 2^r
    v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits();
    /* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
    exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
    with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
    bounded by ulp(v) + 2^0.5 * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
    Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
    2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
    The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
    < 2^-40.849. */
    let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
    v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) }; // scale v by 2^k, k is already integer

    // in case of potential underflow, we defer to the accurate path
    if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) {
        return -1.0;
    }
    err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too
    let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
    let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
    if lb != rb {
        return -1.0;
    } // rounding test failed

    f64::from_bits(v)
}

// 2^e/sqrt(2) < h < 2^e*sqrt(2), with -29 <= e <= 128
// divide h, l by 2^e
pub(crate) static LOG2P1_SCALE: [u64; 158] = [
    0x41c0000000000000,
    0x41b0000000000000,
    0x41a0000000000000,
    0x4190000000000000,
    0x4180000000000000,
    0x4170000000000000,
    0x4160000000000000,
    0x4150000000000000,
    0x4140000000000000,
    0x4130000000000000,
    0x4120000000000000,
    0x4110000000000000,
    0x4100000000000000,
    0x40f0000000000000,
    0x40e0000000000000,
    0x40d0000000000000,
    0x40c0000000000000,
    0x40b0000000000000,
    0x40a0000000000000,
    0x4090000000000000,
    0x4080000000000000,
    0x4070000000000000,
    0x4060000000000000,
    0x4050000000000000,
    0x4040000000000000,
    0x4030000000000000,
    0x4020000000000000,
    0x4010000000000000,
    0x4000000000000000,
    0x3ff0000000000000,
    0x3fe0000000000000,
    0x3fd0000000000000,
    0x3fc0000000000000,
    0x3fb0000000000000,
    0x3fa0000000000000,
    0x3f90000000000000,
    0x3f80000000000000,
    0x3f70000000000000,
    0x3f60000000000000,
    0x3f50000000000000,
    0x3f40000000000000,
    0x3f30000000000000,
    0x3f20000000000000,
    0x3f10000000000000,
    0x3f00000000000000,
    0x3ef0000000000000,
    0x3ee0000000000000,
    0x3ed0000000000000,
    0x3ec0000000000000,
    0x3eb0000000000000,
    0x3ea0000000000000,
    0x3e90000000000000,
    0x3e80000000000000,
    0x3e70000000000000,
    0x3e60000000000000,
    0x3e50000000000000,
    0x3e40000000000000,
    0x3e30000000000000,
    0x3e20000000000000,
    0x3e10000000000000,
    0x3e00000000000000,
    0x3df0000000000000,
    0x3de0000000000000,
    0x3dd0000000000000,
    0x3dc0000000000000,
    0x3db0000000000000,
    0x3da0000000000000,
    0x3d90000000000000,
    0x3d80000000000000,
    0x3d70000000000000,
    0x3d60000000000000,
    0x3d50000000000000,
    0x3d40000000000000,
    0x3d30000000000000,
    0x3d20000000000000,
    0x3d10000000000000,
    0x3d00000000000000,
    0x3cf0000000000000,
    0x3ce0000000000000,
    0x3cd0000000000000,
    0x3cc0000000000000,
    0x3cb0000000000000,
    0x3ca0000000000000,
    0x3c90000000000000,
    0x3c80000000000000,
    0x3c70000000000000,
    0x3c60000000000000,
    0x3c50000000000000,
    0x3c40000000000000,
    0x3c30000000000000,
    0x3c20000000000000,
    0x3c10000000000000,
    0x3c00000000000000,
    0x3bf0000000000000,
    0x3be0000000000000,
    0x3bd0000000000000,
    0x3bc0000000000000,
    0x3bb0000000000000,
    0x3ba0000000000000,
    0x3b90000000000000,
    0x3b80000000000000,
    0x3b70000000000000,
    0x3b60000000000000,
    0x3b50000000000000,
    0x3b40000000000000,
    0x3b30000000000000,
    0x3b20000000000000,
    0x3b10000000000000,
    0x3b00000000000000,
    0x3af0000000000000,
    0x3ae0000000000000,
    0x3ad0000000000000,
    0x3ac0000000000000,
    0x3ab0000000000000,
    0x3aa0000000000000,
    0x3a90000000000000,
    0x3a80000000000000,
    0x3a70000000000000,
    0x3a60000000000000,
    0x3a50000000000000,
    0x3a40000000000000,
    0x3a30000000000000,
    0x3a20000000000000,
    0x3a10000000000000,
    0x3a00000000000000,
    0x39f0000000000000,
    0x39e0000000000000,
    0x39d0000000000000,
    0x39c0000000000000,
    0x39b0000000000000,
    0x39a0000000000000,
    0x3990000000000000,
    0x3980000000000000,
    0x3970000000000000,
    0x3960000000000000,
    0x3950000000000000,
    0x3940000000000000,
    0x3930000000000000,
    0x3920000000000000,
    0x3910000000000000,
    0x3900000000000000,
    0x38f0000000000000,
    0x38e0000000000000,
    0x38d0000000000000,
    0x38c0000000000000,
    0x38b0000000000000,
    0x38a0000000000000,
    0x3890000000000000,
    0x3880000000000000,
    0x3870000000000000,
    0x3860000000000000,
    0x3850000000000000,
    0x3840000000000000,
    0x3830000000000000,
    0x3820000000000000,
    0x3810000000000000,
    0x3800000000000000,
    0x37f0000000000000,
];

/* put in h+l an approximation of log2(1+zh+zl)
   for |zh| <= 1/64 + 2^-51.508, |zl| < 2^-58 and |zl| < ulp(zh).
   We have |h|, |h+l| < 2^-5.459, |l| < 2^-56.162,
   the relative error is bounded by 2^-91.196,
   and |l| < 2^-50.523 |h| (see analyze_p2() in compoundf.sage).
*/

/* degree-13 polynomial generated by Sollya which approximates
   log2(1+z) for |z| <= 1/64 with relative error < 2^-93.777
   (cf file p2.sollya)
*/
static LOG2P1_LOG2_POLY: [u64; 18] = [
    0x3ff71547652b82fe,
    0x3c7777d0ffda0d80,
    0xbfe71547652b82fe,
    0xbc6777d0fd20b49c,
    0x3fdec709dc3a03fd,
    0x3c7d27f05171b74a,
    0xbfd71547652b82fe,
    0xbc57814e70b828b0,
    0x3fd2776c50ef9bfe,
    0x3c7e4f63e12bff83,
    0xbfcec709dc3a03f4,
    0x3fca61762a7adecc,
    0xbfc71547652d8849,
    0x3fc484b13d7e7029,
    0xbfc2776c1b2a40fd,
    0x3fc0c9a80f9b7c1c,
    0xbfbecc6801121200,
    0x3fbc6e4b91fd10e5,
];

fn log2_poly2(z: DoubleDouble) -> DoubleDouble {
    /* since we can't expect a relative accuracy better than 2^-93.777,
    the lower part of the double-double approximation only needs to
    have about 94-53 = 41 accurate bits. Since |p7*z^7/p1| < 2^-44,
    we evaluate terms of degree 7 or more in double precision only. */
    let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]); // degree 13

    for i in 7..=12 {
        h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i]));
    }

    let mut v = DoubleDouble::quick_mult_f64(z, h);
    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10]));
    v.hi = t.hi;
    v.lo += t.lo;

    v = DoubleDouble::quick_mult(v, z);

    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8]));
    v.hi = t.hi;
    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]);

    v = DoubleDouble::quick_mult(v, z);

    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6]));
    v.hi = t.hi;
    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]);

    v = DoubleDouble::quick_mult(v, z);

    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4]));
    v.hi = t.hi;
    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]);

    v = DoubleDouble::quick_mult(v, z);

    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2]));
    v.hi = t.hi;
    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]);

    v = DoubleDouble::quick_mult(v, z);

    let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0]));
    v.hi = t.hi;
    v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]);

    v = DoubleDouble::quick_mult(v, z);

    v
}

/* assuming -1 < x < 2^128, and x is representable in binary32,
put in h+l a double-double approximation of log2(1+x),
with relative error bounded by 2^-91.123, and |l| < 2^-48.574 |h|
(see analyze_log2p1_accurate() in compoundf.sage) */
pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble {
    let mut v_dd = if 1.0 >= x {
        // then 1.0 >= |x| since x > -1
        if (x as f32).abs() >= f32::from_bits(0x25000000) {
            DoubleDouble::from_exact_add(1.0, x)
        } else {
            DoubleDouble::new(x, 1.0)
        }
    } else {
        // fast_two_sum() is exact when |x| < 2^54 by Lemma 1 condition (ii) of [1]
        DoubleDouble::from_exact_add(x, 1.0)
    };

    // now h + l = 1 + x + eps with |eps| <= 2^-105 |h| and |l| <= ulp(h)
    let mut v = v_dd.hi.to_bits();
    let m = v & 0xfffffffffffffu64;
    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;

    let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]);
    v_dd.hi *= scale;
    v_dd.lo *= scale;

    // now |h| < sqrt(2) and |l| <= ulp(h) <= 2^-52

    // now 1 + x ~ 2^e * (h + l) thus log2(1+x) ~ e + log2(h+l)

    v = (2.0 + v_dd.hi).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
    let i: i32 = (v >> 45) as i32 - 0x2002d; // h is near 1/2+(i+13)/64
    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
    let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); // exact, -1/64 <= zh <= 1/64
    // since |r| <= 0x1.68p+0 and |l| <= 2^-52, |zl| <= 2^-51.508
    // zh + zl = r*(h+l)-1
    // log2(h+l) = -log2(r) + log2(r*(h+l)) = -log2(r) + log2(1+zh+zl)
    z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo);
    // now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58
    /* the relative error of fast_two_sum() is bounded by 2^-105,
    this amplified the relative error on p2() as follows:
    (1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */

    // now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58
    /* the relative error of fast_two_sum() is bounded by 2^-105,
    this amplified the relative error on p2() as follows:
    (1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */
    let log_p = log2_poly2(z_dd);
    // ph + pl approximates log2(1+zh+zl) with relative error < 2^-93.471

    /* since |log2inv[i][0]| < 1 and e is integer, the precondition of
    fast_two_sum is fulfilled: either |e| >= 1, or e=0 and fast_two_sum
    is exact */
    let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize];
    v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1));
    v_dd.lo += f64::from_bits(log2_inv.0);
    let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi);
    p.lo += v_dd.lo + log_p.lo;
    p
}

pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble {
    /* Q2 is a degree-8 polynomial generated by Sollya (cf q2.sollya)
    with absolute error < 2^-85.218 */

    static Q2: [u64; 12] = [
        0x3ff0000000000000,
        0x3fe62e42fefa39ef,
        0x3c7abc9d45534d06,
        0x3fcebfbdff82c58f,
        0xbc65e4383cf9ddf7,
        0x3fac6b08d704a0c0,
        0xbc46cbc55586c8f1,
        0x3f83b2ab6fba4e77,
        0x3f55d87fe789aec5,
        0x3f2430912f879daa,
        0x3eeffcc774b2367a,
        0x3eb62c017b9bdfe6,
    ];
    let h2 = z.hi * z.hi;
    let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10]));
    let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8]));
    c5 = dd_fmla(c7, h2, c5);
    // since ulp(c5*h^5) <= 2^-86, we still compute c5*z as double
    let z_vqh = c5 * z.hi;
    let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh);
    // multiply by z
    q = DoubleDouble::quick_mult(q, z);
    // add coefficient of degree 3
    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi);
    q.hi = t.hi;
    q.lo += t.lo + f64::from_bits(Q2[6]);
    // multiply by z and add coefficient of degree 2
    q = DoubleDouble::quick_mult(q, z);
    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi);
    q.hi = t.hi;
    q.lo += t.lo + f64::from_bits(Q2[4]);
    // multiply by h+l and add coefficient of degree 1
    q = DoubleDouble::quick_mult(q, z);
    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi);
    q.hi = t.hi;
    q.lo += t.lo + f64::from_bits(Q2[2]);
    // multiply by h+l and add coefficient of degree 0
    q = DoubleDouble::quick_mult(q, z);
    let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi);
    q.hi = t.hi;
    q.lo += t.lo;
    q
}

/* return the correct rounding of (1+x)^y or -1 if the rounding test failed,
where t is an approximation of y*log2(1+x).
We assume |h+l| < 150, |l/h| < 2^-48.445 |h|,
and the relative error between h+l and y*log2(1+x) is < 2^-91.120.
x and y are the original inputs of compound. */
fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
    if y == 1.0 {
        let res = 1.0 + x;
        return res;
    }
    let k = x_dd.hi.cpu_round_ties_even(); // |k| <= 150

    // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
    if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
        /* the relative error between h and y*log2(1+x) is bounded by
        (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
        2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
        The above threshold is such that h*(1+2^-48.444) < H0. */
        return (1.0 + x_dd.hi * 0.5) as f32;
    }

    let r = x_dd.hi - k; // |r| <= 1/2, exact
    // since r is an integer multiple of ulp(h), fast_two_sum() below is exact
    let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
    let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625
    // we add 2^-6 so that i is rounded to nearest
    let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32
    // h is near (i-16)/2^5
    v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact

    // now |h| <= 2^-6
    // 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l)
    v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
    let q = compoundf_exp2_poly2(v_dd);

    /* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and
    |qh + ql - 2^(h+l)| < 2^-85.210 */
    let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
    let mut q = DoubleDouble::quick_mult(exp2u, q);
    q = DoubleDouble::from_exact_add(q.hi, q.lo);
    /* Total error:
    * at input we have a relative error between h+l and y*log2(1+x) bounded
      by 2^-91.120: h + l = y*log2(1+x) * (1 + eps1) with |eps1| < 2^-91.120.
      Since |h+l| <= 150, this yields an absolute error bounded
      by 150*2^-91.120 < 2^-83.891:
      h + l = y*log2(1+x) + eps2 with |eps2| <= 150*2^-91.120 < 2^-83.891.
    * the absolute error in q2() is bounded by 2^-85.210
      and is multiplied by exp2_U[i] < 1.415
    * the absolute d_mul() error is bounded by 2^-102.199
    * the fast_two_sum() error is bounded by 2^-105
    All this yields an absolute error on qh+ql bounded by:
      2^-83.891 + 2^-85.210*1.415 + 2^-102.199 + 2^-105 < 2^-83.242.

    We distinguish the "small" case when at input |h+l| <= 2^-9.
    In this case k=0, i=16, thus exp2_T[i]=0, exp2_U[i]=1,
    and absolute error in q2() is bounded by 2^-102.646,
    and remains unchanged since the d_mul() call does not change qh, ql.
    */

    /* Rounding test: since |ql| < ulp(qh), and the error is less than ulp(qh),
    the rounding test can fail only when the last 53-25 = 28 bits of qh
    represent a signed number in [-1,1] (when it is -2 or 2, adding ql and
    the error cannot cross a rounding boundary). */
    let mut w = q.hi.to_bits();
    if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 {
        static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000];
        let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000);
        let err = f64::from_bits(ERR[small as usize]);

        w = (q.hi + (q.lo + err)).to_bits();
        w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
    }

    /* multiply qh+ql by 2^k: since 0.989 < qh_in < 1.011 and
    0.707 < exp2_U[i] < 1.415, we have 0.69 < qh+ql < 1.44 */
    v = (q.hi + q.lo).to_bits();
    /* For RNDN, if qh fits exactly in 25 bits, and ql is tiny, so that
    qh + ql rounds to qh, then we might have a double-rounding issue. */
    if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. {
        v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); // simulate round to odd
    }
    v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
    // there is no underflow/overflow in the scaling by 2^k since |k| <= 150
    f64::from_bits(v) as f32
}

// at input, exact is non-zero iff (1+x)^y is exact
// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
#[cold]
#[inline(never)]
fn compoundf_accurate(x: f32, y: f32) -> f32 {
    let mut v = compoundf_log2p1_accurate(x as f64);
    /* h + l is a double-double approximation of log(1+x),
    with relative error bounded by 2^-91.123,
    and |l| < 2^-48.574 |h| */
    v = DoubleDouble::quick_mult_f64(v, y as f64);
    /* h + l is a double-double approximation of y*log(1+x).
    Since 2^-149 <= |h_in+l_in| < 128 and 2^-149 <= |y| < 2^128, we have
    2^-298 <= |h+l| < 2^135, thus no underflow/overflow in double is possible.
    The s_mul() error is bounded by ulp(l). Since |l_in| < 2^-48.574 |h_in|,
    and the intermediate variable lo in s_mul() satisfies |lo| < ulp(h),
    we have |l| < ulp(h) + |y l_in| <= ulp(h) + 2^-48.574 |y h_in|
    < (2^-52 + 2^-48.574) |h| < 2^-48.445 |h|. The s_mul() error is thus
    bounded by 2^-48.445*2^-52 = 2^-100.445 |h|. This yields a total relative
    error bounded by (1+2^-91.123)*(1+2^-100.445)-1 < 2^-91.120. */
    compoundf_exp2_accurate(v, x, y)
}

/// Computes compound function (1.0 + x)^y
///
/// Max ULP 0.5
#[inline]
pub fn f_compoundf(x: f32, y: f32) -> f32 {
    /* Rules from IEEE 754-2019 for compound (x, n) with n integer:
       (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
       (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
       (c) compound (-1, n) is +0 for n > 0
       (d) compound (+/-0, n) is 1
       (e) compound (+Inf, n) is +Inf for n > 0
       (f) compound (+Inf, n) is +0 for n < 0
       (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
       (h) compound (qNaN, n) is qNaN for n <> 0.
    */
    let mone = (-1.0f32).to_bits();
    let nx = x.to_bits();
    let ny = y.to_bits();
    if nx >= mone {
        return as_compoundf_special(x, y);
    } // x <= -1 
    // now x > -1

    let ax: u32 = nx.wrapping_shl(1);
    let ay: u32 = ny.wrapping_shl(1);
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        return as_compoundf_special(x, y);
    } // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan

    // evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
    if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
        if ax <= 0x62000000u32 {
            return 1.0 + y * x;
        } // does it work for |x|<2^-29 and |y|<=16?
        let mut s = x as f64 + 1.;
        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };

        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if iter_count % 2 != 0 { s } else { 1. };

        while {
            iter_count >>= 1;
            iter_count
        } != 0
        {
            s = s * s;
            if iter_count % 2 != 0 {
                acc *= s;
            }
        }

        let dz = if y.is_sign_negative() { 1. / acc } else { acc };
        return dz as f32;
    }

    let xd = x as f64;
    let yd = y as f64;
    let tx = xd.to_bits();
    let ty = yd.to_bits();

    let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx));

    /* l approximates log2(1+x) with relative error < 2^-47.997,
    and 2^-149 <= |l| < 128 */

    let t: u64 = (l * f64::from_bits(ty)).to_bits();
    /* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
    2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
    The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */

    // detect overflow/underflow
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150
            return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
        } else if (t >> 63) == 0 {
            // t >= 128: overflow
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }

    /* since |t| < 150, the absolute error on t is bounded by
    150*2^-47.909 < 2^-40.680 */

    // 2^t rounds to 1 to nearest when |t| <= 0x1.715476ba97f14p-25
    if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 {
        return if (t >> 63) != 0 {
            black_box(1.0) - black_box(f32::from_bits(0x33000000))
        } else {
            black_box(1.0) + black_box(f32::from_bits(0x33000000))
        };
    }

    let res = exp2_fast(f64::from_bits(t));
    if res != -1.0 {
        return res as f32;
    }
    compoundf_accurate(x, y)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compoundf() {
        assert_eq!(
            f_compoundf(
                0.000000000000000000000000000000000000011754944,
                -170502050000000000000000000000000000000.
            ),
            1.
        );
        assert_eq!(f_compoundf(1.235, 1.432), 3.1634824);
        assert_eq!(f_compoundf(2., 3.0), 27.);
        assert!(f_compoundf(-2., 5.0).is_nan());
        assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY);
        assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0);
    }
}

Coverage Report

Created: 2025-12-05 07:37