/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/logs/log1pmx.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bits::{EXP_MASK, get_exponent_f64};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::log1p::{LOG_R1_DD, R1};
use crate::logs::log1p_dd;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval4, f_polyeval5};

#[cold]
fn tiny_hard(x: f64) -> f64 {
    // d = [-2^-10; 2^-10];
    // f_log1pf = (log(1+x) - x)/x^2;
    // Q = fpminimax(f_log1pf, 7, [|0, 107...|], d);
    // See ./notes/log1pmx_at_zero_hard.sollya

    let x2 = DoubleDouble::from_exact_mult(x, x);

    const C: [(u64, u64); 7] = [
        (0x3c755555555129de, 0x3fd5555555555555),
        (0xbbd333352fe28400, 0xbfd0000000000000),
        (0xbc698cb3ef1ea2dd, 0x3fc999999999999a),
        (0x3c4269700b3f95d0, 0xbfc55555555546ef),
        (0x3c61290e9261823e, 0x3fc2492492491565),
        (0x3c6fb0a243c2a59c, 0xbfc000018af8cb7e),
        (0x3c3b271ceb5c60a0, 0x3fbc71ca10b30518),
    ];

    let mut r = DoubleDouble::mul_f64_add(
        DoubleDouble::from_bit_pair(C[6]),
        x,
        DoubleDouble::from_bit_pair(C[5]),
    );
    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[4]));
    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[3]));
    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[2]));
    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[1]));
    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[0]));
    r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
    r = DoubleDouble::quick_mult(r, x2);
    r.to_f64()
}

fn log1pmx_big(x: f64) -> f64 {
    let mut x_u = x.to_bits();

    let mut x_dd = DoubleDouble::default();

    let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;

    const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;

    if x_exp >= EXP_BIAS {
        // |x| >= 1
        if x_u >= 0x4650_0000_0000_0000u64 {
            x_dd.hi = x;
        } else {
            x_dd = DoubleDouble::from_exact_add(x, 1.0);
        }
    } else {
        // |x| < 1
        x_dd = DoubleDouble::from_exact_add(1.0, x);
    }

    // At this point, x_dd is the exact sum of 1 + x:
    //   x_dd.hi + x_dd.lo = x + 1.0 exactly.
    //   |x_dd.hi| >= 2^-54
    //   |x_dd.lo| < ulp(x_dd.hi)

    let xhi_bits = x_dd.hi.to_bits();
    let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
    x_u = xhi_bits;
    // Range reduction:
    // Find k such that |x_hi - k * 2^-7| <= 2^-8.
    let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
    let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
    let e_x = x_e as f64;

    const LOG_2: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d2ef35793c76730),
        f64::from_bits(0x3fe62e42fefa3800),
    );

    // hi is exact
    // ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43

    let r_dd = LOG_R1_DD[idx as usize];

    let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1));
    // lo errors < |e_x| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo)
    //           <= 2^11 * 2^(-43-53) = 2^-85
    let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0));

    // Scale x_dd by 2^(-xh_bits.get_exponent()).
    let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
    // Normalize arguments:
    //   1 <= m_dd.hi < 2
    //   |m_dd.lo| < 2^-52.
    // This is exact.
    let m_hi = 1f64.to_bits() | xhi_frac;

    let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
        (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
    } else {
        0
    };

    let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));

    // Perform range reduction:
    //   r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
    //             = (r * m_dd.hi - 1) + r * m_dd.lo
    //             = v_hi + (v_lo.hi + v_lo.lo)
    // where:
    //   v_hi = r * m_dd.hi - 1          (exact)
    //   v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
    // Bounds on the values:
    //   -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
    //   |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52
    //   |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
    let r = R1[idx as usize];
    let v_hi;
    let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
    }

    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::logs::log1p::RCM1;
        let c = f64::from_bits(
            (idx as u64)
                .wrapping_shl(52 - 7)
                .wrapping_add(0x3FF0_0000_0000_0000u64),
        );
        v_hi = f_fmla(
            f64::from_bits(r),
            m_dd.hi - c,
            f64::from_bits(RCM1[idx as usize]),
        ); // Exact
    }

    // Range reduction output:
    //   -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
    //   |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
    let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
    v_dd.lo += v_lo.lo;

    // Exact sum:
    //   r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
    let mut r1 = DoubleDouble::from_exact_add(hi, v_dd.hi);

    // Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ...
    // generated by Sollya with:
    // > P = fpminimax(log(1 + x)/x, 6, [|1, 1, D...|],
    //                 [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]);
    const P_COEFFS: [u64; 6] = [
        0xbfe0000000000000,
        0x3fd5555555555166,
        0xbfcfffffffdb7746,
        0x3fc99999a8718a60,
        0xbfc555874ce8ce22,
        0x3fc24335555ddbe5,
    ];

    //   C * ulp(v_sq) + err_hi
    let v_sq = v_dd.hi * v_dd.hi;
    let p0 = f_fmla(
        v_dd.hi,
        f64::from_bits(P_COEFFS[1]),
        f64::from_bits(P_COEFFS[0]),
    );
    let p1 = f_fmla(
        v_dd.hi,
        f64::from_bits(P_COEFFS[3]),
        f64::from_bits(P_COEFFS[2]),
    );
    let p2 = f_fmla(
        v_dd.hi,
        f64::from_bits(P_COEFFS[5]),
        f64::from_bits(P_COEFFS[4]),
    );
    let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2);

    const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0];
    let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }];

    let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi);

    r1.lo = p;
    r1 = DoubleDouble::from_exact_add(r1.hi, r1.lo);
    r1 = DoubleDouble::full_add_f64(r1, -x);

    let left = r1.hi + (r1.lo - err);
    let right = r1.hi + (r1.lo + err);
    // Ziv's test to see if fast pass is accurate enough.
    if left == right {
        return left;
    }
    log1pmx_accurate_dd(x)
}

#[cold]
fn log1pmx_accurate_dd(x: f64) -> f64 {
    let r = log1p_dd(x);
    DoubleDouble::full_add_f64(r, -x).to_f64()
}

/// Computes log(1+x) - x
///
/// ulp 0.5
pub fn f_log1pmx(x: f64) -> f64 {
    let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;

    let x_e = (x.to_bits() >> 52) & 0x7ff;

    if !x.is_normal() {
        if x.is_nan() {
            return x + x;
        }
        if x.is_infinite() {
            return f64::NAN;
        }
        if x == 0. {
            return x;
        }
    }

    if ax > 0x3f90000000000000u64 {
        // |x| > 2^-6
        if x <= -1. {
            if x == -1. {
                return f64::NEG_INFINITY;
            }
            return f64::NAN;
        }
        return log1pmx_big(x);
    }

    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    // log(1+x) is expected to be used near zero

    if x_e < E_BIAS - 10 {
        if x_e < E_BIAS - 100 {
            // |x| < 2^-100
            // taylor series log(1+x) - x ~ -x^2/2 + x^3/3
            let x2 = x * x;
            let dx2 = f_fmla(x, x, -x2);
            let rl = dx2 * -0.5;
            return f_fmla(x2, -0.5, rl);
        }

        // Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x):
        // d = [-2^-10; 2^-10];
        // f_log1pf = (log(1+x) - x)/x^2;
        // Q = fpminimax(f_log1pf, 7, [|0, 107, 107, D...|], d);
        // See ./notes/log1pmx_at_zero.sollya

        let p = f_polyeval5(
            x,
            f64::from_bits(0x3fc999999999999a),
            f64::from_bits(0xbfc55555555546ef),
            f64::from_bits(0x3fc24924923d3abf),
            f64::from_bits(0xbfc000018af7f637),
            f64::from_bits(0x3fbc72984db24b6a),
        );

        let x2 = DoubleDouble::from_exact_mult(x, x);

        let mut r = DoubleDouble::f64_mul_f64_add(
            x,
            p,
            DoubleDouble::from_bit_pair((0xbbd3330899095800, 0xbfd0000000000000)),
        );
        r = DoubleDouble::mul_f64_add(
            r,
            x,
            DoubleDouble::from_bit_pair((0x3c75555538509407, 0x3fd5555555555555)),
        );
        r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
        r = DoubleDouble::quick_mult(r, x2);
        const ERR: f64 = f64::from_bits(0x3af0000000000000); // 2^-80
        let ub = r.hi + (r.lo + ERR);
        let lb = r.hi + (r.lo - ERR);
        if lb == ub {
            return r.to_f64();
        }
        return tiny_hard(x);
    }

    // Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x):
    // d = [-2^-6; 2^-6];
    // f_log1pf = (log(1+x) - x)/x^2;
    // Q = fpminimax(f_log1pf, 10, [|0, 107, 107, D...|], d);
    // See ./notes/log1pmx.sollya

    let p = f_estrin_polyeval8(
        x,
        f64::from_bits(0x3fc9999999999997),
        f64::from_bits(0xbfc5555555555552),
        f64::from_bits(0x3fc249249249fb64),
        f64::from_bits(0xbfc000000000f450),
        f64::from_bits(0x3fbc71c6e2591149),
        f64::from_bits(0xbfb999995cf14d86),
        f64::from_bits(0x3fb7494eb6c2c544),
        f64::from_bits(0xbfb558bf05690e85),
    );

    let x2 = DoubleDouble::from_exact_mult(x, x);

    let mut r = DoubleDouble::f64_mul_f64_add(
        x,
        p,
        DoubleDouble::from_bit_pair((0xbb9d89dc15a38000, 0xbfd0000000000000)),
    );
    r = DoubleDouble::mul_f64_add(
        r,
        x,
        DoubleDouble::from_bit_pair((0x3c7555a15a94e505, 0x3fd5555555555555)),
    );
    r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
    r = DoubleDouble::quick_mult(r, x2);

    r.to_f64()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_log1pmx() {
        assert_eq!(
            f_log1pmx(0.00000000000000005076835735015165),
            -0.0000000000000000000000000000000012887130540163486
        );
        assert_eq!(f_log1pmx(5.), -3.208240530771945);
        assert_eq!(f_log1pmx(-0.99), -3.6151701859880907);
        assert_eq!(f_log1pmx(-1.), f64::NEG_INFINITY);
        assert_eq!(
            f_log1pmx(1.0000000000008708e-13),
            -0.0000000000000000000000000050000000000083744
        );
        assert_eq!(
            f_log1pmx(1.0000000000008708e-26),
            -0.00000000000000000000000000000000000000000000000000005000000000008707
        );
        assert_eq!(f_log1pmx(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012890176556069385),
                   -0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000830783258233204);
    }
}

Coverage Report

Created: 2025-11-24 07:30

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::bits::{EXP_MASK, get_exponent_f64};
30		use crate::common::f_fmla;
31		use crate::double_double::DoubleDouble;
32		use crate::logs::log1p::{LOG_R1_DD, R1};
33		use crate::logs::log1p_dd;
34		use crate::polyeval::{f_estrin_polyeval8, f_polyeval4, f_polyeval5};
35
36		#[cold]
37	0	fn tiny_hard(x: f64) -> f64 {
38		// d = [-2^-10; 2^-10];
39		// f_log1pf = (log(1+x) - x)/x^2;
40		// Q = fpminimax(f_log1pf, 7, [\|0, 107...\|], d);
41		// See ./notes/log1pmx_at_zero_hard.sollya
42
43	0	let x2 = DoubleDouble::from_exact_mult(x, x);
44
45		const C: [(u64, u64); 7] = [
46		(0x3c755555555129de, 0x3fd5555555555555),
47		(0xbbd333352fe28400, 0xbfd0000000000000),
48		(0xbc698cb3ef1ea2dd, 0x3fc999999999999a),
49		(0x3c4269700b3f95d0, 0xbfc55555555546ef),
50		(0x3c61290e9261823e, 0x3fc2492492491565),
51		(0x3c6fb0a243c2a59c, 0xbfc000018af8cb7e),
52		(0x3c3b271ceb5c60a0, 0x3fbc71ca10b30518),
53		];
54
55	0	let mut r = DoubleDouble::mul_f64_add(
56	0	DoubleDouble::from_bit_pair(C[6]),
57	0	x,
58	0	DoubleDouble::from_bit_pair(C[5]),
59		);
60	0	r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[4]));
61	0	r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[3]));
62	0	r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[2]));
63	0	r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[1]));
64	0	r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[0]));
65	0	r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
66	0	r = DoubleDouble::quick_mult(r, x2);
67	0	r.to_f64()
68	0	}
69
70	0	fn log1pmx_big(x: f64) -> f64 {
71	0	let mut x_u = x.to_bits();
72
73	0	let mut x_dd = DoubleDouble::default();
74
75	0	let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
76
77		const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
78
79	0	if x_exp >= EXP_BIAS {
80		// \|x\| >= 1
81	0	if x_u >= 0x4650_0000_0000_0000u64 {
82	0	x_dd.hi = x;
83	0	} else {
84	0	x_dd = DoubleDouble::from_exact_add(x, 1.0);
85	0	}
86	0	} else {
87	0	// \|x\| < 1
88	0	x_dd = DoubleDouble::from_exact_add(1.0, x);
89	0	}
90
91		// At this point, x_dd is the exact sum of 1 + x:
92		// x_dd.hi + x_dd.lo = x + 1.0 exactly.
93		// \|x_dd.hi\| >= 2^-54
94		// \|x_dd.lo\| < ulp(x_dd.hi)
95
96	0	let xhi_bits = x_dd.hi.to_bits();
97	0	let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
98	0	x_u = xhi_bits;
99		// Range reduction:
100		// Find k such that \|x_hi - k * 2^-7\| <= 2^-8.
101	0	let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
102	0	let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
103	0	let e_x = x_e as f64;
104
105		const LOG_2: DoubleDouble = DoubleDouble::new(
106		f64::from_bits(0x3d2ef35793c76730),
107		f64::from_bits(0x3fe62e42fefa3800),
108		);
109
110		// hi is exact
111		// ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43
112
113	0	let r_dd = LOG_R1_DD[idx as usize];
114
115	0	let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1));
116		// lo errors < \|e_x\| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo)
117		// <= 2^11 * 2^(-43-53) = 2^-85
118	0	let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0));
119
120		// Scale x_dd by 2^(-xh_bits.get_exponent()).
121	0	let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
122		// Normalize arguments:
123		// 1 <= m_dd.hi < 2
124		// \|m_dd.lo\| < 2^-52.
125		// This is exact.
126	0	let m_hi = 1f64.to_bits() \| xhi_frac;
127
128	0	let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
129	0	(x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
130		} else {
131	0	0
132		};
133
134	0	let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
135
136		// Perform range reduction:
137		// r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
138		// = (r * m_dd.hi - 1) + r * m_dd.lo
139		// = v_hi + (v_lo.hi + v_lo.lo)
140		// where:
141		// v_hi = r * m_dd.hi - 1 (exact)
142		// v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
143		// Bounds on the values:
144		// -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
145		// \|v_lo.hi\| <= \|r\| * \|m_dd.lo\| < 2^-52
146		// \|v_lo.lo\| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
147	0	let r = R1[idx as usize];
148		let v_hi;
149	0	let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
150
151		#[cfg(any(
152		all(
153		any(target_arch = "x86", target_arch = "x86_64"),
154		target_feature = "fma"
155		),
156		target_arch = "aarch64"
157		))]
158		{
159		v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
160		}
161
162		#[cfg(not(any(
163		all(
164		any(target_arch = "x86", target_arch = "x86_64"),
165		target_feature = "fma"
166		),
167		target_arch = "aarch64"
168		)))]
169		{
170		use crate::logs::log1p::RCM1;
171	0	let c = f64::from_bits(
172	0	(idx as u64)
173	0	.wrapping_shl(52 - 7)
174	0	.wrapping_add(0x3FF0_0000_0000_0000u64),
175		);
176	0	v_hi = f_fmla(
177	0	f64::from_bits(r),
178	0	m_dd.hi - c,
179	0	f64::from_bits(RCM1[idx as usize]),
180	0	); // Exact
181		}
182
183		// Range reduction output:
184		// -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
185		// \|v_dd.lo\| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
186	0	let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
187	0	v_dd.lo += v_lo.lo;
188
189		// Exact sum:
190		// r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
191	0	let mut r1 = DoubleDouble::from_exact_add(hi, v_dd.hi);
192
193		// Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ...
194		// generated by Sollya with:
195		// > P = fpminimax(log(1 + x)/x, 6, [\|1, 1, D...\|],
196		// [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]);
197		const P_COEFFS: [u64; 6] = [
198		0xbfe0000000000000,
199		0x3fd5555555555166,
200		0xbfcfffffffdb7746,
201		0x3fc99999a8718a60,
202		0xbfc555874ce8ce22,
203		0x3fc24335555ddbe5,
204		];
205
206		// C * ulp(v_sq) + err_hi
207	0	let v_sq = v_dd.hi * v_dd.hi;
208	0	let p0 = f_fmla(
209	0	v_dd.hi,
210	0	f64::from_bits(P_COEFFS[1]),
211	0	f64::from_bits(P_COEFFS[0]),
212		);
213	0	let p1 = f_fmla(
214	0	v_dd.hi,
215	0	f64::from_bits(P_COEFFS[3]),
216	0	f64::from_bits(P_COEFFS[2]),
217		);
218	0	let p2 = f_fmla(
219	0	v_dd.hi,
220	0	f64::from_bits(P_COEFFS[5]),
221	0	f64::from_bits(P_COEFFS[4]),
222		);
223	0	let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2);
224
225		const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0];
226	0	let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }];
227
228	0	let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi);
229
230	0	r1.lo = p;
231	0	r1 = DoubleDouble::from_exact_add(r1.hi, r1.lo);
232	0	r1 = DoubleDouble::full_add_f64(r1, -x);
233
234	0	let left = r1.hi + (r1.lo - err);
235	0	let right = r1.hi + (r1.lo + err);
236		// Ziv's test to see if fast pass is accurate enough.
237	0	if left == right {
238	0	return left;
239	0	}
240	0	log1pmx_accurate_dd(x)
241	0	}
242
243		#[cold]
244	0	fn log1pmx_accurate_dd(x: f64) -> f64 {
245	0	let r = log1p_dd(x);
246	0	DoubleDouble::full_add_f64(r, -x).to_f64()
247	0	}
248
249		/// Computes log(1+x) - x
250		///
251		/// ulp 0.5
252	0	pub fn f_log1pmx(x: f64) -> f64 {
253	0	let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;
254
255	0	let x_e = (x.to_bits() >> 52) & 0x7ff;
256
257	0	if !x.is_normal() {
258	0	if x.is_nan() {
259	0	return x + x;
260	0	}
261	0	if x.is_infinite() {
262	0	return f64::NAN;
263	0	}
264	0	if x == 0. {
265	0	return x;
266	0	}
267	0	}
268
269	0	if ax > 0x3f90000000000000u64 {
270		// \|x\| > 2^-6
271	0	if x <= -1. {
272	0	if x == -1. {
273	0	return f64::NEG_INFINITY;
274	0	}
275	0	return f64::NAN;
276	0	}
277	0	return log1pmx_big(x);
278	0	}
279
280		const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
281
282		// log(1+x) is expected to be used near zero
283
284	0	if x_e < E_BIAS - 10 {
285	0	if x_e < E_BIAS - 100 {
286		// \|x\| < 2^-100
287		// taylor series log(1+x) - x ~ -x^2/2 + x^3/3
288	0	let x2 = x * x;
289	0	let dx2 = f_fmla(x, x, -x2);
290	0	let rl = dx2 * -0.5;
291	0	return f_fmla(x2, -0.5, rl);
292	0	}
293
294		// Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x):
295		// d = [-2^-10; 2^-10];
296		// f_log1pf = (log(1+x) - x)/x^2;
297		// Q = fpminimax(f_log1pf, 7, [\|0, 107, 107, D...\|], d);
298		// See ./notes/log1pmx_at_zero.sollya
299
300	0	let p = f_polyeval5(
301	0	x,
302	0	f64::from_bits(0x3fc999999999999a),
303	0	f64::from_bits(0xbfc55555555546ef),
304	0	f64::from_bits(0x3fc24924923d3abf),
305	0	f64::from_bits(0xbfc000018af7f637),
306	0	f64::from_bits(0x3fbc72984db24b6a),
307		);
308
309	0	let x2 = DoubleDouble::from_exact_mult(x, x);
310
311	0	let mut r = DoubleDouble::f64_mul_f64_add(
312	0	x,
313	0	p,
314	0	DoubleDouble::from_bit_pair((0xbbd3330899095800, 0xbfd0000000000000)),
315		);
316	0	r = DoubleDouble::mul_f64_add(
317	0	r,
318	0	x,
319	0	DoubleDouble::from_bit_pair((0x3c75555538509407, 0x3fd5555555555555)),
320	0	);
321	0	r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
322	0	r = DoubleDouble::quick_mult(r, x2);
323		const ERR: f64 = f64::from_bits(0x3af0000000000000); // 2^-80
324	0	let ub = r.hi + (r.lo + ERR);
325	0	let lb = r.hi + (r.lo - ERR);
326	0	if lb == ub {
327	0	return r.to_f64();
328	0	}
329	0	return tiny_hard(x);
330	0	}
331
332		// Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x):
333		// d = [-2^-6; 2^-6];
334		// f_log1pf = (log(1+x) - x)/x^2;
335		// Q = fpminimax(f_log1pf, 10, [\|0, 107, 107, D...\|], d);
336		// See ./notes/log1pmx.sollya
337
338	0	let p = f_estrin_polyeval8(
339	0	x,
340	0	f64::from_bits(0x3fc9999999999997),
341	0	f64::from_bits(0xbfc5555555555552),
342	0	f64::from_bits(0x3fc249249249fb64),
343	0	f64::from_bits(0xbfc000000000f450),
344	0	f64::from_bits(0x3fbc71c6e2591149),
345	0	f64::from_bits(0xbfb999995cf14d86),
346	0	f64::from_bits(0x3fb7494eb6c2c544),
347	0	f64::from_bits(0xbfb558bf05690e85),
348		);
349
350	0	let x2 = DoubleDouble::from_exact_mult(x, x);
351
352	0	let mut r = DoubleDouble::f64_mul_f64_add(
353	0	x,
354	0	p,
355	0	DoubleDouble::from_bit_pair((0xbb9d89dc15a38000, 0xbfd0000000000000)),
356		);
357	0	r = DoubleDouble::mul_f64_add(
358	0	r,
359	0	x,
360	0	DoubleDouble::from_bit_pair((0x3c7555a15a94e505, 0x3fd5555555555555)),
361	0	);
362	0	r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
363	0	r = DoubleDouble::quick_mult(r, x2);
364
365	0	r.to_f64()
366	0	}
367
368		#[cfg(test)]
369		mod tests {
370		use super::*;
371
372		#[test]
373		fn test_log1pmx() {
374		assert_eq!(
375		f_log1pmx(0.00000000000000005076835735015165),
376		-0.0000000000000000000000000000000012887130540163486
377		);
378		assert_eq!(f_log1pmx(5.), -3.208240530771945);
379		assert_eq!(f_log1pmx(-0.99), -3.6151701859880907);
380		assert_eq!(f_log1pmx(-1.), f64::NEG_INFINITY);
381		assert_eq!(
382		f_log1pmx(1.0000000000008708e-13),
383		-0.0000000000000000000000000050000000000083744
384		);
385		assert_eq!(
386		f_log1pmx(1.0000000000008708e-26),
387		-0.00000000000000000000000000000000000000000000000000005000000000008707
388		);
389		assert_eq!(f_log1pmx(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012890176556069385),
390		-0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000830783258233204);
391		}
392		}