/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/sin.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{dyad_fmla, f_fmla, min_normal_f64};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::rounding::CpuRound;
use crate::sin_helper::sincos_eval_dd;
use crate::sin_table::SIN_K_PI_OVER_128;
use crate::sincos_dyadic::SIN_K_PI_OVER_128_F128;
use crate::sincos_reduce::LargeArgumentReduction;

// For 2^-7 < |x| < 2^16, return k and u such that:
//   k = round(x * 128/pi)
//   x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo
// Error bound:
//   |(x - k * pi/128) - (u_hi + u_lo)| <= max(ulp(ulp(u_hi)), 2^-119)
//                                      <= 2^-111.
#[inline]
pub(crate) fn range_reduction_small(x: f64) -> (DoubleDouble, u64) {
    const MPI_OVER_128: [u64; 3] = [0xbf9921fb54400000, 0xbd70b4611a600000, 0xbb43198a2e037073];
    const ONE_TWENTY_EIGHT_OVER_PI_D: f64 = f64::from_bits(0x40445f306dc9c883);
    let prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D;
    let kd = prod_hi.cpu_round();

    // Let y = x - k * (pi/128)
    // Then |y| < pi / 256
    // With extra rounding errors, we can bound |y| < 1.6 * 2^-7.
    let y_hi = f_fmla(kd, f64::from_bits(MPI_OVER_128[0]), x); // Exact
    // |u.hi| < 1.6*2^-7
    let u_hi = f_fmla(kd, f64::from_bits(MPI_OVER_128[1]), y_hi);

    let u0 = y_hi - u_hi; // Exact
    // |u.lo| <= max(ulp(u.hi), |kd * MPI_OVER_128[2]|)
    let u1 = f_fmla(kd, f64::from_bits(MPI_OVER_128[1]), u0); // Exact
    let u_lo = f_fmla(kd, f64::from_bits(MPI_OVER_128[2]), u1);
    // Error bound:
    // |x - k * pi/128| - (u.hi + u.lo) <= ulp(u.lo)
    //                                  <= ulp(max(ulp(u.hi), kd*MPI_OVER_128[2]))
    //                                  <= 2^(-7 - 104) = 2^-111.
    (DoubleDouble::new(u_lo, u_hi), unsafe {
        kd.to_int_unchecked::<i64>() as u64 // indeterminate values is always filtered out before this call, as well only lowest bits are used
    })
}

#[inline]
pub(crate) fn get_sin_k_rational(kk: u64) -> DyadicFloat128 {
    let idx = if (kk & 64) != 0 {
        64 - (kk & 63)
    } else {
        kk & 63
    };
    let mut ans = SIN_K_PI_OVER_128_F128[idx as usize];
    if (kk & 128) != 0 {
        ans.sign = DyadicSign::Neg;
    }
    ans
}

pub(crate) struct SinCos {
    pub(crate) v_sin: DoubleDouble,
    pub(crate) v_cos: DoubleDouble,
    pub(crate) err: f64,
}

#[inline]
pub(crate) fn sincos_eval(u: DoubleDouble) -> SinCos {
    // Evaluate sin(y) = sin(x - k * (pi/128))
    // We use the degree-7 Taylor approximation:
    //   sin(y) ~ y - y^3/3! + y^5/5! - y^7/7!
    // Then the error is bounded by:
    //   |sin(y) - (y - y^3/3! + y^5/5! - y^7/7!)| < |y|^9/9! < 2^-54/9! < 2^-72.
    // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms
    // < ulp(u_hi^3) gives us:
    //   y - y^3/3! + y^5/5! - y^7/7! = ...
    // ~ u_hi + u_hi^3 * (-1/6 + u_hi^2 * (1/120 - u_hi^2 * 1/5040)) +
    //        + u_lo (1 + u_hi^2 * (-1/2 + u_hi^2 / 24))
    let u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58.
    // p1 ~ 1/120 + u_hi^2 / 5040.
    let p1 = f_fmla(
        u_hi_sq,
        f64::from_bits(0xbf2a01a01a01a01a),
        f64::from_bits(0x3f81111111111111),
    );
    // q1 ~ -1/2 + u_hi^2 / 24.
    let q1 = f_fmla(
        u_hi_sq,
        f64::from_bits(0x3fa5555555555555),
        f64::from_bits(0xbfe0000000000000),
    );
    let u_hi_3 = u_hi_sq * u.hi;
    // p2 ~ -1/6 + u_hi^2 (1/120 - u_hi^2 * 1/5040)
    let p2 = f_fmla(u_hi_sq, p1, f64::from_bits(0xbfc5555555555555));
    // q2 ~ 1 + u_hi^2 (-1/2 + u_hi^2 / 24)
    let q2 = f_fmla(u_hi_sq, q1, 1.0);
    let sin_lo = f_fmla(u_hi_3, p2, u.lo * q2);
    // Overall, |sin(y) - (u_hi + sin_lo)| < 2*ulp(u_hi^3) < 2^-69.

    // Evaluate cos(y) = cos(x - k * (pi/128))
    // We use the degree-8 Taylor approximation:
    //   cos(y) ~ 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8!
    // Then the error is bounded by:
    //   |cos(y) - (...)| < |y|^10/10! < 2^-81
    // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms
    // < ulp(u_hi^3) gives us:
    //   1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! = ...
    // ~ 1 - u_hi^2/2 + u_hi^4(1/24 + u_hi^2 (-1/720 + u_hi^2/40320)) +
    //     + u_hi u_lo (-1 + u_hi^2/6)
    // We compute 1 - u_hi^2 accurately:
    //   v_hi + v_lo ~ 1 - u_hi^2/2
    // with error <= 2^-105.
    let u_hi_neg_half = (-0.5) * u.hi;

    let (mut v_lo, v_hi);

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        v_hi = f_fmla(u.hi, u_hi_neg_half, 1.0);
        v_lo = 1.0 - v_hi; // Exact
        v_lo = f_fmla(u.hi, u_hi_neg_half, v_lo);
    }

    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        let u_hi_sq_neg_half = DoubleDouble::from_exact_mult(u.hi, u_hi_neg_half);
        let v = DoubleDouble::from_exact_add(1.0, u_hi_sq_neg_half.hi);
        v_lo = v.lo;
        v_lo += u_hi_sq_neg_half.lo;
        v_hi = v.hi;
    }

    // r1 ~ -1/720 + u_hi^2 / 40320
    let r1 = f_fmla(
        u_hi_sq,
        f64::from_bits(0x3efa01a01a01a01a),
        f64::from_bits(0xbf56c16c16c16c17),
    );
    // s1 ~ -1 + u_hi^2 / 6
    let s1 = f_fmla(u_hi_sq, f64::from_bits(0x3fc5555555555555), -1.0);
    let u_hi_4 = u_hi_sq * u_hi_sq;
    let u_hi_u_lo = u.hi * u.lo;
    // r2 ~ 1/24 + u_hi^2 (-1/720 + u_hi^2 / 40320)
    let r2 = f_fmla(u_hi_sq, r1, f64::from_bits(0x3fa5555555555555));
    // s2 ~ v_lo + u_hi * u_lo * (-1 + u_hi^2 / 6)
    let s2 = f_fmla(u_hi_u_lo, s1, v_lo);
    let cos_lo = f_fmla(u_hi_4, r2, s2);
    // Overall, |cos(y) - (v_hi + cos_lo)| < 2*ulp(u_hi^4) < 2^-75.

    let sin_u = DoubleDouble::from_exact_add(u.hi, sin_lo);
    let cos_u = DoubleDouble::from_exact_add(v_hi, cos_lo);

    let err = f_fmla(
        u_hi_3,
        f64::from_bits(0x3cc0000000000000),
        f64::from_bits(0x3960000000000000),
    );

    SinCos {
        v_sin: sin_u,
        v_cos: cos_u,
        err,
    }
}

#[cold]
#[inline(never)]
fn sin_accurate(y: DoubleDouble, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
    let r_sincos = sincos_eval_dd(y);

    // k is an integer and -pi / 256 <= y <= pi / 256.
    // Then sin(x) = sin((k * pi/128 + y)
    //             = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128)

    let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
    let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);

    let rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y);
    rr.to_f64()
}

#[cold]
pub(crate) fn sin_accurate_near_zero(x: f64) -> f64 {
    // Generated by Sollya:
    // d = [0, 0.0490874];
    // f_sin = sin(y)/y;
    // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10, 12|], [|107...|], d, relative, floating);
    const C: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbc65555555555217, 0xbfc5555555555555),
        (0x3c011110f7d49e8c, 0x3f81111111111111),
        (0xbb65e5b30986fc80, 0xbf2a01a01a01a01a),
        (0xbb689e86245bd566, 0x3ec71de3a556c6e5),
        (0xbaccb3d55ccfca58, 0xbe5ae64567954935),
        (0x3a6333ef7a00ce40, 0x3de6120ca00ce3a2),
    ];
    let x2 = DoubleDouble::from_exact_mult(x, x);
    let mut p = DoubleDouble::quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[6]),
        DoubleDouble::from_bit_pair(C[5]),
    );
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = DoubleDouble::quick_mul_add_f64(x2, p, f64::from_bits(0x3ff0000000000000));
    p = DoubleDouble::quick_mult_f64(p, x);
    p.to_f64()
}

/// Sine for double precision
///
/// ULP 0.5
pub fn f_sin(x: f64) -> f64 {
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let y: DoubleDouble;
    let k;

    let mut argument_reduction = LargeArgumentReduction::default();

    if x_e < E_BIAS + 16 {
        // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA)
        let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
        if ax <= 0x3fa921fbd34a9550 {
            // |x| <= 0.0490874
            if x_e < E_BIAS - 26 {
                // |x| < 2^-26
                if x == 0.0 {
                    // Signed zeros.
                    return x;
                }

                // For |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
                return dyad_fmla(x, f64::from_bits(0xbc90000000000000), x);
            }

            // Polynomial for sin(x)/x
            // Generated by Sollya:
            // d = [0, 0.0490874];
            // f_sin = sin(y)/y;
            // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|1, D...|], d, relative, floating);

            let x2 = x * x;
            let x4 = x2 * x2;
            const C: [u64; 4] = [
                0xbfc5555555555555,
                0x3f8111111110e45a,
                0xbf2a019ffd7fdaaf,
                0x3ec71819b9bf01ef,
            ];
            let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
            let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
            let w0 = f_fmla(x4, p23, p01);
            let w1 = x2 * w0 * x;
            let r = DoubleDouble::from_exact_add(x, w1);
            let err = f_fmla(
                x2,
                f64::from_bits(0x3cb0000000000000), // 2^-52
                f64::from_bits(0x3be0000000000000), // 2^-65
            );
            let ub = r.hi + (r.lo + err);
            let lb = r.hi + (r.lo - err);
            if ub == lb {
                return ub;
            }
            return sin_accurate_near_zero(x);
        }

        // Small range reduction.
        (y, k) = range_reduction_small(x);
    } else {
        // Inf or NaN
        if x_e > 2 * E_BIAS {
            // sin(+-Inf) = NaN
            return x + f64::NAN;
        }

        // Large range reduction.
        (k, y) = argument_reduction.reduce(x);
    }

    let r_sincos = sincos_eval(y);

    // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
    let sk = SIN_K_PI_OVER_128[(k & 255) as usize];
    let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];

    let sin_k = DoubleDouble::from_bit_pair(sk);
    let cos_k = DoubleDouble::from_bit_pair(ck);

    let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
    let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);

    // sin_k_cos_y is always >> cos_k_sin_y
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;

    let rlp = rr.lo + r_sincos.err;
    let rlm = rr.lo - r_sincos.err;

    let r_upper = rr.hi + rlp; // (rr.lo + ERR);
    let r_lower = rr.hi + rlm; // (rr.lo - ERR);

    // Ziv's accuracy test
    if r_upper == r_lower {
        return rr.to_f64();
    }

    sin_accurate(y, sin_k, cos_k)
}

#[cold]
#[inline(never)]
fn cos_accurate(y: DoubleDouble, msin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
    let r_sincos = sincos_eval_dd(y);

    // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
    // So k is an integer and -pi / 256 <= y <= pi / 256.
    // Then sin(x) = sin((k * pi/128 + y)
    //             = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128)

    let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k);
    let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k);

    let rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y);
    rr.to_f64()
}

#[cold]
pub(crate) fn cos_accurate_near_zero(x: f64) -> f64 {
    // Generated by Sollya:
    // d = [0, 0.0490874];
    // f_sin = sin(y)/y;
    // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
    const C: [(u64, u64); 6] = [
        (0xba2fa1c000000000, 0x3ff0000000000000),
        (0x3b1cd6ead3800000, 0xbfe0000000000000),
        (0x3c45112931063916, 0x3fa5555555555555),
        (0xbba1c1e3ab3b09e0, 0xbf56c16c16c16b2b),
        (0x3b937bc2f4ea7db6, 0x3efa01a019495fca),
        (0x3b307fd5e1b021b3, 0xbe927e0d57d894f8),
    ];
    let x2 = DoubleDouble::from_exact_mult(x, x);
    let mut p = DoubleDouble::quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[5]),
        DoubleDouble::from_bit_pair(C[4]),
    );
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
    p.to_f64()
}

/// Cosine for double precision
///
/// ULP 0.5
pub fn f_cos(x: f64) -> f64 {
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let y: DoubleDouble;
    let k;

    let mut argument_reduction = LargeArgumentReduction::default();

    if x_e < E_BIAS + 16 {
        // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA)
        let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
        if ax <= 0x3fa921fbd34a9550 {
            // |x| <= 0.0490874
            if x_e < E_BIAS - 27 {
                // |x| < 2^-27
                if x == 0.0 {
                    // Signed zeros.
                    return 1.0;
                }
                // For |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
                return 1.0 - min_normal_f64();
            }

            // Polynomial for cos(x)
            // Generated by Sollya:
            // d = [0, 0.0490874];
            // f_cos = cos(y);
            // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|1, D...|], d, relative, floating);

            let x2 = x * x;
            let x4 = x2 * x2;
            const C: [u64; 4] = [
                0xbfe0000000000000,
                0x3fa55555555554a4,
                0xbf56c16c1619b84a,
                0x3efa013d3d01cf7f,
            ];
            let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
            let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
            let w0 = f_fmla(x4, p23, p01);
            let w1 = x2 * w0;
            let r = DoubleDouble::from_exact_add(1., w1);
            let err = f_fmla(
                x2,
                f64::from_bits(0x3cb0000000000000), // 2^-52
                f64::from_bits(0x3be0000000000000), // 2^-65
            );

            let ub = r.hi + (r.lo + err);
            let lb = r.hi + (r.lo - err);
            if ub == lb {
                return ub;
            }
            return cos_accurate_near_zero(x);
        } else {
            // // Small range reduction.
            (y, k) = range_reduction_small(x);
        }
    } else {
        // Inf or NaN
        if x_e > 2 * E_BIAS {
            // cos(+-Inf) = NaN
            return x + f64::NAN;
        }

        // Large range reduction.
        (k, y) = argument_reduction.reduce(x);
    }
    let r_sincos = sincos_eval(y);

    // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
    // So k is an integer and -pi / 256 <= y <= pi / 256.
    // Then cos(x) = cos((k * pi/128 + y)
    //             = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128)
    let sk = SIN_K_PI_OVER_128[(k.wrapping_add(128) & 255) as usize];
    let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
    let msin_k = DoubleDouble::from_bit_pair(sk);
    let cos_k = DoubleDouble::from_bit_pair(ck);

    let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k);
    let cos_k_msin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k);

    // cos_k_cos_y is always >> cos_k_msin_y
    let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
    rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;
    let rlp = rr.lo + r_sincos.err;
    let rlm = rr.lo - r_sincos.err;

    let r_upper = rr.hi + rlp; // (rr.lo + ERR);
    let r_lower = rr.hi + rlm; // (rr.lo - ERR);

    // Ziv's accuracy test
    if r_upper == r_lower {
        return rr.to_f64();
    }
    cos_accurate(y, msin_k, cos_k)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cos_test() {
        assert_eq!(f_cos(0.0), 1.0);
        assert_eq!(f_cos(0.0024312312), 0.9999970445588818);
        assert_eq!(f_cos(1.0), 0.5403023058681398);
        assert_eq!(f_cos(-0.5), 0.8775825618903728);
        assert!(f_cos(f64::INFINITY).is_nan());
        assert!(f_cos(f64::NEG_INFINITY).is_nan());
        assert!(f_cos(f64::NAN).is_nan());
    }

    #[test]
    fn sin_test() {
        assert_eq!(f_sin(0.00000002149119332890786), 0.00000002149119332890786);
        assert_eq!(f_sin(2.8477476437362989E-306), 2.8477476437362989E-306);
        assert_eq!(f_sin(0.0024312312), 0.002431228804879309);
        assert_eq!(f_sin(0.0), 0.0);
        assert_eq!(f_sin(1.0), 0.8414709848078965);
        assert_eq!(f_sin(-0.5), -0.479425538604203);
        assert!(f_sin(f64::INFINITY).is_nan());
        assert!(f_sin(f64::NEG_INFINITY).is_nan());
        assert!(f_sin(f64::NAN).is_nan());
    }
}

Coverage Report

Created: 2025-12-05 07:37

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::common::{dyad_fmla, f_fmla, min_normal_f64};
30		use crate::double_double::DoubleDouble;
31		use crate::dyadic_float::{DyadicFloat128, DyadicSign};
32		use crate::rounding::CpuRound;
33		use crate::sin_helper::sincos_eval_dd;
34		use crate::sin_table::SIN_K_PI_OVER_128;
35		use crate::sincos_dyadic::SIN_K_PI_OVER_128_F128;
36		use crate::sincos_reduce::LargeArgumentReduction;
37
38		// For 2^-7 < \|x\| < 2^16, return k and u such that:
39		// k = round(x * 128/pi)
40		// x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo
41		// Error bound:
42		// \|(x - k * pi/128) - (u_hi + u_lo)\| <= max(ulp(ulp(u_hi)), 2^-119)
43		// <= 2^-111.
44		#[inline]
45	0	pub(crate) fn range_reduction_small(x: f64) -> (DoubleDouble, u64) {
46		const MPI_OVER_128: [u64; 3] = [0xbf9921fb54400000, 0xbd70b4611a600000, 0xbb43198a2e037073];
47		const ONE_TWENTY_EIGHT_OVER_PI_D: f64 = f64::from_bits(0x40445f306dc9c883);
48	0	let prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D;
49	0	let kd = prod_hi.cpu_round();
50
51		// Let y = x - k * (pi/128)
52		// Then \|y\| < pi / 256
53		// With extra rounding errors, we can bound \|y\| < 1.6 * 2^-7.
54	0	let y_hi = f_fmla(kd, f64::from_bits(MPI_OVER_128[0]), x); // Exact
55		// \|u.hi\| < 1.6*2^-7
56	0	let u_hi = f_fmla(kd, f64::from_bits(MPI_OVER_128[1]), y_hi);
57
58	0	let u0 = y_hi - u_hi; // Exact
59		// \|u.lo\| <= max(ulp(u.hi), \|kd * MPI_OVER_128[2]\|)
60	0	let u1 = f_fmla(kd, f64::from_bits(MPI_OVER_128[1]), u0); // Exact
61	0	let u_lo = f_fmla(kd, f64::from_bits(MPI_OVER_128[2]), u1);
62		// Error bound:
63		// \|x - k * pi/128\| - (u.hi + u.lo) <= ulp(u.lo)
64		// <= ulp(max(ulp(u.hi), kd*MPI_OVER_128[2]))
65		// <= 2^(-7 - 104) = 2^-111.
66	0	(DoubleDouble::new(u_lo, u_hi), unsafe {
67	0	kd.to_int_unchecked::<i64>() as u64 // indeterminate values is always filtered out before this call, as well only lowest bits are used
68	0	})
69	0	}
70
71		#[inline]
72	0	pub(crate) fn get_sin_k_rational(kk: u64) -> DyadicFloat128 {
73	0	let idx = if (kk & 64) != 0 {
74	0	64 - (kk & 63)
75		} else {
76	0	kk & 63
77		};
78	0	let mut ans = SIN_K_PI_OVER_128_F128[idx as usize];
79	0	if (kk & 128) != 0 {
80	0	ans.sign = DyadicSign::Neg;
81	0	}
82	0	ans
83	0	}
84
85		pub(crate) struct SinCos {
86		pub(crate) v_sin: DoubleDouble,
87		pub(crate) v_cos: DoubleDouble,
88		pub(crate) err: f64,
89		}
90
91		#[inline]
92	0	pub(crate) fn sincos_eval(u: DoubleDouble) -> SinCos {
93		// Evaluate sin(y) = sin(x - k * (pi/128))
94		// We use the degree-7 Taylor approximation:
95		// sin(y) ~ y - y^3/3! + y^5/5! - y^7/7!
96		// Then the error is bounded by:
97		// \|sin(y) - (y - y^3/3! + y^5/5! - y^7/7!)\| < \|y\|^9/9! < 2^-54/9! < 2^-72.
98		// For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms
99		// < ulp(u_hi^3) gives us:
100		// y - y^3/3! + y^5/5! - y^7/7! = ...
101		// ~ u_hi + u_hi^3 * (-1/6 + u_hi^2 * (1/120 - u_hi^2 * 1/5040)) +
102		// + u_lo (1 + u_hi^2 * (-1/2 + u_hi^2 / 24))
103	0	let u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58.
104		// p1 ~ 1/120 + u_hi^2 / 5040.
105	0	let p1 = f_fmla(
106	0	u_hi_sq,
107	0	f64::from_bits(0xbf2a01a01a01a01a),
108	0	f64::from_bits(0x3f81111111111111),
109		);
110		// q1 ~ -1/2 + u_hi^2 / 24.
111	0	let q1 = f_fmla(
112	0	u_hi_sq,
113	0	f64::from_bits(0x3fa5555555555555),
114	0	f64::from_bits(0xbfe0000000000000),
115		);
116	0	let u_hi_3 = u_hi_sq * u.hi;
117		// p2 ~ -1/6 + u_hi^2 (1/120 - u_hi^2 * 1/5040)
118	0	let p2 = f_fmla(u_hi_sq, p1, f64::from_bits(0xbfc5555555555555));
119		// q2 ~ 1 + u_hi^2 (-1/2 + u_hi^2 / 24)
120	0	let q2 = f_fmla(u_hi_sq, q1, 1.0);
121	0	let sin_lo = f_fmla(u_hi_3, p2, u.lo * q2);
122		// Overall, \|sin(y) - (u_hi + sin_lo)\| < 2*ulp(u_hi^3) < 2^-69.
123
124		// Evaluate cos(y) = cos(x - k * (pi/128))
125		// We use the degree-8 Taylor approximation:
126		// cos(y) ~ 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8!
127		// Then the error is bounded by:
128		// \|cos(y) - (...)\| < \|y\|^10/10! < 2^-81
129		// For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms
130		// < ulp(u_hi^3) gives us:
131		// 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! = ...
132		// ~ 1 - u_hi^2/2 + u_hi^4(1/24 + u_hi^2 (-1/720 + u_hi^2/40320)) +
133		// + u_hi u_lo (-1 + u_hi^2/6)
134		// We compute 1 - u_hi^2 accurately:
135		// v_hi + v_lo ~ 1 - u_hi^2/2
136		// with error <= 2^-105.
137	0	let u_hi_neg_half = (-0.5) * u.hi;
138
139		let (mut v_lo, v_hi);
140
141		#[cfg(any(
142		all(
143		any(target_arch = "x86", target_arch = "x86_64"),
144		target_feature = "fma"
145		),
146		target_arch = "aarch64"
147		))]
148		{
149		v_hi = f_fmla(u.hi, u_hi_neg_half, 1.0);
150		v_lo = 1.0 - v_hi; // Exact
151		v_lo = f_fmla(u.hi, u_hi_neg_half, v_lo);
152		}
153
154		#[cfg(not(any(
155		all(
156		any(target_arch = "x86", target_arch = "x86_64"),
157		target_feature = "fma"
158		),
159		target_arch = "aarch64"
160		)))]
161	0	{
162	0	let u_hi_sq_neg_half = DoubleDouble::from_exact_mult(u.hi, u_hi_neg_half);
163	0	let v = DoubleDouble::from_exact_add(1.0, u_hi_sq_neg_half.hi);
164	0	v_lo = v.lo;
165	0	v_lo += u_hi_sq_neg_half.lo;
166	0	v_hi = v.hi;
167	0	}
168
169		// r1 ~ -1/720 + u_hi^2 / 40320
170	0	let r1 = f_fmla(
171	0	u_hi_sq,
172	0	f64::from_bits(0x3efa01a01a01a01a),
173	0	f64::from_bits(0xbf56c16c16c16c17),
174		);
175		// s1 ~ -1 + u_hi^2 / 6
176	0	let s1 = f_fmla(u_hi_sq, f64::from_bits(0x3fc5555555555555), -1.0);
177	0	let u_hi_4 = u_hi_sq * u_hi_sq;
178	0	let u_hi_u_lo = u.hi * u.lo;
179		// r2 ~ 1/24 + u_hi^2 (-1/720 + u_hi^2 / 40320)
180	0	let r2 = f_fmla(u_hi_sq, r1, f64::from_bits(0x3fa5555555555555));
181		// s2 ~ v_lo + u_hi * u_lo * (-1 + u_hi^2 / 6)
182	0	let s2 = f_fmla(u_hi_u_lo, s1, v_lo);
183	0	let cos_lo = f_fmla(u_hi_4, r2, s2);
184		// Overall, \|cos(y) - (v_hi + cos_lo)\| < 2*ulp(u_hi^4) < 2^-75.
185
186	0	let sin_u = DoubleDouble::from_exact_add(u.hi, sin_lo);
187	0	let cos_u = DoubleDouble::from_exact_add(v_hi, cos_lo);
188
189	0	let err = f_fmla(
190	0	u_hi_3,
191	0	f64::from_bits(0x3cc0000000000000),
192	0	f64::from_bits(0x3960000000000000),
193		);
194
195	0	SinCos {
196	0	v_sin: sin_u,
197	0	v_cos: cos_u,
198	0	err,
199	0	}
200	0	}
201
202		#[cold]
203		#[inline(never)]
204	0	fn sin_accurate(y: DoubleDouble, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
205	0	let r_sincos = sincos_eval_dd(y);
206
207		// k is an integer and -pi / 256 <= y <= pi / 256.
208		// Then sin(x) = sin((k * pi/128 + y)
209		// = sin(y) * cos(kpi/128) + cos(y) sin(k*pi/128)
210
211	0	let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
212	0	let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);
213
214	0	let rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y);
215	0	rr.to_f64()
216	0	}
217
218		#[cold]
219	0	pub(crate) fn sin_accurate_near_zero(x: f64) -> f64 {
220		// Generated by Sollya:
221		// d = [0, 0.0490874];
222		// f_sin = sin(y)/y;
223		// Q = fpminimax(f_sin, [\|0, 2, 4, 6, 8, 10, 12\|], [\|107...\|], d, relative, floating);
224		const C: [(u64, u64); 7] = [
225		(0x0000000000000000, 0x3ff0000000000000),
226		(0xbc65555555555217, 0xbfc5555555555555),
227		(0x3c011110f7d49e8c, 0x3f81111111111111),
228		(0xbb65e5b30986fc80, 0xbf2a01a01a01a01a),
229		(0xbb689e86245bd566, 0x3ec71de3a556c6e5),
230		(0xbaccb3d55ccfca58, 0xbe5ae64567954935),
231		(0x3a6333ef7a00ce40, 0x3de6120ca00ce3a2),
232		];
233	0	let x2 = DoubleDouble::from_exact_mult(x, x);
234	0	let mut p = DoubleDouble::quick_mul_add(
235	0	x2,
236	0	DoubleDouble::from_bit_pair(C[6]),
237	0	DoubleDouble::from_bit_pair(C[5]),
238		);
239	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
240	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
241	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
242	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
243	0	p = DoubleDouble::quick_mul_add_f64(x2, p, f64::from_bits(0x3ff0000000000000));
244	0	p = DoubleDouble::quick_mult_f64(p, x);
245	0	p.to_f64()
246	0	}
247
248		/// Sine for double precision
249		///
250		/// ULP 0.5
251	0	pub fn f_sin(x: f64) -> f64 {
252	0	let x_e = (x.to_bits() >> 52) & 0x7ff;
253		const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
254
255		let y: DoubleDouble;
256		let k;
257
258	0	let mut argument_reduction = LargeArgumentReduction::default();
259
260	0	if x_e < E_BIAS + 16 {
261		// \|x\| < 2^32 (with FMA) or \|x\| < 2^23 (w/o FMA)
262	0	let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
263	0	if ax <= 0x3fa921fbd34a9550 {
264		// \|x\| <= 0.0490874
265	0	if x_e < E_BIAS - 26 {
266		// \|x\| < 2^-26
267	0	if x == 0.0 {
268		// Signed zeros.
269	0	return x;
270	0	}
271
272		// For \|x\| < 2^-26, \|sin(x) - x\| < ulp(x)/2.
273	0	return dyad_fmla(x, f64::from_bits(0xbc90000000000000), x);
274	0	}
275
276		// Polynomial for sin(x)/x
277		// Generated by Sollya:
278		// d = [0, 0.0490874];
279		// f_sin = sin(y)/y;
280		// Q = fpminimax(f_sin, [\|0, 2, 4, 6, 8\|], [\|1, D...\|], d, relative, floating);
281
282	0	let x2 = x * x;
283	0	let x4 = x2 * x2;
284		const C: [u64; 4] = [
285		0xbfc5555555555555,
286		0x3f8111111110e45a,
287		0xbf2a019ffd7fdaaf,
288		0x3ec71819b9bf01ef,
289		];
290	0	let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
291	0	let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
292	0	let w0 = f_fmla(x4, p23, p01);
293	0	let w1 = x2 * w0 * x;
294	0	let r = DoubleDouble::from_exact_add(x, w1);
295	0	let err = f_fmla(
296	0	x2,
297	0	f64::from_bits(0x3cb0000000000000), // 2^-52
298	0	f64::from_bits(0x3be0000000000000), // 2^-65
299		);
300	0	let ub = r.hi + (r.lo + err);
301	0	let lb = r.hi + (r.lo - err);
302	0	if ub == lb {
303	0	return ub;
304	0	}
305	0	return sin_accurate_near_zero(x);
306	0	}
307
308		// Small range reduction.
309	0	(y, k) = range_reduction_small(x);
310		} else {
311		// Inf or NaN
312	0	if x_e > 2 * E_BIAS {
313		// sin(+-Inf) = NaN
314	0	return x + f64::NAN;
315	0	}
316
317		// Large range reduction.
318	0	(k, y) = argument_reduction.reduce(x);
319		}
320
321	0	let r_sincos = sincos_eval(y);
322
323		// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
324	0	let sk = SIN_K_PI_OVER_128[(k & 255) as usize];
325	0	let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
326
327	0	let sin_k = DoubleDouble::from_bit_pair(sk);
328	0	let cos_k = DoubleDouble::from_bit_pair(ck);
329
330	0	let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
331	0	let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);
332
333		// sin_k_cos_y is always >> cos_k_sin_y
334	0	let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
335	0	rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
336
337	0	let rlp = rr.lo + r_sincos.err;
338	0	let rlm = rr.lo - r_sincos.err;
339
340	0	let r_upper = rr.hi + rlp; // (rr.lo + ERR);
341	0	let r_lower = rr.hi + rlm; // (rr.lo - ERR);
342
343		// Ziv's accuracy test
344	0	if r_upper == r_lower {
345	0	return rr.to_f64();
346	0	}
347
348	0	sin_accurate(y, sin_k, cos_k)
349	0	}
350
351		#[cold]
352		#[inline(never)]
353	0	fn cos_accurate(y: DoubleDouble, msin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
354	0	let r_sincos = sincos_eval_dd(y);
355
356		// After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
357		// So k is an integer and -pi / 256 <= y <= pi / 256.
358		// Then sin(x) = sin((k * pi/128 + y)
359		// = sin(y) * cos(kpi/128) + cos(y) sin(k*pi/128)
360
361	0	let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k);
362	0	let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k);
363
364	0	let rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y);
365	0	rr.to_f64()
366	0	}
367
368		#[cold]
369	0	pub(crate) fn cos_accurate_near_zero(x: f64) -> f64 {
370		// Generated by Sollya:
371		// d = [0, 0.0490874];
372		// f_sin = sin(y)/y;
373		// Q = fpminimax(f_sin, [\|0, 2, 4, 6, 8, 10\|], [\|107...\|], d, relative, floating);
374		const C: [(u64, u64); 6] = [
375		(0xba2fa1c000000000, 0x3ff0000000000000),
376		(0x3b1cd6ead3800000, 0xbfe0000000000000),
377		(0x3c45112931063916, 0x3fa5555555555555),
378		(0xbba1c1e3ab3b09e0, 0xbf56c16c16c16b2b),
379		(0x3b937bc2f4ea7db6, 0x3efa01a019495fca),
380		(0x3b307fd5e1b021b3, 0xbe927e0d57d894f8),
381		];
382	0	let x2 = DoubleDouble::from_exact_mult(x, x);
383	0	let mut p = DoubleDouble::quick_mul_add(
384	0	x2,
385	0	DoubleDouble::from_bit_pair(C[5]),
386	0	DoubleDouble::from_bit_pair(C[4]),
387		);
388	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
389	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
390	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
391	0	p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
392	0	p.to_f64()
393	0	}
394
395		/// Cosine for double precision
396		///
397		/// ULP 0.5
398	0	pub fn f_cos(x: f64) -> f64 {
399	0	let x_e = (x.to_bits() >> 52) & 0x7ff;
400		const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
401
402		let y: DoubleDouble;
403		let k;
404
405	0	let mut argument_reduction = LargeArgumentReduction::default();
406
407	0	if x_e < E_BIAS + 16 {
408		// \|x\| < 2^32 (with FMA) or \|x\| < 2^23 (w/o FMA)
409	0	let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
410	0	if ax <= 0x3fa921fbd34a9550 {
411		// \|x\| <= 0.0490874
412	0	if x_e < E_BIAS - 27 {
413		// \|x\| < 2^-27
414	0	if x == 0.0 {
415		// Signed zeros.
416	0	return 1.0;
417	0	}
418		// For \|x\| < 2^-26, \|sin(x) - x\| < ulp(x)/2.
419	0	return 1.0 - min_normal_f64();
420	0	}
421
422		// Polynomial for cos(x)
423		// Generated by Sollya:
424		// d = [0, 0.0490874];
425		// f_cos = cos(y);
426		// Q = fpminimax(f_cos, [\|0, 2, 4, 6, 8\|], [\|1, D...\|], d, relative, floating);
427
428	0	let x2 = x * x;
429	0	let x4 = x2 * x2;
430		const C: [u64; 4] = [
431		0xbfe0000000000000,
432		0x3fa55555555554a4,
433		0xbf56c16c1619b84a,
434		0x3efa013d3d01cf7f,
435		];
436	0	let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
437	0	let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
438	0	let w0 = f_fmla(x4, p23, p01);
439	0	let w1 = x2 * w0;
440	0	let r = DoubleDouble::from_exact_add(1., w1);
441	0	let err = f_fmla(
442	0	x2,
443	0	f64::from_bits(0x3cb0000000000000), // 2^-52
444	0	f64::from_bits(0x3be0000000000000), // 2^-65
445		);
446
447	0	let ub = r.hi + (r.lo + err);
448	0	let lb = r.hi + (r.lo - err);
449	0	if ub == lb {
450	0	return ub;
451	0	}
452	0	return cos_accurate_near_zero(x);
453	0	} else {
454	0	// // Small range reduction.
455	0	(y, k) = range_reduction_small(x);
456	0	}
457		} else {
458		// Inf or NaN
459	0	if x_e > 2 * E_BIAS {
460		// cos(+-Inf) = NaN
461	0	return x + f64::NAN;
462	0	}
463
464		// Large range reduction.
465	0	(k, y) = argument_reduction.reduce(x);
466		}
467	0	let r_sincos = sincos_eval(y);
468
469		// After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128).
470		// So k is an integer and -pi / 256 <= y <= pi / 256.
471		// Then cos(x) = cos((k * pi/128 + y)
472		// = cos(y) * cos(kpi/128) - sin(y) sin(k*pi/128)
473	0	let sk = SIN_K_PI_OVER_128[(k.wrapping_add(128) & 255) as usize];
474	0	let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
475	0	let msin_k = DoubleDouble::from_bit_pair(sk);
476	0	let cos_k = DoubleDouble::from_bit_pair(ck);
477
478	0	let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k);
479	0	let cos_k_msin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k);
480
481		// cos_k_cos_y is always >> cos_k_msin_y
482	0	let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
483	0	rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;
484	0	let rlp = rr.lo + r_sincos.err;
485	0	let rlm = rr.lo - r_sincos.err;
486
487	0	let r_upper = rr.hi + rlp; // (rr.lo + ERR);
488	0	let r_lower = rr.hi + rlm; // (rr.lo - ERR);
489
490		// Ziv's accuracy test
491	0	if r_upper == r_lower {
492	0	return rr.to_f64();
493	0	}
494	0	cos_accurate(y, msin_k, cos_k)
495	0	}
496
497		#[cfg(test)]
498		mod tests {
499		use super::*;
500
501		#[test]
502		fn cos_test() {
503		assert_eq!(f_cos(0.0), 1.0);
504		assert_eq!(f_cos(0.0024312312), 0.9999970445588818);
505		assert_eq!(f_cos(1.0), 0.5403023058681398);
506		assert_eq!(f_cos(-0.5), 0.8775825618903728);
507		assert!(f_cos(f64::INFINITY).is_nan());
508		assert!(f_cos(f64::NEG_INFINITY).is_nan());
509		assert!(f_cos(f64::NAN).is_nan());
510		}
511
512		#[test]
513		fn sin_test() {
514		assert_eq!(f_sin(0.00000002149119332890786), 0.00000002149119332890786);
515		assert_eq!(f_sin(2.8477476437362989E-306), 2.8477476437362989E-306);
516		assert_eq!(f_sin(0.0024312312), 0.002431228804879309);
517		assert_eq!(f_sin(0.0), 0.0);
518		assert_eq!(f_sin(1.0), 0.8414709848078965);
519		assert_eq!(f_sin(-0.5), -0.479425538604203);
520		assert!(f_sin(f64::INFINITY).is_nan());
521		assert!(f_sin(f64::NEG_INFINITY).is_nan());
522		assert!(f_sin(f64::NAN).is_nan());
523		}
524		}