/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.29/src/sincos_reduce.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bits::set_exponent_f64;
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::rounding::CpuRound;
use crate::sincos_reduce_tables::ONE_TWENTY_EIGHT_OVER_PI;

#[derive(Debug)]
pub(crate) struct AngleReduced {
    pub(crate) angle: DoubleDouble,
}

#[derive(Default)]
pub(crate) struct LargeArgumentReduction {
    x_reduced: f64,
    idx: u64,
    y_hi: f64,
    y_lo: f64,
    // Low part of x * ONE_TWENTY_EIGHT_OVER_PI[idx][1].
    y_mid: DoubleDouble,
}

// For large range |x| >= 2^16, we perform the range reduction computations as:
//   u = x - k * pi/128 = (pi/128) * (x * (128/pi) - k).
// We use the exponent of x to find 4 double-chunks of 128/pi:
// c_hi, c_mid, c_lo, c_lo_2 such that:
//   1) ulp(round(x * c_hi, D, RN)) >= 2^8 = 256,
//   2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then
//        min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53.
// This will allow us to drop the high part ph_hi and the addition:
//   (ph_lo + pm_hi) mod 1
// can be exactly representable in a double precision.
// This will allow us to do split the computations as:
//   (x * 256/pi) ~ x * (c_hi + c_mid + c_lo + c_lo_2)    (mod 256)
//                ~ (ph_lo + pm_hi) + (pm_lo + x * c_lo) + x * c_lo_2.
// Then,
//   round(x * 128/pi) = round(ph_lo + pm_hi)    (mod 256)
// And the high part of fractional part of (x * 128/pi) can simply be:
//   {x * 128/pi}_hi = {ph_lo + pm_hi}.
// To prevent overflow when x is very large, we simply scale up
// (c_hi, c_mid, c_lo, c_lo_2) by a fixed power of 2 (based on the index) and
// scale down x by the same amount.
impl LargeArgumentReduction {
    #[cold]
    pub(crate) fn accurate(&self) -> DyadicFloat128 {
        // Sage math:
        // R = RealField(128)
        // π = R.pi()
        //
        // def format_hex(value):
        //     l = hex(value)[2:]
        //     n = 4
        //     x = [l[i:i + n] for i in range(0, len(l), n)]
        //     return "0x" + "_".join(x) + "_u128"
        //
        // def print_dyadic(value):
        //     (s, m, e) = RealField(128)(value).sign_mantissa_exponent();
        //     print("DyadicFloat128 {")
        //     print(f"    sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},")
        //     print(f"    exponent: {e},")
        //     print(f"    mantissa: {format_hex(m)},")
        //     print("};")
        //
        // print_dyadic(π/128)
        const PI_OVER_128_F128: DyadicFloat128 = DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -133,
            mantissa: 0xc90f_daa2_2168_c234_c4c6_628b_80dc_1cd1_u128,
        };

        // y_lo = x * c_lo + pm.lo
        let one_pi_rot = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize];
        let y_lo_0 = DyadicFloat128::new_from_f64(self.x_reduced * f64::from_bits(one_pi_rot.3));
        let y_lo_1 = DyadicFloat128::new_from_f64(self.y_lo) + y_lo_0;
        let y_mid_f128 = DyadicFloat128::new_from_f64(self.y_mid.lo) + y_lo_1;
        let y_hi_f128 =
            DyadicFloat128::new_from_f64(self.y_hi) + DyadicFloat128::new_from_f64(self.y_mid.hi);
        let y = y_hi_f128 + y_mid_f128;

        y * PI_OVER_128_F128
    }

    pub(crate) fn reduce(&mut self, x: f64) -> (u64, DoubleDouble) {
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
        let mut xbits = x.to_bits();
        let x_e = ((x.to_bits() >> 52) & 0x7ff) as i64;
        let x_e_m62 = x_e.wrapping_sub(E_BIAS as i64 + 62);
        self.idx = (x_e_m62 >> 4).wrapping_add(3) as u64;
        // Scale x down by 2^(-(16 * (idx - 3))
        xbits = set_exponent_f64(
            xbits,
            (x_e_m62 & 15)
                .wrapping_add(E_BIAS as i64)
                .wrapping_add(62i64) as u64,
        );
        // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78
        self.x_reduced = f64::from_bits(xbits);
        // x * c_hi = ph.hi + ph.lo exactly.
        let one_pi = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize];
        let ph = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.0));
        // x * c_mid = pm.hi + pm.lo exactly.
        let pm = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.1));
        // x * c_lo = pl.hi + pl.lo exactly.
        let pl = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.2));
        // Extract integral parts and fractional parts of (ph.lo + pm.hi).
        let sum_hi = ph.lo + pm.hi;
        let kd = sum_hi.cpu_round();

        // x * 128/pi mod 1 ~ y_hi + y_mid + y_lo
        self.y_hi = (ph.lo - kd) + pm.hi; // Exact
        self.y_mid = DoubleDouble::from_exact_add(pm.lo, pl.hi);
        self.y_lo = pl.lo;

        // y_l = x * c_lo_2 + pl.lo
        let y_l = dd_fmla(self.x_reduced, f64::from_bits(one_pi.3), self.y_lo);
        let mut y = DoubleDouble::from_exact_add(self.y_hi, self.y_mid.hi);
        y.lo += self.y_mid.lo + y_l;

        // Digits of pi/128, generated by SageMath with:
        // import struct
        // from sage.all import *
        //
        // def double_to_hex(f):
        //     return "0x" + format(struct.unpack('<Q', struct.pack('<d', f))[0], '016x')
        //
        // R = RealField(128)
        // π = R.pi()
        //
        // RN = RealField(53)
        //
        // hi = RN(π/128)
        // lo = RN(π/128 - R(hi))
        //
        // print("lo: " + double_to_hex(lo))
        // print("hi: " + double_to_hex(hi))
        const PI_OVER_128_DD: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0x3c31a62633145c07),
            f64::from_bits(0x3f9921fb54442d18),
        );

        // Error bound: with {a} denote the fractional part of a, i.e.:
        //   {a} = a - round(a)
        // Then,
        //   | {x * 128/pi} - (y_hi + y_lo) | <=  ulp(ulp(y_hi)) <= 2^-105
        //   | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110
        let u = DoubleDouble::quick_mult(y, PI_OVER_128_DD);

        (kd as i64 as u64, u)
    }
}

// Generated by SageMath
// nwords = 20
// prec = nwords * 64 + 150
// R = RealField(prec)
// invpi = R(1) / (R(2) * R.pi())
//
// scale = R(2)**64
//
// words = []
// x = invpi
// for i in range(nwords):
//     y = floor(x * scale)
//     words.append(int(y))
//     x = x * scale - y
//
// for w in words:
//     print("0x{:016x},".format(w))
static INVPI_2_64: [u64; 20] = [
    0x28be60db9391054a,
    0x7f09d5f47d4d3770,
    0x36d8a5664f10e410,
    0x7f9458eaf7aef158,
    0x6dc91b8e909374b8,
    0x1924bba82746487,
    0x3f877ac72c4a69cf,
    0xba208d7d4baed121,
    0x3a671c09ad17df90,
    0x4e64758e60d4ce7d,
    0x272117e2ef7e4a0e,
    0xc7fe25fff7816603,
    0xfbcbc462d6829b47,
    0xdb4d9fb3c9f2c26d,
    0xd3d18fd9a797fa8b,
    0x5d49eeb1faf97c5e,
    0xcf41ce7de294a4ba,
    0x9afed7ec47e35742,
    0x1580cc11bf1edaea,
    0xfc33ef0826bd0d87,
];

#[inline]
fn create_dd(c1: u64, c0: u64) -> DoubleDouble {
    let mut c1 = c1;
    let mut c0 = c0;
    if c1 != 0 {
        let e = c1.leading_zeros();
        if e != 0 {
            c1 = (c1 << e) | (c0 >> (64 - e));
            c0 = c0.wrapping_shl(e);
        }
        let f = 0x3fe - e;
        let t_u = ((f as u64) << 52) | ((c1 << 1) >> 12);
        let hi = f64::from_bits(t_u);
        c0 = (c1 << 53) | (c0 >> 11);
        let l = if c0 != 0 {
            let g = c0.leading_zeros();
            if (g) != 0 {
                c0 = c0.wrapping_shl(g);
            }
            let t_u = (((f - 53 - g) as u64) << 52) | ((c0 << 1) >> 12);
            f64::from_bits(t_u)
        } else {
            0.
        };
        DoubleDouble::new(l, hi)
    } else if c0 != 0 {
        let e = c0.leading_zeros();
        let f = 0x3fe - 64 - e;
        c0 = c0.wrapping_shl(e + 1); // most significant bit shifted out

        /* put the upper 52 bits of c0 into h */
        let t_u = ((f as u64) << 52) | (c0 >> 12);
        let hi = f64::from_bits(t_u);
        /* put the lower 12 bits of c0 into l */
        c0 = c0.wrapping_shl(52);
        let l = if c0 != 0 {
            let g = c0.leading_zeros();
            c0 = c0.wrapping_shl(g + 1);
            let t_u = (((f - 64 - g) as u64) << 52) | (c0 >> 12);
            f64::from_bits(t_u)
        } else {
            0.
        };
        DoubleDouble::new(l, hi)
    } else {
        DoubleDouble::default()
    }
}

#[inline]
fn frac_2pi(x: f64) -> DoubleDouble {
    if x <= f64::from_bits(0x401921fb54442d17)
    // x < 2*pi
    {
        /* | CH+CL - 1/(2pi) | < 2^-110.523 */
        const C: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0xbc66b01ec5417056),
            f64::from_bits(0x3fc45f306dc9c883),
        );
        let mut z = DoubleDouble::quick_mult_f64(C, x);
        z.lo = f_fmla(C.lo, x, z.lo);
        z
    } else
    // x > 0x1.921fb54442d17p+2
    {
        let t = x.to_bits();
        let mut e = ((t >> 52) & 0x7ff) as i32; /* 1025 <= e <= 2046 */
        let m = (1u64 << 52) | (t & 0xfffffffffffffu64);
        let mut c0: u64;
        let mut c1: u64;
        let mut c2: u64;
        // x = m/2^53 * 2^(e-1022)
        if e <= 1074
        // 1025 <= e <= 1074: 2^2 <= x < 2^52
        {
            let mut u = m as u128 * INVPI_2_64[1] as u128;
            c0 = u as u64;
            c1 = (u >> 64) as u64;
            u = m as u128 * INVPI_2_64[0] as u128;
            c1 = c1.wrapping_add(u as u64);
            c2 = (u >> 64) as u64 + (c1 < (u as u64)) as u64;
            e = 1075 - e; // 1 <= e <= 50
        } else
        // 1075 <= e <= 2046, 2^52 <= x < 2^1024
        {
            let i = (e - 1138 + 63) / 64; // i = ceil((e-1138)/64), 0 <= i <= 15
            let mut u = m as u128 * INVPI_2_64[i as usize + 2] as u128;
            c0 = u as u64;
            c1 = (u >> 64) as u64;
            u = m as u128 * INVPI_2_64[i as usize + 1] as u128;
            c1 = c1.wrapping_add(u as u64);
            c2 = (u >> 64) as u64 + ((c1) < (u as u64)) as u64;
            u = m as u128 * INVPI_2_64[i as usize] as u128;
            c2 = c2.wrapping_add(u as u64);
            e = 1139 + (i << 6) - e; // 1 <= e <= 64
        }
        if e == 64 {
            c0 = c1;
            c1 = c2;
        } else {
            c0 = (c1 << (64 - e)) | c0 >> e;
            c1 = (c2 << (64 - e)) | c1 >> e;
        }
        create_dd(c1, c0)
    }
}

/// Returns x mod 2*PI
#[inline]
pub(crate) fn rem2pi_any(x: f64) -> AngleReduced {
    const TWO_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3cb1a62633145c07),
        f64::from_bits(0x401921fb54442d18),
    );
    let a = frac_2pi(x);
    let z = DoubleDouble::quick_mult(a, TWO_PI);
    AngleReduced { angle: z }
}

/**
Generated by SageMath:
```python
def triple_double_split(x, n):
    VR = RealField(1000)
    R32 = RealField(n)
    hi = R32(x)
    rem1 = x - VR(hi)
    med = R32(rem1)
    rem2 = rem1 - VR(med)
    lo = R32(rem2)
    rem2 = rem1 - VR(med)
    lo = R32(rem2)
    return (hi, med, lo)

hi, med, lo = triple_double_split((RealField(600).pi() * RealField(600)(2)), 51)

print(double_to_hex(hi))
print(double_to_hex(med))
print(double_to_hex(lo))
```
 **/
#[inline]
fn rem2pif_small(x: f32) -> f64 {
    const ONE_OVER_PI_2: f64 = f64::from_bits(0x3fc45f306dc9c883);
    const TWO_PI: [u64; 3] = [0x401921fb54440000, 0x3da68c234c4c0000, 0x3b498a2e03707345];
    let dx = x as f64;
    let kd = (dx * ONE_OVER_PI_2).cpu_round();
    let mut y = dd_fmla(-kd, f64::from_bits(TWO_PI[0]), dx);
    y = dd_fmla(-kd, f64::from_bits(TWO_PI[1]), y);
    y = dd_fmla(-kd, f64::from_bits(TWO_PI[2]), y);
    y
}

#[inline]
pub(crate) fn rem2pif_any(x: f32) -> f64 {
    let x_abs = x.abs();
    if x_abs.to_bits() < 0x5600_0000u32 {
        rem2pif_small(x_abs)
    } else {
        let p = rem2pi_any(x_abs as f64);
        p.angle.to_f64()
    }
}

pub(crate) fn frac2pi_d128(x: DyadicFloat128) -> DyadicFloat128 {
    let e = x.biased_exponent();

    let mut fe = x;

    if e <= 1
    // |X| < 2
    {
        /* multiply by T[0]/2^64 + T[1]/2^128, where
        |T[0]/2^64 + T[1]/2^128 - 1/(2pi)| < 2^-130.22 */
        let mut x_hi = (x.mantissa >> 64) as u64;
        let mut x_lo: u64;
        let mut u: u128 = x_hi as u128 * INVPI_2_64[1] as u128;
        let tiny: u64 = u as u64;
        x_lo = (u >> 64) as u64;
        u = x_hi as u128 * INVPI_2_64[0] as u128;
        x_lo = x_lo.wrapping_add(u as u64);
        x_hi = (u >> 64) as u64 + (x_lo < u as u64) as u64;
        /* hi + lo/2^64 + tiny/2^128 = hi_in * (T[0]/2^64 + T[1]/2^128) thus
        |hi + lo/2^64 + tiny/2^128 - hi_in/(2*pi)| < hi_in * 2^-130.22
        Since X is normalized at input, hi_in >= 2^63, and since T[0] >= 2^61,
        we have hi >= 2^(63+61-64) = 2^60, thus the normalize() below
        perform a left shift by at most 3 bits */
        let mut e = x.exponent;
        fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
        fe.normalize();
        e -= fe.exponent;
        // put the upper e bits of tiny into X->lo
        if (e) != 0 {
            x_hi = (fe.mantissa >> 64) as u64;
            x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64;
            x_lo |= tiny >> (64 - e);
            fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
        }
        /* The error is bounded by 2^-130.22 (relative) + ulp(lo) (absolute).
           Since now X->hi >= 2^63, the absolute error of ulp(lo) converts into
           a relative error of less than 2^-127.
           This yields a maximal relative error of:
           (1 + 2^-130.22) * (1 + 2^-127) - 1 < 2^-126.852.
        */
        return fe;
    }

    // now 2 <= e <= 1024

    /* The upper 64-bit word X->hi corresponds to hi/2^64*2^e, if multiplied by
    T[i]/2^((i+1)*64) it yields hi*T[i]/2^128 * 2^(e-i*64).
    If e-64i <= -128, it contributes to less than 2^-128;
    if e-64i >= 128, it yields an integer, which is 0 modulo 1.
    We thus only consider the values of i such that -127 <= e-64i <= 127,
    i.e., (-127+e)/64 <= i <= (127+e)/64.
    Up to 4 consecutive values of T[i] can contribute (only 3 when e is a
    multiple of 64). */
    let i = (if e < 127 { 0 } else { (e - 127 + 64 - 1) / 64 }) as usize; // ceil((e-127)/64)
    // 0 <= i <= 15
    let mut c: [u64; 5] = [0u64; 5];

    let mut x_hi = (x.mantissa >> 64) as u64;
    let mut x_lo: u64;

    let mut u: u128 = x_hi as u128 * INVPI_2_64[i + 3] as u128; // i+3 <= 18
    c[0] = u as u64;
    c[1] = (u >> 64) as u64;
    u = x_hi as u128 * INVPI_2_64[i + 2] as u128;
    c[1] = c[1].wrapping_add(u as u64);
    c[2] = (u >> 64) as u64 + (c[1] < u as u64) as u64;
    u = x_hi as u128 * INVPI_2_64[i + 1] as u128;
    c[2] = c[2].wrapping_add(u as u64);
    c[3] = (u >> 64) as u64 + (c[2] < u as u64) as u64;
    u = x_hi as u128 * INVPI_2_64[i] as u128;
    c[3] = c[3].wrapping_add(u as u64);
    c[4] = (u >> 64) as u64 + (c[3] < u as u64) as u64;

    let f = e as i32 - 64 * i as i32; // hi*T[i]/2^128 is multiplied by 2^f

    /* {c, 5} = hi*(T[i]+T[i+1]/2^64+T[i+2]/2^128+T[i+3]/2^192) */
    /* now shift c[0..4] by f bits to the left */
    let tiny;
    if f < 64 {
        x_hi = (c[4] << f) | (c[3] >> (64 - f));
        x_lo = (c[3] << f) | (c[2] >> (64 - f));
        tiny = (c[2] << f) | (c[1] >> (64 - f));
        /* the ignored part was less than 1 in c[1],
        thus less than 2^(f-64) <= 1/2 in tiny */
    } else if f == 64 {
        x_hi = c[3];
        x_lo = c[2];
        tiny = c[1];
        /* the ignored part was less than 1 in c[1],
        thus less than 1 in tiny */
    } else
    /* 65 <= f <= 127: this case can only occur when e >= 65 */
    {
        let g = f - 64; /* 1 <= g <= 63 */
        /* we compute an extra term */
        u = x_hi as u128 * INVPI_2_64[i + 4] as u128; // i+4 <= 19
        u >>= 64;
        c[0] = c[0].wrapping_add(u as u64);
        c[1] += (c[0] < u as u64) as u64;
        c[2] += ((c[0] < u as u64) && c[1] == 0) as u64;
        c[3] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0) as u64;
        c[4] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0 && c[3] == 0) as u64;
        x_hi = (c[3] << g) | (c[2] >> (64 - g));
        x_lo = (c[2] << g) | (c[1] >> (64 - g));
        tiny = (c[1] << g) | (c[0] >> (64 - g));
        /* the ignored part was less than 1 in c[0],
        thus less than 1/2 in tiny */
    }
    let mut fe = x;
    fe.exponent = -127;
    fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
    fe.normalize();
    let ze = fe.biased_exponent();
    if ze < 0 {
        x_hi = (fe.mantissa >> 64) as u64;
        x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64;
        x_lo |= tiny >> (64 + ze);
        fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
    }
    fe
}

pub(crate) fn rem2pi_f128(x: DyadicFloat128) -> DyadicFloat128 {
    /*
        Generated by SageMath:
        ```python
    def double_to_hex(f):
        # Converts Python float (f64) to hex string
        packed = struct.pack('>d', float(f))
        return '0x' + packed.hex()

    def format_dyadic_hex(value):
        l = hex(value)[2:]
        n = 8
        x = [l[i:i + n] for i in range(0, len(l), n)]
        return "0x" + "_".join(x) + "_u128"

    def print_dyadic(value):
        (s, m, e) = RealField(128)(value).sign_mantissa_exponent();
        print("DyadicFloat128 {")
        print(f"    sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},")
        print(f"    exponent: {e},")
        print(f"    mantissa: {format_dyadic_hex(m)},")
        print("},")

    print_dyadic(RealField(300).pi() * 2)
        ```
         */
    const TWO_PI: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -125,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };
    let frac2pi = frac2pi_d128(x);
    TWO_PI * frac2pi
}
//
// #[cfg(test)]
// mod tests {
//     use crate::dyadic_float::DyadicFloat128;
//     use crate::sincos_reduce::{frac_2pi, frac2pi_d128};
//
//     #[test]
//     fn test_reduce() {
//         let x = 1.8481363e36f32;
//         let reduced = frac_2pi(x as f64);
//         let to_reduced2 = DyadicFloat128::new_from_f64(x as f64);
//         let reduced2 = frac2pi_d128(to_reduced2);
//         println!("{:?}", reduced);
//         println!("{:?}", reduced2.fast_as_f64());
//     }
// }

Coverage Report

Created: 2026-05-16 07:04

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::bits::set_exponent_f64;
30		use crate::common::{dd_fmla, f_fmla};
31		use crate::double_double::DoubleDouble;
32		use crate::dyadic_float::{DyadicFloat128, DyadicSign};
33		use crate::rounding::CpuRound;
34		use crate::sincos_reduce_tables::ONE_TWENTY_EIGHT_OVER_PI;
35
36		#[derive(Debug)]
37		pub(crate) struct AngleReduced {
38		pub(crate) angle: DoubleDouble,
39		}
40
41		#[derive(Default)]
42		pub(crate) struct LargeArgumentReduction {
43		x_reduced: f64,
44		idx: u64,
45		y_hi: f64,
46		y_lo: f64,
47		// Low part of x * ONE_TWENTY_EIGHT_OVER_PI[idx][1].
48		y_mid: DoubleDouble,
49		}
50
51		// For large range \|x\| >= 2^16, we perform the range reduction computations as:
52		// u = x - k * pi/128 = (pi/128) * (x * (128/pi) - k).
53		// We use the exponent of x to find 4 double-chunks of 128/pi:
54		// c_hi, c_mid, c_lo, c_lo_2 such that:
55		// 1) ulp(round(x * c_hi, D, RN)) >= 2^8 = 256,
56		// 2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then
57		// min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53.
58		// This will allow us to drop the high part ph_hi and the addition:
59		// (ph_lo + pm_hi) mod 1
60		// can be exactly representable in a double precision.
61		// This will allow us to do split the computations as:
62		// (x * 256/pi) ~ x * (c_hi + c_mid + c_lo + c_lo_2) (mod 256)
63		// ~ (ph_lo + pm_hi) + (pm_lo + x * c_lo) + x * c_lo_2.
64		// Then,
65		// round(x * 128/pi) = round(ph_lo + pm_hi) (mod 256)
66		// And the high part of fractional part of (x * 128/pi) can simply be:
67		// {x * 128/pi}_hi = {ph_lo + pm_hi}.
68		// To prevent overflow when x is very large, we simply scale up
69		// (c_hi, c_mid, c_lo, c_lo_2) by a fixed power of 2 (based on the index) and
70		// scale down x by the same amount.
71		impl LargeArgumentReduction {
72		#[cold]
73	0	pub(crate) fn accurate(&self) -> DyadicFloat128 {
74		// Sage math:
75		// R = RealField(128)
76		// π = R.pi()
77		//
78		// def format_hex(value):
79		// l = hex(value)[2:]
80		// n = 4
81		// x = [l[i:i + n] for i in range(0, len(l), n)]
82		// return "0x" + "_".join(x) + "_u128"
83		//
84		// def print_dyadic(value):
85		// (s, m, e) = RealField(128)(value).sign_mantissa_exponent();
86		// print("DyadicFloat128 {")
87		// print(f" sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},")
88		// print(f" exponent: {e},")
89		// print(f" mantissa: {format_hex(m)},")
90		// print("};")
91		//
92		// print_dyadic(π/128)
93		const PI_OVER_128_F128: DyadicFloat128 = DyadicFloat128 {
94		sign: DyadicSign::Pos,
95		exponent: -133,
96		mantissa: 0xc90f_daa2_2168_c234_c4c6_628b_80dc_1cd1_u128,
97		};
98
99		// y_lo = x * c_lo + pm.lo
100	0	let one_pi_rot = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize];
101	0	let y_lo_0 = DyadicFloat128::new_from_f64(self.x_reduced * f64::from_bits(one_pi_rot.3));
102	0	let y_lo_1 = DyadicFloat128::new_from_f64(self.y_lo) + y_lo_0;
103	0	let y_mid_f128 = DyadicFloat128::new_from_f64(self.y_mid.lo) + y_lo_1;
104	0	let y_hi_f128 =
105	0	DyadicFloat128::new_from_f64(self.y_hi) + DyadicFloat128::new_from_f64(self.y_mid.hi);
106	0	let y = y_hi_f128 + y_mid_f128;
107
108	0	y * PI_OVER_128_F128
109	0	}
110
111	0	pub(crate) fn reduce(&mut self, x: f64) -> (u64, DoubleDouble) {
112		const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
113	0	let mut xbits = x.to_bits();
114	0	let x_e = ((x.to_bits() >> 52) & 0x7ff) as i64;
115	0	let x_e_m62 = x_e.wrapping_sub(E_BIAS as i64 + 62);
116	0	self.idx = (x_e_m62 >> 4).wrapping_add(3) as u64;
117		// Scale x down by 2^(-(16 * (idx - 3))
118	0	xbits = set_exponent_f64(
119	0	xbits,
120	0	(x_e_m62 & 15)
121	0	.wrapping_add(E_BIAS as i64)
122	0	.wrapping_add(62i64) as u64,
123	0	);
124		// 2^62 <= \|x_reduced\| < 2^(62 + 16) = 2^78
125	0	self.x_reduced = f64::from_bits(xbits);
126		// x * c_hi = ph.hi + ph.lo exactly.
127	0	let one_pi = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize];
128	0	let ph = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.0));
129		// x * c_mid = pm.hi + pm.lo exactly.
130	0	let pm = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.1));
131		// x * c_lo = pl.hi + pl.lo exactly.
132	0	let pl = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.2));
133		// Extract integral parts and fractional parts of (ph.lo + pm.hi).
134	0	let sum_hi = ph.lo + pm.hi;
135	0	let kd = sum_hi.cpu_round();
136
137		// x * 128/pi mod 1 ~ y_hi + y_mid + y_lo
138	0	self.y_hi = (ph.lo - kd) + pm.hi; // Exact
139	0	self.y_mid = DoubleDouble::from_exact_add(pm.lo, pl.hi);
140	0	self.y_lo = pl.lo;
141
142		// y_l = x * c_lo_2 + pl.lo
143	0	let y_l = dd_fmla(self.x_reduced, f64::from_bits(one_pi.3), self.y_lo);
144	0	let mut y = DoubleDouble::from_exact_add(self.y_hi, self.y_mid.hi);
145	0	y.lo += self.y_mid.lo + y_l;
146
147		// Digits of pi/128, generated by SageMath with:
148		// import struct
149		// from sage.all import *
150		//
151		// def double_to_hex(f):
152		// return "0x" + format(struct.unpack('<Q', struct.pack('<d', f))[0], '016x')
153		//
154		// R = RealField(128)
155		// π = R.pi()
156		//
157		// RN = RealField(53)
158		//
159		// hi = RN(π/128)
160		// lo = RN(π/128 - R(hi))
161		//
162		// print("lo: " + double_to_hex(lo))
163		// print("hi: " + double_to_hex(hi))
164		const PI_OVER_128_DD: DoubleDouble = DoubleDouble::new(
165		f64::from_bits(0x3c31a62633145c07),
166		f64::from_bits(0x3f9921fb54442d18),
167		);
168
169		// Error bound: with {a} denote the fractional part of a, i.e.:
170		// {a} = a - round(a)
171		// Then,
172		// \| {x * 128/pi} - (y_hi + y_lo) \| <= ulp(ulp(y_hi)) <= 2^-105
173		// \| {x mod pi/128} - (u.hi + u.lo) \| < 2 * 2^-6 * 2^-105 = 2^-110
174	0	let u = DoubleDouble::quick_mult(y, PI_OVER_128_DD);
175
176	0	(kd as i64 as u64, u)
177	0	}
178		}
179
180		// Generated by SageMath
181		// nwords = 20
182		// prec = nwords * 64 + 150
183		// R = RealField(prec)
184		// invpi = R(1) / (R(2) * R.pi())
185		//
186		// scale = R(2)**64
187		//
188		// words = []
189		// x = invpi
190		// for i in range(nwords):
191		// y = floor(x * scale)
192		// words.append(int(y))
193		// x = x * scale - y
194		//
195		// for w in words:
196		// print("0x{:016x},".format(w))
197		static INVPI_2_64: [u64; 20] = [
198		0x28be60db9391054a,
199		0x7f09d5f47d4d3770,
200		0x36d8a5664f10e410,
201		0x7f9458eaf7aef158,
202		0x6dc91b8e909374b8,
203		0x1924bba82746487,
204		0x3f877ac72c4a69cf,
205		0xba208d7d4baed121,
206		0x3a671c09ad17df90,
207		0x4e64758e60d4ce7d,
208		0x272117e2ef7e4a0e,
209		0xc7fe25fff7816603,
210		0xfbcbc462d6829b47,
211		0xdb4d9fb3c9f2c26d,
212		0xd3d18fd9a797fa8b,
213		0x5d49eeb1faf97c5e,
214		0xcf41ce7de294a4ba,
215		0x9afed7ec47e35742,
216		0x1580cc11bf1edaea,
217		0xfc33ef0826bd0d87,
218		];
219
220		#[inline]
221	0	fn create_dd(c1: u64, c0: u64) -> DoubleDouble {
222	0	let mut c1 = c1;
223	0	let mut c0 = c0;
224	0	if c1 != 0 {
225	0	let e = c1.leading_zeros();
226	0	if e != 0 {
227	0	c1 = (c1 << e) \| (c0 >> (64 - e));
228	0	c0 = c0.wrapping_shl(e);
229	0	}
230	0	let f = 0x3fe - e;
231	0	let t_u = ((f as u64) << 52) \| ((c1 << 1) >> 12);
232	0	let hi = f64::from_bits(t_u);
233	0	c0 = (c1 << 53) \| (c0 >> 11);
234	0	let l = if c0 != 0 {
235	0	let g = c0.leading_zeros();
236	0	if (g) != 0 {
237	0	c0 = c0.wrapping_shl(g);
238	0	}
239	0	let t_u = (((f - 53 - g) as u64) << 52) \| ((c0 << 1) >> 12);
240	0	f64::from_bits(t_u)
241		} else {
242	0	0.
243		};
244	0	DoubleDouble::new(l, hi)
245	0	} else if c0 != 0 {
246	0	let e = c0.leading_zeros();
247	0	let f = 0x3fe - 64 - e;
248	0	c0 = c0.wrapping_shl(e + 1); // most significant bit shifted out
249
250		/* put the upper 52 bits of c0 into h */
251	0	let t_u = ((f as u64) << 52) \| (c0 >> 12);
252	0	let hi = f64::from_bits(t_u);
253		/* put the lower 12 bits of c0 into l */
254	0	c0 = c0.wrapping_shl(52);
255	0	let l = if c0 != 0 {
256	0	let g = c0.leading_zeros();
257	0	c0 = c0.wrapping_shl(g + 1);
258	0	let t_u = (((f - 64 - g) as u64) << 52) \| (c0 >> 12);
259	0	f64::from_bits(t_u)
260		} else {
261	0	0.
262		};
263	0	DoubleDouble::new(l, hi)
264		} else {
265	0	DoubleDouble::default()
266		}
267	0	}
268
269		#[inline]
270	0	fn frac_2pi(x: f64) -> DoubleDouble {
271	0	if x <= f64::from_bits(0x401921fb54442d17)
272		// x < 2*pi
273		{
274		/* \| CH+CL - 1/(2pi) \| < 2^-110.523 */
275		const C: DoubleDouble = DoubleDouble::new(
276		f64::from_bits(0xbc66b01ec5417056),
277		f64::from_bits(0x3fc45f306dc9c883),
278		);
279	0	let mut z = DoubleDouble::quick_mult_f64(C, x);
280	0	z.lo = f_fmla(C.lo, x, z.lo);
281	0	z
282		} else
283		// x > 0x1.921fb54442d17p+2
284		{
285	0	let t = x.to_bits();
286	0	let mut e = ((t >> 52) & 0x7ff) as i32; /* 1025 <= e <= 2046 */
287	0	let m = (1u64 << 52) \| (t & 0xfffffffffffffu64);
288		let mut c0: u64;
289		let mut c1: u64;
290		let mut c2: u64;
291		// x = m/2^53 * 2^(e-1022)
292	0	if e <= 1074
293		// 1025 <= e <= 1074: 2^2 <= x < 2^52
294	0	{
295	0	let mut u = m as u128 * INVPI_2_64[1] as u128;
296	0	c0 = u as u64;
297	0	c1 = (u >> 64) as u64;
298	0	u = m as u128 * INVPI_2_64[0] as u128;
299	0	c1 = c1.wrapping_add(u as u64);
300	0	c2 = (u >> 64) as u64 + (c1 < (u as u64)) as u64;
301	0	e = 1075 - e; // 1 <= e <= 50
302	0	} else
303		// 1075 <= e <= 2046, 2^52 <= x < 2^1024
304	0	{
305	0	let i = (e - 1138 + 63) / 64; // i = ceil((e-1138)/64), 0 <= i <= 15
306	0	let mut u = m as u128 * INVPI_2_64[i as usize + 2] as u128;
307	0	c0 = u as u64;
308	0	c1 = (u >> 64) as u64;
309	0	u = m as u128 * INVPI_2_64[i as usize + 1] as u128;
310	0	c1 = c1.wrapping_add(u as u64);
311	0	c2 = (u >> 64) as u64 + ((c1) < (u as u64)) as u64;
312	0	u = m as u128 * INVPI_2_64[i as usize] as u128;
313	0	c2 = c2.wrapping_add(u as u64);
314	0	e = 1139 + (i << 6) - e; // 1 <= e <= 64
315	0	}
316	0	if e == 64 {
317	0	c0 = c1;
318	0	c1 = c2;
319	0	} else {
320	0	c0 = (c1 << (64 - e)) \| c0 >> e;
321	0	c1 = (c2 << (64 - e)) \| c1 >> e;
322	0	}
323	0	create_dd(c1, c0)
324		}
325	0	}
326
327		/// Returns x mod 2*PI
328		#[inline]
329	0	pub(crate) fn rem2pi_any(x: f64) -> AngleReduced {
330		const TWO_PI: DoubleDouble = DoubleDouble::new(
331		f64::from_bits(0x3cb1a62633145c07),
332		f64::from_bits(0x401921fb54442d18),
333		);
334	0	let a = frac_2pi(x);
335	0	let z = DoubleDouble::quick_mult(a, TWO_PI);
336	0	AngleReduced { angle: z }
337	0	}
338
339		/**
340		Generated by SageMath:
341		```python
342		def triple_double_split(x, n):
343		VR = RealField(1000)
344		R32 = RealField(n)
345		hi = R32(x)
346		rem1 = x - VR(hi)
347		med = R32(rem1)
348		rem2 = rem1 - VR(med)
349		lo = R32(rem2)
350		rem2 = rem1 - VR(med)
351		lo = R32(rem2)
352		return (hi, med, lo)
353
354		hi, med, lo = triple_double_split((RealField(600).pi() * RealField(600)(2)), 51)
355
356		print(double_to_hex(hi))
357		print(double_to_hex(med))
358		print(double_to_hex(lo))
359		```
360		**/
361		#[inline]
362	0	fn rem2pif_small(x: f32) -> f64 {
363		const ONE_OVER_PI_2: f64 = f64::from_bits(0x3fc45f306dc9c883);
364		const TWO_PI: [u64; 3] = [0x401921fb54440000, 0x3da68c234c4c0000, 0x3b498a2e03707345];
365	0	let dx = x as f64;
366	0	let kd = (dx * ONE_OVER_PI_2).cpu_round();
367	0	let mut y = dd_fmla(-kd, f64::from_bits(TWO_PI[0]), dx);
368	0	y = dd_fmla(-kd, f64::from_bits(TWO_PI[1]), y);
369	0	y = dd_fmla(-kd, f64::from_bits(TWO_PI[2]), y);
370	0	y
371	0	}
372
373		#[inline]
374	0	pub(crate) fn rem2pif_any(x: f32) -> f64 {
375	0	let x_abs = x.abs();
376	0	if x_abs.to_bits() < 0x5600_0000u32 {
377	0	rem2pif_small(x_abs)
378		} else {
379	0	let p = rem2pi_any(x_abs as f64);
380	0	p.angle.to_f64()
381		}
382	0	}
383
384	0	pub(crate) fn frac2pi_d128(x: DyadicFloat128) -> DyadicFloat128 {
385	0	let e = x.biased_exponent();
386
387	0	let mut fe = x;
388
389	0	if e <= 1
390		// \|X\| < 2
391		{
392		/* multiply by T[0]/2^64 + T[1]/2^128, where
393		\|T[0]/2^64 + T[1]/2^128 - 1/(2pi)\| < 2^-130.22 */
394	0	let mut x_hi = (x.mantissa >> 64) as u64;
395		let mut x_lo: u64;
396	0	let mut u: u128 = x_hi as u128 * INVPI_2_64[1] as u128;
397	0	let tiny: u64 = u as u64;
398	0	x_lo = (u >> 64) as u64;
399	0	u = x_hi as u128 * INVPI_2_64[0] as u128;
400	0	x_lo = x_lo.wrapping_add(u as u64);
401	0	x_hi = (u >> 64) as u64 + (x_lo < u as u64) as u64;
402		/* hi + lo/2^64 + tiny/2^128 = hi_in * (T[0]/2^64 + T[1]/2^128) thus
403		\|hi + lo/2^64 + tiny/2^128 - hi_in/(2pi)\| < hi_in 2^-130.22
404		Since X is normalized at input, hi_in >= 2^63, and since T[0] >= 2^61,
405		we have hi >= 2^(63+61-64) = 2^60, thus the normalize() below
406		perform a left shift by at most 3 bits */
407	0	let mut e = x.exponent;
408	0	fe.mantissa = (x_hi as u128).wrapping_shl(64) \| (x_lo as u128);
409	0	fe.normalize();
410	0	e -= fe.exponent;
411		// put the upper e bits of tiny into X->lo
412	0	if (e) != 0 {
413	0	x_hi = (fe.mantissa >> 64) as u64;
414	0	x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64;
415	0	x_lo \|= tiny >> (64 - e);
416	0	fe.mantissa = (x_hi as u128).wrapping_shl(64) \| (x_lo as u128);
417	0	}
418		/* The error is bounded by 2^-130.22 (relative) + ulp(lo) (absolute).
419		Since now X->hi >= 2^63, the absolute error of ulp(lo) converts into
420		a relative error of less than 2^-127.
421		This yields a maximal relative error of:
422		(1 + 2^-130.22) * (1 + 2^-127) - 1 < 2^-126.852.
423		*/
424	0	return fe;
425	0	}
426
427		// now 2 <= e <= 1024
428
429		/* The upper 64-bit word X->hi corresponds to hi/2^64*2^e, if multiplied by
430		T[i]/2^((i+1)64) it yields hiT[i]/2^128 * 2^(e-i*64).
431		If e-64i <= -128, it contributes to less than 2^-128;
432		if e-64i >= 128, it yields an integer, which is 0 modulo 1.
433		We thus only consider the values of i such that -127 <= e-64i <= 127,
434		i.e., (-127+e)/64 <= i <= (127+e)/64.
435		Up to 4 consecutive values of T[i] can contribute (only 3 when e is a
436		multiple of 64). */
437	0	let i = (if e < 127 { 0 } else { (e - 127 + 64 - 1) / 64 }) as usize; // ceil((e-127)/64)
438		// 0 <= i <= 15
439	0	let mut c: [u64; 5] = [0u64; 5];
440
441	0	let mut x_hi = (x.mantissa >> 64) as u64;
442		let mut x_lo: u64;
443
444	0	let mut u: u128 = x_hi as u128 * INVPI_2_64[i + 3] as u128; // i+3 <= 18
445	0	c[0] = u as u64;
446	0	c[1] = (u >> 64) as u64;
447	0	u = x_hi as u128 * INVPI_2_64[i + 2] as u128;
448	0	c[1] = c[1].wrapping_add(u as u64);
449	0	c[2] = (u >> 64) as u64 + (c[1] < u as u64) as u64;
450	0	u = x_hi as u128 * INVPI_2_64[i + 1] as u128;
451	0	c[2] = c[2].wrapping_add(u as u64);
452	0	c[3] = (u >> 64) as u64 + (c[2] < u as u64) as u64;
453	0	u = x_hi as u128 * INVPI_2_64[i] as u128;
454	0	c[3] = c[3].wrapping_add(u as u64);
455	0	c[4] = (u >> 64) as u64 + (c[3] < u as u64) as u64;
456
457	0	let f = e as i32 - 64 * i as i32; // hi*T[i]/2^128 is multiplied by 2^f
458
459		/* {c, 5} = hi(T[i]+T[i+1]/2^64+T[i+2]/2^128+T[i+3]/2^192) /
460		/* now shift c[0..4] by f bits to the left */
461		let tiny;
462	0	if f < 64 {
463	0	x_hi = (c[4] << f) \| (c[3] >> (64 - f));
464	0	x_lo = (c[3] << f) \| (c[2] >> (64 - f));
465	0	tiny = (c[2] << f) \| (c[1] >> (64 - f));
466	0	/* the ignored part was less than 1 in c[1],
467	0	thus less than 2^(f-64) <= 1/2 in tiny */
468	0	} else if f == 64 {
469	0	x_hi = c[3];
470	0	x_lo = c[2];
471	0	tiny = c[1];
472	0	/* the ignored part was less than 1 in c[1],
473	0	thus less than 1 in tiny */
474	0	} else
475		/* 65 <= f <= 127: this case can only occur when e >= 65 */
476		{
477	0	let g = f - 64; /* 1 <= g <= 63 */
478		/* we compute an extra term */
479	0	u = x_hi as u128 * INVPI_2_64[i + 4] as u128; // i+4 <= 19
480	0	u >>= 64;
481	0	c[0] = c[0].wrapping_add(u as u64);
482	0	c[1] += (c[0] < u as u64) as u64;
483	0	c[2] += ((c[0] < u as u64) && c[1] == 0) as u64;
484	0	c[3] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0) as u64;
485	0	c[4] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0 && c[3] == 0) as u64;
486	0	x_hi = (c[3] << g) \| (c[2] >> (64 - g));
487	0	x_lo = (c[2] << g) \| (c[1] >> (64 - g));
488	0	tiny = (c[1] << g) \| (c[0] >> (64 - g));
489		/* the ignored part was less than 1 in c[0],
490		thus less than 1/2 in tiny */
491		}
492	0	let mut fe = x;
493	0	fe.exponent = -127;
494	0	fe.mantissa = (x_hi as u128).wrapping_shl(64) \| (x_lo as u128);
495	0	fe.normalize();
496	0	let ze = fe.biased_exponent();
497	0	if ze < 0 {
498	0	x_hi = (fe.mantissa >> 64) as u64;
499	0	x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64;
500	0	x_lo \|= tiny >> (64 + ze);
501	0	fe.mantissa = (x_hi as u128).wrapping_shl(64) \| (x_lo as u128);
502	0	}
503	0	fe
504	0	}
505
506	0	pub(crate) fn rem2pi_f128(x: DyadicFloat128) -> DyadicFloat128 {
507		/*
508		Generated by SageMath:
509		```python
510		def double_to_hex(f):
511		# Converts Python float (f64) to hex string
512		packed = struct.pack('>d', float(f))
513		return '0x' + packed.hex()
514
515		def format_dyadic_hex(value):
516		l = hex(value)[2:]
517		n = 8
518		x = [l[i:i + n] for i in range(0, len(l), n)]
519		return "0x" + "_".join(x) + "_u128"
520
521		def print_dyadic(value):
522		(s, m, e) = RealField(128)(value).sign_mantissa_exponent();
523		print("DyadicFloat128 {")
524		print(f" sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},")
525		print(f" exponent: {e},")
526		print(f" mantissa: {format_dyadic_hex(m)},")
527		print("},")
528
529		print_dyadic(RealField(300).pi() * 2)
530		```
531		*/
532		const TWO_PI: DyadicFloat128 = DyadicFloat128 {
533		sign: DyadicSign::Pos,
534		exponent: -125,
535		mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
536		};
537	0	let frac2pi = frac2pi_d128(x);
538	0	TWO_PI * frac2pi
539	0	}
540		//
541		// #[cfg(test)]
542		// mod tests {
543		// use crate::dyadic_float::DyadicFloat128;
544		// use crate::sincos_reduce::{frac_2pi, frac2pi_d128};
545		//
546		// #[test]
547		// fn test_reduce() {
548		// let x = 1.8481363e36f32;
549		// let reduced = frac_2pi(x as f64);
550		// let to_reduced2 = DyadicFloat128::new_from_f64(x as f64);
551		// let reduced2 = frac2pi_d128(to_reduced2);
552		// println!("{:?}", reduced);
553		// println!("{:?}", reduced2.fast_as_f64());
554		// }
555		// }