/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/exponents/exp2.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
use crate::rounding::CpuRoundTiesEven;

#[inline]
fn exp2_poly_dd(z: f64) -> DoubleDouble {
    const C: [(u64, u64); 6] = [
        (0x3bbabc9e3b39873e, 0x3f262e42fefa39ef),
        (0xbae5e43a53e44950, 0x3e4ebfbdff82c58f),
        (0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0),
        (0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77),
        (0xb83dc47e47beb9dd, 0x3b95d87fe7a66459),
        (0xb744fcd51fcb7640, 0x3aa430912f9fb79d),
    ];

    let mut r = DoubleDouble::quick_mul_f64_add(
        DoubleDouble::from_bit_pair(C[5]),
        z,
        DoubleDouble::from_bit_pair(C[4]),
    );
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[3]));
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[2]));
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[1]));
    DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[0]))
}

#[cold]
fn exp2_accurate(x: f64) -> f64 {
    let mut ix = x.to_bits();
    let sx = 4096.0 * x;
    let fx = sx.cpu_round_ties_even();
    let z = sx - fx;
    let k: i64 = unsafe {
        fx.to_int_unchecked::<i64>() // this is already finite here
    };
    let i1 = k & 0x3f;
    let i0 = (k >> 6) & 0x3f;
    let ie = k >> 12;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let dt = DoubleDouble::quick_mult(t0, t1);

    let mut f = exp2_poly_dd(z);
    f = DoubleDouble::quick_mult_f64(f, z);
    if ix <= 0xc08ff00000000000u64 {
        // x >= -1022
        // for -0x1.71547652b82fep-54 <= x <= 0x1.71547652b82fdp-53,
        // exp2(x) round to x to nearest
        if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) {
            return dd_fmla(x, 0.5, 1.0);
        } else if (k & 0xfff) == 0 {
            // 4096*x rounds to 4096*integer
            let zf = DoubleDouble::from_exact_add(dt.hi, f.hi);
            let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo);
            f.hi = zf.hi;
            f.lo = zfl.hi;
            ix = zfl.hi.to_bits();
            if ix & 0x000fffffffffffff == 0 {
                // fl is a power of 2
                if ((ix >> 52) & 0x7ff) != 0 {
                    // |fl| is Inf
                    let v = zfl.lo.to_bits();
                    let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
                        .wrapping_shl(1)
                        .wrapping_add(1) as i64;
                    ix = ix.wrapping_add(d as u64);
                    f.lo = f64::from_bits(ix);
                }
            }
        } else {
            f = DoubleDouble::quick_mult(f, dt);
            f = DoubleDouble::add(dt, f);
        }
        let hf = DoubleDouble::from_exact_add(f.hi, f.lo);

        fast_ldexp(hf.hi, ie as i32)
    } else {
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
        f = DoubleDouble::quick_mult(f, dt);
        f = DoubleDouble::add(dt, f);
        let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
        f.hi = zve.hi;
        f.lo += zve.lo;

        to_denormal(f.to_f64())
    }
}

/// Computes exp2
///
/// Max found ULP 0.5
pub fn f_exp2(x: f64) -> f64 {
    let mut ix = x.to_bits();
    let ax = ix.wrapping_shl(1);
    if ax == 0 {
        return 1.0;
    }
    if ax >= 0x8120000000000000u64 {
        // |x| >= 1024
        if ax > 0xffe0000000000000u64 {
            return x + x; // nan
        }
        if ax == 0xffe0000000000000u64 {
            return if (ix >> 63) != 0 { 0.0 } else { x };
        }
        // +/-inf
        if (ix >> 63) != 0 {
            // x <= -1024
            if ix >= 0xc090cc0000000000u64 {
                // x <= -1075
                const Z: f64 = f64::from_bits(0x0010000000000000);
                return Z * Z;
            }
        } else {
            // x >= 1024
            return f64::from_bits(0x7fe0000000000000) * x;
        }
    }

    // for |x| <= 0x1.71547652b82fep-54, 2^x rounds to 1 to nearest
    // this avoids a spurious underflow in z*z below
    if ax <= 0x792e2a8eca5705fcu64 {
        return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x);
    }

    let m = ix.wrapping_shl(12);
    let ex = (ax >> 53).wrapping_sub(0x3ff);
    let frac = ex >> 63 | m << (ex & 63);
    let sx = 4096.0 * x;
    let fx = sx.cpu_round_ties_even();
    let z = sx - fx;
    let z2 = z * z;
    let k = unsafe {
        fx.to_int_unchecked::<i64>() // this already finite here
    };
    let i1 = k & 0x3f;
    let i0 = (k >> 6) & 0x3f;
    let ie = k >> 12;
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let ti0 = DoubleDouble::quick_mult(t0, t1);
    const C: [u64; 4] = [
        0x3f262e42fefa39ef,
        0x3e4ebfbdff82c58f,
        0x3d6c6b08d73b3e01,
        0x3c83b2ab6fdda001,
    ];
    let tz = ti0.hi * z;
    let mut fh = ti0.hi;

    let p0 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
    let p1 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
    let p2 = f_fmla(z2, p1, p0);

    let mut fl = f_fmla(tz, p2, ti0.lo);

    const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);

    if ix <= 0xc08ff00000000000u64 {
        // x >= -1022
        if frac != 0 {
            let ub = fh + (fl + EPS);
            fh += fl - EPS;
            if ub != fh {
                return exp2_accurate(x);
            }
        }
        fh = fast_ldexp(fh, ie as i32);
    } else {
        // subnormal case
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
        let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
        fl += rs.lo;
        fh = rs.hi;
        if frac != 0 {
            let ub = fh + (fl + EPS);
            fh += fl - EPS;
            if ub != fh {
                return exp2_accurate(x);
            }
        }
        // when 2^x is exact, no underflow should be raised
        fh = to_denormal(fh);
    }
    fh
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_exp2d() {
        assert_eq!(f_exp2(2.0), 4.0);
        assert_eq!(f_exp2(3.0), 8.0);
        assert_eq!(f_exp2(4.0), 16.0);
        assert_eq!(f_exp2(0.35f64), 1.2745606273192622);
        assert_eq!(f_exp2(-0.6f64), 0.6597539553864471);
        assert_eq!(f_exp2(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_exp2(f64::NEG_INFINITY), 0.);
        assert!(f_exp2(f64::NAN).is_nan());
    }
}

Coverage Report

Created: 2025-10-14 06:57

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::common::{dd_fmla, f_fmla};
30		use crate::double_double::DoubleDouble;
31		use crate::exponents::auxiliary::fast_ldexp;
32		use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
33		use crate::rounding::CpuRoundTiesEven;
34
35		#[inline]
36	0	fn exp2_poly_dd(z: f64) -> DoubleDouble {
37		const C: [(u64, u64); 6] = [
38		(0x3bbabc9e3b39873e, 0x3f262e42fefa39ef),
39		(0xbae5e43a53e44950, 0x3e4ebfbdff82c58f),
40		(0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0),
41		(0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77),
42		(0xb83dc47e47beb9dd, 0x3b95d87fe7a66459),
43		(0xb744fcd51fcb7640, 0x3aa430912f9fb79d),
44		];
45
46	0	let mut r = DoubleDouble::quick_mul_f64_add(
47	0	DoubleDouble::from_bit_pair(C[5]),
48	0	z,
49	0	DoubleDouble::from_bit_pair(C[4]),
50		);
51	0	r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[3]));
52	0	r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[2]));
53	0	r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[1]));
54	0	DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[0]))
55	0	}
56
57		#[cold]
58	0	fn exp2_accurate(x: f64) -> f64 {
59	0	let mut ix = x.to_bits();
60	0	let sx = 4096.0 * x;
61	0	let fx = sx.cpu_round_ties_even();
62	0	let z = sx - fx;
63	0	let k: i64 = unsafe {
64	0	fx.to_int_unchecked::<i64>() // this is already finite here
65		};
66	0	let i1 = k & 0x3f;
67	0	let i0 = (k >> 6) & 0x3f;
68	0	let ie = k >> 12;
69
70	0	let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
71	0	let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
72	0	let dt = DoubleDouble::quick_mult(t0, t1);
73
74	0	let mut f = exp2_poly_dd(z);
75	0	f = DoubleDouble::quick_mult_f64(f, z);
76	0	if ix <= 0xc08ff00000000000u64 {
77		// x >= -1022
78		// for -0x1.71547652b82fep-54 <= x <= 0x1.71547652b82fdp-53,
79		// exp2(x) round to x to nearest
80	0	if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) {
81	0	return dd_fmla(x, 0.5, 1.0);
82	0	} else if (k & 0xfff) == 0 {
83		// 4096x rounds to 4096integer
84	0	let zf = DoubleDouble::from_exact_add(dt.hi, f.hi);
85	0	let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo);
86	0	f.hi = zf.hi;
87	0	f.lo = zfl.hi;
88	0	ix = zfl.hi.to_bits();
89	0	if ix & 0x000fffffffffffff == 0 {
90		// fl is a power of 2
91	0	if ((ix >> 52) & 0x7ff) != 0 {
92	0	// \|fl\| is Inf
93	0	let v = zfl.lo.to_bits();
94	0	let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
95	0	.wrapping_shl(1)
96	0	.wrapping_add(1) as i64;
97	0	ix = ix.wrapping_add(d as u64);
98	0	f.lo = f64::from_bits(ix);
99	0	}
100	0	}
101	0	} else {
102	0	f = DoubleDouble::quick_mult(f, dt);
103	0	f = DoubleDouble::add(dt, f);
104	0	}
105	0	let hf = DoubleDouble::from_exact_add(f.hi, f.lo);
106
107	0	fast_ldexp(hf.hi, ie as i32)
108		} else {
109	0	ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
110	0	f = DoubleDouble::quick_mult(f, dt);
111	0	f = DoubleDouble::add(dt, f);
112	0	let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
113	0	f.hi = zve.hi;
114	0	f.lo += zve.lo;
115
116	0	to_denormal(f.to_f64())
117		}
118	0	}
119
120		/// Computes exp2
121		///
122		/// Max found ULP 0.5
123	0	pub fn f_exp2(x: f64) -> f64 {
124	0	let mut ix = x.to_bits();
125	0	let ax = ix.wrapping_shl(1);
126	0	if ax == 0 {
127	0	return 1.0;
128	0	}
129	0	if ax >= 0x8120000000000000u64 {
130		// \|x\| >= 1024
131	0	if ax > 0xffe0000000000000u64 {
132	0	return x + x; // nan
133	0	}
134	0	if ax == 0xffe0000000000000u64 {
135	0	return if (ix >> 63) != 0 { 0.0 } else { x };
136	0	}
137		// +/-inf
138	0	if (ix >> 63) != 0 {
139		// x <= -1024
140	0	if ix >= 0xc090cc0000000000u64 {
141		// x <= -1075
142		const Z: f64 = f64::from_bits(0x0010000000000000);
143	0	return Z * Z;
144	0	}
145		} else {
146		// x >= 1024
147	0	return f64::from_bits(0x7fe0000000000000) * x;
148		}
149	0	}
150
151		// for \|x\| <= 0x1.71547652b82fep-54, 2^x rounds to 1 to nearest
152		// this avoids a spurious underflow in z*z below
153	0	if ax <= 0x792e2a8eca5705fcu64 {
154	0	return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x);
155	0	}
156
157	0	let m = ix.wrapping_shl(12);
158	0	let ex = (ax >> 53).wrapping_sub(0x3ff);
159	0	let frac = ex >> 63 \| m << (ex & 63);
160	0	let sx = 4096.0 * x;
161	0	let fx = sx.cpu_round_ties_even();
162	0	let z = sx - fx;
163	0	let z2 = z * z;
164	0	let k = unsafe {
165	0	fx.to_int_unchecked::<i64>() // this already finite here
166		};
167	0	let i1 = k & 0x3f;
168	0	let i0 = (k >> 6) & 0x3f;
169	0	let ie = k >> 12;
170	0	let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
171	0	let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
172	0	let ti0 = DoubleDouble::quick_mult(t0, t1);
173		const C: [u64; 4] = [
174		0x3f262e42fefa39ef,
175		0x3e4ebfbdff82c58f,
176		0x3d6c6b08d73b3e01,
177		0x3c83b2ab6fdda001,
178		];
179	0	let tz = ti0.hi * z;
180	0	let mut fh = ti0.hi;
181
182	0	let p0 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
183	0	let p1 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
184	0	let p2 = f_fmla(z2, p1, p0);
185
186	0	let mut fl = f_fmla(tz, p2, ti0.lo);
187
188		const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
189
190	0	if ix <= 0xc08ff00000000000u64 {
191		// x >= -1022
192	0	if frac != 0 {
193	0	let ub = fh + (fl + EPS);
194	0	fh += fl - EPS;
195	0	if ub != fh {
196	0	return exp2_accurate(x);
197	0	}
198	0	}
199	0	fh = fast_ldexp(fh, ie as i32);
200		} else {
201		// subnormal case
202	0	ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
203	0	let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
204	0	fl += rs.lo;
205	0	fh = rs.hi;
206	0	if frac != 0 {
207	0	let ub = fh + (fl + EPS);
208	0	fh += fl - EPS;
209	0	if ub != fh {
210	0	return exp2_accurate(x);
211	0	}
212	0	}
213		// when 2^x is exact, no underflow should be raised
214	0	fh = to_denormal(fh);
215		}
216	0	fh
217	0	}
218
219		#[cfg(test)]
220		mod tests {
221		use super::*;
222
223		#[test]
224		fn test_exp2d() {
225		assert_eq!(f_exp2(2.0), 4.0);
226		assert_eq!(f_exp2(3.0), 8.0);
227		assert_eq!(f_exp2(4.0), 16.0);
228		assert_eq!(f_exp2(0.35f64), 1.2745606273192622);
229		assert_eq!(f_exp2(-0.6f64), 0.6597539553864471);
230		assert_eq!(f_exp2(f64::INFINITY), f64::INFINITY);
231		assert_eq!(f_exp2(f64::NEG_INFINITY), 0.);
232		assert!(f_exp2(f64::NAN).is_nan());
233		}
234		}