/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/compound/powm1f.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::*;
use crate::compound::compound_m1f::compoundf_expf_poly;
use crate::compound::compoundf::{
    COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
};
use crate::rounding::CpuRoundTiesEven;
use std::hint::black_box;

#[inline]
fn powm1f_log2_fast(x: f64) -> f64 {
    /* for x > 0, 1+x is exact when 2^-29 <=  x < 2^53
    for x < 0, 1+x is exact when -1 < x <= 2^-30 */

    //  double u = (x >= 0x1p53) ? x : 1.0 + x;
    /* For x < 0x1p53, x + 1 is exact thus u = x+1.
    For x >= 2^53, we estimate log2(x) instead of log2(1+x),
    since log2(1+x) = log2(x) + log2(1+1/x),
    log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
    error is bounded by 2^-52.471/53 < 2^-58.198 */

    let mut v = x.to_bits();
    let m: u64 = v & 0xfffffffffffffu64;
    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
    // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
    v = v.wrapping_sub((e << 52) as u64);
    let t = f64::from_bits(v);
    // u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
    // thus log2(u) = e + log2(t)
    v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
    let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
    let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
    // we approximates log2(t) by -log2(r) + log2(r*t)
    let p = crate::compound::compoundf::log2p1_polyeval_1(z);
    // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
    e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
}

/// Computes x^y - 1
pub fn f_powm1f(x: f32, y: f32) -> f32 {
    let ax: u32 = x.to_bits().wrapping_shl(1);
    let ay: u32 = y.to_bits().wrapping_shl(1);

    // filter out exceptional cases
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        if x.is_nan() || y.is_nan() {
            return f32::NAN;
        }

        // Handle infinities
        if x.is_infinite() {
            return if x.is_sign_positive() {
                if y.is_infinite() {
                    return f32::INFINITY;
                } else if y > 0.0 {
                    f32::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f32::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN)
                }
            } else {
                // x = -inf
                if y.is_infinite() {
                    return -1.0;
                }
                if is_integerf(y) {
                    // Negative base: (-inf)^even = +inf, (-inf)^odd = -inf
                    let pow = if y as i32 % 2 == 0 {
                        f32::INFINITY
                    } else {
                        f32::NEG_INFINITY
                    };
                    pow - 1.0
                } else {
                    f32::NAN // Negative base with non-integer exponent
                }
            };
        }

        // Handle y infinite
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f32::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f32::INFINITY
                }
            } else {
                // |x| == 1
                f32::NAN // 1^inf or -1^inf is undefined
            };
        }

        // Handle zero base
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f32::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
    }

    let y_integer = is_integerf(y);

    let mut negative_parity: bool = false;

    let mut x = x;

    // Handle negative base with non-integer exponent
    if x < 0.0 {
        if !y_integer {
            return f32::NAN; // x < 0 and non-integer y
        }
        x = x.abs();
        if is_odd_integerf(y) {
            negative_parity = true;
        }
    }

    let xd = x as f64;
    let yd = y as f64;
    let tx = xd.to_bits();
    let ty = yd.to_bits();

    let l: f64 = powm1f_log2_fast(f64::from_bits(tx));

    /* l approximates log2(1+x) with relative error < 2^-47.997,
    and 2^-149 <= |l| < 128 */

    let dt = l * f64::from_bits(ty);
    let t: u64 = dt.to_bits();

    // detect overflow/underflow
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150
            return -1.;
        } else if (t >> 63) == 0 {
            // t >= 128: overflow
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }

    let res = powm1_exp2m1_fast(f64::from_bits(t));
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
    if negative_parity {
        (-res - 2.) as f32
    } else {
        res as f32
    }
}

#[inline]
pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
    let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150
    let mut r = t - k; // |r| <= 1/2, exact
    let mut v: f64 = 3.015625 + r; // 2.5 <= v <= 3.5015625
    // we add 2^-6 so that i is rounded to nearest
    let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32
    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
    // now |r| <= 2^-6
    // 2^t = 2^k * exp2_U[i][0] * 2^r
    let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
    let su = unsafe {
        k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer
    };
    s = f64::from_bits(s.to_bits().wrapping_add(su as u64));
    let q_poly = compoundf_expf_poly(r);
    v = q_poly;

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        v = f_fmla(v, s, s - 1f64);
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::double_double::DoubleDouble;
        let p0 = DoubleDouble::from_full_exact_add(s, -1.);
        let z = DoubleDouble::from_exact_mult(v, s);
        v = DoubleDouble::add(z, p0).to_f64();
    }

    v
}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_powm1f() {
        assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30);
        assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY);
        assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.);
        assert_eq!(f_powm1f(3., 2.), 8.);
        assert_eq!(f_powm1f(3., 3.), 26.);
        assert_eq!(f_powm1f(5., 2.), 24.);
        assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.);
        assert_eq!(f_powm1f(-5., 2.), 24.);
        assert_eq!(f_powm1f(-5., 3.), -126.);
        assert_eq!(
            f_powm1f(196560., 0.000000000000000000000000000000000000001193773),
            1.455057e-38
        );
        assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
        assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
    }
}

Coverage Report

Created: 2025-10-28 08:03

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::common::*;
30		use crate::compound::compound_m1f::compoundf_expf_poly;
31		use crate::compound::compoundf::{
32		COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
33		};
34		use crate::rounding::CpuRoundTiesEven;
35		use std::hint::black_box;
36
37		#[inline]
38	0	fn powm1f_log2_fast(x: f64) -> f64 {
39		/* for x > 0, 1+x is exact when 2^-29 <= x < 2^53
40		for x < 0, 1+x is exact when -1 < x <= 2^-30 */
41
42		// double u = (x >= 0x1p53) ? x : 1.0 + x;
43		/* For x < 0x1p53, x + 1 is exact thus u = x+1.
44		For x >= 2^53, we estimate log2(x) instead of log2(1+x),
45		since log2(1+x) = log2(x) + log2(1+1/x),
46		log2(x) >= 53 and \|log2(1+1/x)\| < 2^-52.471, the additional relative
47		error is bounded by 2^-52.471/53 < 2^-58.198 */
48
49	0	let mut v = x.to_bits();
50	0	let m: u64 = v & 0xfffffffffffffu64;
51	0	let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
52		// 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
53	0	v = v.wrapping_sub((e << 52) as u64);
54	0	let t = f64::from_bits(v);
55		// u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
56		// thus log2(u) = e + log2(t)
57	0	v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
58	0	let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
59	0	let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
60	0	let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
61		// we approximates log2(t) by -log2(r) + log2(r*t)
62	0	let p = crate::compound::compoundf::log2p1_polyeval_1(z);
63		// p approximates log2(r*t) with rel. error < 2^-49.642, and \|p\| < 2^-5.459
64	0	e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
65	0	}
66
67		/// Computes x^y - 1
68	0	pub fn f_powm1f(x: f32, y: f32) -> f32 {
69	0	let ax: u32 = x.to_bits().wrapping_shl(1);
70	0	let ay: u32 = y.to_bits().wrapping_shl(1);
71
72		// filter out exceptional cases
73	0	if ax == 0 \|\| ax >= 0xffu32 << 24 \|\| ay == 0 \|\| ay >= 0xffu32 << 24 {
74	0	if x.is_nan() \|\| y.is_nan() {
75	0	return f32::NAN;
76	0	}
77
78		// Handle infinities
79	0	if x.is_infinite() {
80	0	return if x.is_sign_positive() {
81	0	if y.is_infinite() {
82	0	return f32::INFINITY;
83	0	} else if y > 0.0 {
84	0	f32::INFINITY // inf^positive -> inf
85	0	} else if y < 0.0 {
86	0	-1.0 // inf^negative -> 0, so powm1 = -1
87		} else {
88	0	f32::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN)
89		}
90		} else {
91		// x = -inf
92	0	if y.is_infinite() {
93	0	return -1.0;
94	0	}
95	0	if is_integerf(y) {
96		// Negative base: (-inf)^even = +inf, (-inf)^odd = -inf
97	0	let pow = if y as i32 % 2 == 0 {
98	0	f32::INFINITY
99		} else {
100	0	f32::NEG_INFINITY
101		};
102	0	pow - 1.0
103		} else {
104	0	f32::NAN // Negative base with non-integer exponent
105		}
106		};
107	0	}
108
109		// Handle y infinite
110	0	if y.is_infinite() {
111	0	return if x.abs() > 1.0 {
112	0	if y.is_sign_positive() {
113	0	f32::INFINITY
114		} else {
115	0	-1.0
116		}
117	0	} else if x.abs() < 1.0 {
118	0	if y.is_sign_positive() {
119	0	-1.0
120		} else {
121	0	f32::INFINITY
122		}
123		} else {
124		// \|x\| == 1
125	0	f32::NAN // 1^inf or -1^inf is undefined
126		};
127	0	}
128
129		// Handle zero base
130	0	if x == 0.0 {
131	0	return if y > 0.0 {
132	0	-1.0 // 0^positive -> 0, powm1 = -1
133	0	} else if y < 0.0 {
134	0	f32::INFINITY // 0^negative -> inf
135		} else {
136	0	0.0 // 0^0 -> conventionally 1, powm1 = 0
137		};
138	0	}
139	0	}
140
141	0	let y_integer = is_integerf(y);
142
143	0	let mut negative_parity: bool = false;
144
145	0	let mut x = x;
146
147		// Handle negative base with non-integer exponent
148	0	if x < 0.0 {
149	0	if !y_integer {
150	0	return f32::NAN; // x < 0 and non-integer y
151	0	}
152	0	x = x.abs();
153	0	if is_odd_integerf(y) {
154	0	negative_parity = true;
155	0	}
156	0	}
157
158	0	let xd = x as f64;
159	0	let yd = y as f64;
160	0	let tx = xd.to_bits();
161	0	let ty = yd.to_bits();
162
163	0	let l: f64 = powm1f_log2_fast(f64::from_bits(tx));
164
165		/* l approximates log2(1+x) with relative error < 2^-47.997,
166		and 2^-149 <= \|l\| < 128 */
167
168	0	let dt = l * f64::from_bits(ty);
169	0	let t: u64 = dt.to_bits();
170
171		// detect overflow/underflow
172	0	if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
173		// \|t\| >= 128
174	0	if t >= 0x3018bu64 << 46 {
175		// t <= -150
176	0	return -1.;
177	0	} else if (t >> 63) == 0 {
178		// t >= 128: overflow
179	0	return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
180	0	}
181	0	}
182
183	0	let res = powm1_exp2m1_fast(f64::from_bits(t));
184		// For x < 0 and integer y = n:
185		// if n is even: x^n = \|x\|^n → powm1 = \|x\|^n - 1 (same sign as res).
186		// if n is odd: x^n = -\|x\|^n → powm1 = -\|x\|^n - 1 = - (\|x\|^n + 1).
187	0	if negative_parity {
188	0	(-res - 2.) as f32
189		} else {
190	0	res as f32
191		}
192	0	}
193
194		#[inline]
195	0	pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
196	0	let k = t.cpu_round_ties_even(); // 0 <= \|k\| <= 150
197	0	let mut r = t - k; // \|r\| <= 1/2, exact
198	0	let mut v: f64 = 3.015625 + r; // 2.5 <= v <= 3.5015625
199		// we add 2^-6 so that i is rounded to nearest
200	0	let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32
201	0	r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
202		// now \|r\| <= 2^-6
203		// 2^t = 2^k * exp2_U[i][0] * 2^r
204	0	let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
205	0	let su = unsafe {
206	0	k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer
207		};
208	0	s = f64::from_bits(s.to_bits().wrapping_add(su as u64));
209	0	let q_poly = compoundf_expf_poly(r);
210	0	v = q_poly;
211
212		#[cfg(any(
213		all(
214		any(target_arch = "x86", target_arch = "x86_64"),
215		target_feature = "fma"
216		),
217		target_arch = "aarch64"
218		))]
219		{
220		v = f_fmla(v, s, s - 1f64);
221		}
222		#[cfg(not(any(
223		all(
224		any(target_arch = "x86", target_arch = "x86_64"),
225		target_feature = "fma"
226		),
227		target_arch = "aarch64"
228		)))]
229		{
230		use crate::double_double::DoubleDouble;
231	0	let p0 = DoubleDouble::from_full_exact_add(s, -1.);
232	0	let z = DoubleDouble::from_exact_mult(v, s);
233	0	v = DoubleDouble::add(z, p0).to_f64();
234		}
235
236	0	v
237	0	}
238
239		#[cfg(test)]
240		mod tests {
241		use super::*;
242		#[test]
243		fn test_powm1f() {
244		assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30);
245		assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY);
246		assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.);
247		assert_eq!(f_powm1f(3., 2.), 8.);
248		assert_eq!(f_powm1f(3., 3.), 26.);
249		assert_eq!(f_powm1f(5., 2.), 24.);
250		assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.);
251		assert_eq!(f_powm1f(-5., 2.), 24.);
252		assert_eq!(f_powm1f(-5., 3.), -126.);
253		assert_eq!(
254		f_powm1f(196560., 0.000000000000000000000000000000000000001193773),
255		1.455057e-38
256		);
257		assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
258		assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
259		}
260		}