/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/compound/powm1f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::*; |
30 | | use crate::compound::compound_m1f::compoundf_expf_poly; |
31 | | use crate::compound::compoundf::{ |
32 | | COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV, |
33 | | }; |
34 | | use crate::rounding::CpuRoundTiesEven; |
35 | | use std::hint::black_box; |
36 | | |
37 | | #[inline] |
38 | 0 | fn powm1f_log2_fast(x: f64) -> f64 { |
39 | | /* for x > 0, 1+x is exact when 2^-29 <= x < 2^53 |
40 | | for x < 0, 1+x is exact when -1 < x <= 2^-30 */ |
41 | | |
42 | | // double u = (x >= 0x1p53) ? x : 1.0 + x; |
43 | | /* For x < 0x1p53, x + 1 is exact thus u = x+1. |
44 | | For x >= 2^53, we estimate log2(x) instead of log2(1+x), |
45 | | since log2(1+x) = log2(x) + log2(1+1/x), |
46 | | log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative |
47 | | error is bounded by 2^-52.471/53 < 2^-58.198 */ |
48 | | |
49 | 0 | let mut v = x.to_bits(); |
50 | 0 | let m: u64 = v & 0xfffffffffffffu64; |
51 | 0 | let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64; |
52 | | // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128 |
53 | 0 | v = v.wrapping_sub((e << 52) as u64); |
54 | 0 | let t = f64::from_bits(v); |
55 | | // u = 2^e*t with 1/sqrt(2) < t < sqrt(2) |
56 | | // thus log2(u) = e + log2(t) |
57 | 0 | v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4) |
58 | 0 | let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45 |
59 | 0 | let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]); |
60 | 0 | let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64 |
61 | | // we approximates log2(t) by -log2(r) + log2(r*t) |
62 | 0 | let p = crate::compound::compoundf::log2p1_polyeval_1(z); |
63 | | // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459 |
64 | 0 | e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p) |
65 | 0 | } |
66 | | |
67 | | /// Computes x^y - 1 |
68 | 0 | pub fn f_powm1f(x: f32, y: f32) -> f32 { |
69 | 0 | let ax: u32 = x.to_bits().wrapping_shl(1); |
70 | 0 | let ay: u32 = y.to_bits().wrapping_shl(1); |
71 | | |
72 | | // filter out exceptional cases |
73 | 0 | if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 { |
74 | 0 | if x.is_nan() || y.is_nan() { |
75 | 0 | return f32::NAN; |
76 | 0 | } |
77 | | |
78 | | // Handle infinities |
79 | 0 | if x.is_infinite() { |
80 | 0 | return if x.is_sign_positive() { |
81 | 0 | if y.is_infinite() { |
82 | 0 | return f32::INFINITY; |
83 | 0 | } else if y > 0.0 { |
84 | 0 | f32::INFINITY // inf^positive -> inf |
85 | 0 | } else if y < 0.0 { |
86 | 0 | -1.0 // inf^negative -> 0, so powm1 = -1 |
87 | | } else { |
88 | 0 | f32::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN) |
89 | | } |
90 | | } else { |
91 | | // x = -inf |
92 | 0 | if y.is_infinite() { |
93 | 0 | return -1.0; |
94 | 0 | } |
95 | 0 | if is_integerf(y) { |
96 | | // Negative base: (-inf)^even = +inf, (-inf)^odd = -inf |
97 | 0 | let pow = if y as i32 % 2 == 0 { |
98 | 0 | f32::INFINITY |
99 | | } else { |
100 | 0 | f32::NEG_INFINITY |
101 | | }; |
102 | 0 | pow - 1.0 |
103 | | } else { |
104 | 0 | f32::NAN // Negative base with non-integer exponent |
105 | | } |
106 | | }; |
107 | 0 | } |
108 | | |
109 | | // Handle y infinite |
110 | 0 | if y.is_infinite() { |
111 | 0 | return if x.abs() > 1.0 { |
112 | 0 | if y.is_sign_positive() { |
113 | 0 | f32::INFINITY |
114 | | } else { |
115 | 0 | -1.0 |
116 | | } |
117 | 0 | } else if x.abs() < 1.0 { |
118 | 0 | if y.is_sign_positive() { |
119 | 0 | -1.0 |
120 | | } else { |
121 | 0 | f32::INFINITY |
122 | | } |
123 | | } else { |
124 | | // |x| == 1 |
125 | 0 | f32::NAN // 1^inf or -1^inf is undefined |
126 | | }; |
127 | 0 | } |
128 | | |
129 | | // Handle zero base |
130 | 0 | if x == 0.0 { |
131 | 0 | return if y > 0.0 { |
132 | 0 | -1.0 // 0^positive -> 0, powm1 = -1 |
133 | 0 | } else if y < 0.0 { |
134 | 0 | f32::INFINITY // 0^negative -> inf |
135 | | } else { |
136 | 0 | 0.0 // 0^0 -> conventionally 1, powm1 = 0 |
137 | | }; |
138 | 0 | } |
139 | 0 | } |
140 | | |
141 | 0 | let y_integer = is_integerf(y); |
142 | | |
143 | 0 | let mut negative_parity: bool = false; |
144 | | |
145 | 0 | let mut x = x; |
146 | | |
147 | | // Handle negative base with non-integer exponent |
148 | 0 | if x < 0.0 { |
149 | 0 | if !y_integer { |
150 | 0 | return f32::NAN; // x < 0 and non-integer y |
151 | 0 | } |
152 | 0 | x = x.abs(); |
153 | 0 | if is_odd_integerf(y) { |
154 | 0 | negative_parity = true; |
155 | 0 | } |
156 | 0 | } |
157 | | |
158 | 0 | let xd = x as f64; |
159 | 0 | let yd = y as f64; |
160 | 0 | let tx = xd.to_bits(); |
161 | 0 | let ty = yd.to_bits(); |
162 | | |
163 | 0 | let l: f64 = powm1f_log2_fast(f64::from_bits(tx)); |
164 | | |
165 | | /* l approximates log2(1+x) with relative error < 2^-47.997, |
166 | | and 2^-149 <= |l| < 128 */ |
167 | | |
168 | 0 | let dt = l * f64::from_bits(ty); |
169 | 0 | let t: u64 = dt.to_bits(); |
170 | | |
171 | | // detect overflow/underflow |
172 | 0 | if (t.wrapping_shl(1)) >= (0x406u64 << 53) { |
173 | | // |t| >= 128 |
174 | 0 | if t >= 0x3018bu64 << 46 { |
175 | | // t <= -150 |
176 | 0 | return -1.; |
177 | 0 | } else if (t >> 63) == 0 { |
178 | | // t >= 128: overflow |
179 | 0 | return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000)); |
180 | 0 | } |
181 | 0 | } |
182 | | |
183 | 0 | let res = powm1_exp2m1_fast(f64::from_bits(t)); |
184 | | // For x < 0 and integer y = n: |
185 | | // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res). |
186 | | // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1). |
187 | 0 | if negative_parity { |
188 | 0 | (-res - 2.) as f32 |
189 | | } else { |
190 | 0 | res as f32 |
191 | | } |
192 | 0 | } |
193 | | |
194 | | #[inline] |
195 | 0 | pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 { |
196 | 0 | let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150 |
197 | 0 | let mut r = t - k; // |r| <= 1/2, exact |
198 | 0 | let mut v: f64 = 3.015625 + r; // 2.5 <= v <= 3.5015625 |
199 | | // we add 2^-6 so that i is rounded to nearest |
200 | 0 | let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32 |
201 | 0 | r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact |
202 | | // now |r| <= 2^-6 |
203 | | // 2^t = 2^k * exp2_U[i][0] * 2^r |
204 | 0 | let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1); |
205 | 0 | let su = unsafe { |
206 | 0 | k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer |
207 | | }; |
208 | 0 | s = f64::from_bits(s.to_bits().wrapping_add(su as u64)); |
209 | 0 | let q_poly = compoundf_expf_poly(r); |
210 | 0 | v = q_poly; |
211 | | |
212 | | #[cfg(any( |
213 | | all( |
214 | | any(target_arch = "x86", target_arch = "x86_64"), |
215 | | target_feature = "fma" |
216 | | ), |
217 | | target_arch = "aarch64" |
218 | | ))] |
219 | | { |
220 | | v = f_fmla(v, s, s - 1f64); |
221 | | } |
222 | | #[cfg(not(any( |
223 | | all( |
224 | | any(target_arch = "x86", target_arch = "x86_64"), |
225 | | target_feature = "fma" |
226 | | ), |
227 | | target_arch = "aarch64" |
228 | | )))] |
229 | | { |
230 | | use crate::double_double::DoubleDouble; |
231 | 0 | let p0 = DoubleDouble::from_full_exact_add(s, -1.); |
232 | 0 | let z = DoubleDouble::from_exact_mult(v, s); |
233 | 0 | v = DoubleDouble::add(z, p0).to_f64(); |
234 | | } |
235 | | |
236 | 0 | v |
237 | 0 | } |
238 | | |
239 | | #[cfg(test)] |
240 | | mod tests { |
241 | | use super::*; |
242 | | #[test] |
243 | | fn test_powm1f() { |
244 | | assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30); |
245 | | assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY); |
246 | | assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.); |
247 | | assert_eq!(f_powm1f(3., 2.), 8.); |
248 | | assert_eq!(f_powm1f(3., 3.), 26.); |
249 | | assert_eq!(f_powm1f(5., 2.), 24.); |
250 | | assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.); |
251 | | assert_eq!(f_powm1f(-5., 2.), 24.); |
252 | | assert_eq!(f_powm1f(-5., 3.), -126.); |
253 | | assert_eq!( |
254 | | f_powm1f(196560., 0.000000000000000000000000000000000000001193773), |
255 | | 1.455057e-38 |
256 | | ); |
257 | | assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan()); |
258 | | assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan()); |
259 | | } |
260 | | } |