/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/expm1f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::exponents::expf::{ExpfBackend, GenericExpfBackend}; |
30 | | |
31 | | #[inline(always)] |
32 | 0 | fn expm1f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 { |
33 | 0 | let x_u: u32 = x.to_bits(); |
34 | 0 | let x_abs = x_u & 0x7fff_ffffu32; |
35 | | |
36 | | // When |x| > 25*log(2), or nan |
37 | 0 | if x_abs >= 0x418a_a123u32 { |
38 | | // x < log(2^-25) |
39 | 0 | if x.is_sign_negative() { |
40 | | // exp(-Inf) = 0 |
41 | 0 | if x.is_infinite() { |
42 | 0 | return -1.0; |
43 | 0 | } |
44 | | // exp(nan) = nan |
45 | 0 | if x.is_nan() { |
46 | 0 | return x; |
47 | 0 | } |
48 | 0 | return -1.0; |
49 | | } else { |
50 | | // x >= 89 or nan |
51 | 0 | if x_u >= 0x42b2_0000 { |
52 | 0 | return x + f32::INFINITY; |
53 | 0 | } |
54 | | } |
55 | 0 | } |
56 | | |
57 | | // |x| < 2^-4 |
58 | 0 | if x_abs < 0x3d80_0000u32 { |
59 | | // |x| < 2^-25 |
60 | 0 | if x_abs < 0x3300_0000u32 { |
61 | | // x = -0.0f |
62 | 0 | if x_u == 0x8000_0000u32 { |
63 | 0 | return x; |
64 | 0 | } |
65 | | // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x |
66 | | // is: |
67 | | // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x| |
68 | | // = |x| |
69 | | // < 2^-25 |
70 | | // < epsilon(1)/2. |
71 | | // To simplify the rounding decision and make it more efficient, we use |
72 | | // fma(x, x, x) ~ x + x^2 instead. |
73 | | // Note: to use the formula x + x^2 to decide the correct rounding, we |
74 | | // do need fma(x, x, x) to prevent underflow caused by x*x when |x| < |
75 | | // 2^-76. For targets without FMA instructions, we simply use double for |
76 | | // intermediate results as it is more efficient than using an emulated |
77 | | // version of FMA. |
78 | | #[cfg(any( |
79 | | all( |
80 | | any(target_arch = "x86", target_arch = "x86_64"), |
81 | | target_feature = "fma" |
82 | | ), |
83 | | target_arch = "aarch64" |
84 | | ))] |
85 | | { |
86 | | return backend.fmaf(x, x, x); |
87 | | } |
88 | | #[cfg(not(any( |
89 | | all( |
90 | | any(target_arch = "x86", target_arch = "x86_64"), |
91 | | target_feature = "fma" |
92 | | ), |
93 | | target_arch = "aarch64" |
94 | | )))] |
95 | | { |
96 | 0 | let xd = x as f64; |
97 | 0 | return backend.fma(xd, xd, xd) as f32; |
98 | | } |
99 | 0 | } |
100 | | |
101 | | const C: [u64; 7] = [ |
102 | | 0x3fe0000000000000, |
103 | | 0x3fc55555555557dd, |
104 | | 0x3fa55555555552fa, |
105 | | 0x3f8111110fcd58b7, |
106 | | 0x3f56c16c1717660b, |
107 | | 0x3f2a0241f0006d62, |
108 | | 0x3efa01e3f8d3c060, |
109 | | ]; |
110 | | |
111 | | // 2^-25 <= |x| < 2^-4 |
112 | 0 | let xd = x as f64; |
113 | 0 | let xsq = xd * xd; |
114 | | // Degree-8 minimax polynomial generated by Sollya with: |
115 | | // > display = hexadecimal; |
116 | | // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]); |
117 | | |
118 | 0 | return backend.fma( |
119 | 0 | backend.polyeval7( |
120 | 0 | xd, |
121 | 0 | f64::from_bits(C[0]), |
122 | 0 | f64::from_bits(C[1]), |
123 | 0 | f64::from_bits(C[2]), |
124 | 0 | f64::from_bits(C[3]), |
125 | 0 | f64::from_bits(C[4]), |
126 | 0 | f64::from_bits(C[5]), |
127 | 0 | f64::from_bits(C[6]), |
128 | 0 | ), |
129 | 0 | xsq, |
130 | 0 | xd, |
131 | 0 | ) as f32; |
132 | 0 | } |
133 | | |
134 | | // For -104 < x < 89, to compute expm1(x), we perform the following range |
135 | | // reduction: find hi, mid, lo such that: |
136 | | // x = hi + mid + lo, in which |
137 | | // hi is an integer, |
138 | | // mid * 2^7 is an integer |
139 | | // -2^(-8) <= lo < 2^-8. |
140 | | // In particular, |
141 | | // hi + mid = round(x * 2^7) * 2^(-7). |
142 | | // Then, |
143 | | // expm1(x) = expm1(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) - 1. |
144 | | // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2 |
145 | | // respectively. exp(lo) is computed using a degree-4 minimax polynomial |
146 | | // generated by Sollya. |
147 | | |
148 | | // x_hi = (hi + mid) * 2^7 = round(x * 2^7). |
149 | 0 | let kf = backend.roundf(x * 128.); |
150 | | // Subtract (hi + mid) from x to get lo. |
151 | 0 | let xd = backend.fmaf(kf, -0.0078125 /* - 1/128 */, x) as f64; |
152 | 0 | let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate. |
153 | 0 | x_hi += 104 << 7; |
154 | | // hi = x_hi >> 7 |
155 | 0 | let exp_hi = f64::from_bits(crate::exponents::expf::EXP_M1[(x_hi >> 7) as usize]); |
156 | | // mid * 2^7 = x_hi & 0x0000'007fU; |
157 | 0 | let exp_mid = f64::from_bits(crate::exponents::expf::EXP_M2[(x_hi & 0x7f) as usize]); |
158 | | // Degree-4 minimax polynomial generated by Sollya with the following |
159 | | // commands: |
160 | | // d = [-2^-8, 2^-8]; |
161 | | // f_exp = expm1(x)/x; |
162 | | // Q = fpminimax(f_exp, 3, [|D...|], [-2^-8, 2^-8]); |
163 | 0 | let p = backend.polyeval5( |
164 | 0 | xd, |
165 | | 1., |
166 | 0 | f64::from_bits(0x3feffffffffff777), |
167 | 0 | f64::from_bits(0x3fe000000000071c), |
168 | 0 | f64::from_bits(0x3fc555566668e5e7), |
169 | 0 | f64::from_bits(0x3fa55555555ef243), |
170 | | ); |
171 | 0 | backend.fma(p * exp_hi, exp_mid, -1.) as f32 |
172 | 0 | } Unexecuted instantiation: pxfm::exponents::expm1f::expm1f_gen::<pxfm::exponents::expf::FmaBackend> Unexecuted instantiation: pxfm::exponents::expm1f::expm1f_gen::<pxfm::exponents::expf::GenericExpfBackend> |
173 | | |
174 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
175 | | #[target_feature(enable = "avx", enable = "fma")] |
176 | 0 | unsafe fn expm1f_fma_impl(x: f32) -> f32 { |
177 | | use crate::exponents::expf::FmaBackend; |
178 | 0 | expm1f_gen(x, FmaBackend {}) |
179 | 0 | } |
180 | | |
181 | | /// Computes e^x - 1 |
182 | | /// |
183 | | /// Max ULP 0.5 |
184 | | #[inline] |
185 | 0 | pub fn f_expm1f(x: f32) -> f32 { |
186 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
187 | | { |
188 | | expm1f_gen(x, GenericExpfBackend {}) |
189 | | } |
190 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
191 | | { |
192 | | use std::sync::OnceLock; |
193 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
194 | 0 | let q = EXECUTOR.get_or_init(|| { |
195 | 0 | if std::arch::is_x86_feature_detected!("avx") |
196 | 0 | && std::arch::is_x86_feature_detected!("fma") |
197 | | { |
198 | 0 | expm1f_fma_impl |
199 | | } else { |
200 | 0 | fn def_expm1f(x: f32) -> f32 { |
201 | 0 | expm1f_gen(x, GenericExpfBackend {}) |
202 | 0 | } |
203 | 0 | def_expm1f |
204 | | } |
205 | 0 | }); |
206 | 0 | unsafe { q(x) } |
207 | | } |
208 | 0 | } |
209 | | |
210 | | #[cfg(test)] |
211 | | mod tests { |
212 | | use crate::f_expm1f; |
213 | | |
214 | | #[test] |
215 | | fn test_expm1f() { |
216 | | assert_eq!(f_expm1f(-3.0923562e-5), -3.0923085e-5); |
217 | | assert_eq!(f_expm1f(2.213121), 8.144211); |
218 | | assert_eq!(f_expm1f(-3.213121), -0.9597691); |
219 | | assert_eq!(f_expm1f(-2.35099e-38), -2.35099e-38); |
220 | | assert_eq!( |
221 | | f_expm1f(0.00000000000000000000000000000000000004355616), |
222 | | 0.00000000000000000000000000000000000004355616 |
223 | | ); |
224 | | assert_eq!(f_expm1f(25.12315), 81441420000.0); |
225 | | assert_eq!(f_expm1f(12.986543), 436498.6); |
226 | | assert_eq!(f_expm1f(-12.986543), -0.99999774); |
227 | | assert_eq!(f_expm1f(-25.12315), -1.0); |
228 | | assert_eq!(f_expm1f(f32::INFINITY), f32::INFINITY); |
229 | | assert_eq!(f_expm1f(f32::NEG_INFINITY), -1.); |
230 | | assert!(f_expm1f(f32::NAN).is_nan()); |
231 | | } |
232 | | } |