/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp2.rs
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1. Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2. Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3. Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::dd_fmla;
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
use crate::rounding::CpuRoundTiesEven;

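// Polynomial for the final reduction step. The coefficients are double-double
// bit pairs (lo, hi) whose values are close to the Taylor coefficients
// (ln(2)/4096)^(k+1) / (k+1)! (presumably minimax-tuned in the low words), so
// the Horner scheme below evaluates p(z) ~= (2^(z/4096) - 1) / z; callers
// multiply by z and by the table value to finish 2^x.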
#[inline]
fn exp2_poly_dd(z: f64) -> DoubleDouble {
    const C: [(u64, u64); 6] = [
        (0x3bbabc9e3b39873e, 0x3f262e42fefa39ef),
        (0xbae5e43a53e44950, 0x3e4ebfbdff82c58f),
        (0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0),
        (0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77),
        (0xb83dc47e47beb9dd, 0x3b95d87fe7a66459),
        (0xb744fcd51fcb7640, 0x3aa430912f9fb79d),
    ];

    let mut r = DoubleDouble::quick_mul_f64_add(
        DoubleDouble::from_bit_pair(C[5]),
        z,
        DoubleDouble::from_bit_pair(C[4]),
    );
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[3]));
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[2]));
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[1]));
    DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[0]))
}

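// Accurate (cold) path, used when the fast path's rounding test fails.
// Reduction sketch, reconstructed from the index computations below: with
// k = round(4096*x) and z = 4096*x - k,
//   2^x = 2^(k >> 12) * T0[(k >> 6) & 63] * T1[k & 63] * 2^(z/4096),
// where T0 = EXP_REDUCE_T0 and T1 = EXP_REDUCE_T1 are the double-double
// tables shared with exp.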
#[cold]
fn exp2_accurate(x: f64) -> f64 {
    let mut ix = x.to_bits();
    let sx = 4096.0 * x;
    let fx = sx.cpu_round_ties_even();
    let z = sx - fx;
    let k: i64 = unsafe {
        fx.to_int_unchecked::<i64>() // this is already finite here
    };
    let i1 = k & 0x3f;
    let i0 = (k >> 6) & 0x3f;
    let ie = k >> 12;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let dt = DoubleDouble::quick_mult(t0, t1);

    let mut f = exp2_poly_dd(z);
    f = DoubleDouble::quick_mult_f64(f, z);
    if ix <= 0xc08ff00000000000u64 {
        // x >= -1022
        // for -0x1.71547652b82fep-54 <= x <= 0x1.71547652b82fdp-53,
        // exp2(x) rounds to 1 to nearest; fma(x, 0.5, 1.0) reproduces that
        // rounding while still raising inexact
        if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) {
            return dd_fmla(x, 0.5, 1.0);
        } else if (k & 0xfff) == 0 {
            // 4096*x rounds to 4096*integer
            let zf = DoubleDouble::from_exact_add(dt.hi, f.hi);
            let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo);
            f.hi = zf.hi;
            f.lo = zfl.hi;
            ix = zfl.hi.to_bits();
            if ix & 0x000fffffffffffff == 0 {
                // fl is a power of 2
                if ((ix >> 52) & 0x7ff) != 0 {
                    // exponent field is set: fl is neither zero nor subnormal;
                    // nudge fl by one ulp in the direction of the neglected
                    // tail zfl.lo so the final rounding comes out right
                    let v = zfl.lo.to_bits();
                    let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
                        .wrapping_shl(1)
                        .wrapping_add(1) as i64;
                    ix = ix.wrapping_add(d as u64);
                    f.lo = f64::from_bits(ix);
                }
            }
        } else {
            f = DoubleDouble::quick_mult(f, dt);
            f = DoubleDouble::add(dt, f);
        }
        let hf = DoubleDouble::from_exact_add(f.hi, f.lo);

        fast_ldexp(hf.hi, ie as i32)
    } else {
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
        f = DoubleDouble::quick_mult(f, dt);
        f = DoubleDouble::add(dt, f);
        let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
        f.hi = zve.hi;
        f.lo += zve.lo;

        to_denormal(f.to_f64())
    }
}

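// Fast path, generic over the scalar backend (FMA vs. portable). It performs
// the same 4096-way reduction as exp2_accurate but evaluates the 2^(z/4096)
// correction with a short plain-f64 polynomial, then uses the error bound EPS
// below to decide whether the rounding is certain; if not, it defers to
// exp2_accurate.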
#[inline(always)]
fn exp2_gen<B: ExpfBackend>(x: f64, backend: B) -> f64 {
    let mut ix = x.to_bits();
    let ax = ix.wrapping_shl(1);
    if ax == 0 {
        return 1.0;
    }
    if ax >= 0x8120000000000000u64 {
        // |x| >= 1024
        if ax > 0xffe0000000000000u64 {
            return x + x; // nan
        }
        if ax == 0xffe0000000000000u64 {
            // +/-inf: exp2(-inf) = 0, exp2(+inf) = +inf
            return if (ix >> 63) != 0 { 0.0 } else { x };
        }
        if (ix >> 63) != 0 {
            // x <= -1024
            if ix >= 0xc090cc0000000000u64 {
                // x <= -1075
                // smallest positive normal; Z*Z underflows to +0 and raises
                // underflow/inexact
                const Z: f64 = f64::from_bits(0x0010000000000000);
                return Z * Z;
            }
        } else {
            // x >= 1024
            return f64::from_bits(0x7fe0000000000000) * x;
        }
    }

    // for |x| <= 0x1.71547652b82fep-54, 2^x rounds to 1 to nearest
    // this avoids a spurious underflow in z*z below
    if ax <= 0x792e2a8eca5705fcu64 {
        return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x);
    }

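    // frac != 0 exactly when x has fractional mantissa bits, i.e. x is not an
    // integer in the range that reaches here; for integer x the result 2^x is
    // exact, so the rounding test below can be skipped.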
    let m = ix.wrapping_shl(12);
    let ex = (ax >> 53).wrapping_sub(0x3ff);
    let frac = ex >> 63 | m << (ex & 63);
    let sx = 4096.0 * x;
    let fx = backend.round_ties_even(sx);
    let z = sx - fx;
    let z2 = z * z;
    let k = unsafe {
        fx.to_int_unchecked::<i64>() // this is already finite here
    };
    let i1 = k & 0x3f;
    let i0 = (k >> 6) & 0x3f;
    let ie = k >> 12;
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let ti0 = backend.quick_mult(t0, t1);
    const C: [u64; 4] = [
        0x3f262e42fefa39ef,
        0x3e4ebfbdff82c58f,
        0x3d6c6b08d73b3e01,
        0x3c83b2ab6fdda001,
    ];
    let tz = ti0.hi * z;
    let mut fh = ti0.hi;

    let p0 = backend.fma(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
    let p1 = backend.fma(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
    let p2 = backend.fma(z2, p1, p0);

    let mut fl = backend.fma(tz, p2, ti0.lo);

    const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);

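    // Rounding test: fh + (fl - EPS) and fh + (fl + EPS) bracket the exact
    // result. If both bounds collapse to the same f64, the rounding is safe;
    // otherwise fall back to the accurate path.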
    if ix <= 0xc08ff00000000000u64 {
        // x >= -1022
        if frac != 0 {
            let ub = fh + (fl + EPS);
            fh += fl - EPS;
            if ub != fh {
                return exp2_accurate(x);
            }
        }
        fh = fast_ldexp(fh, ie as i32);
    } else {
        // subnormal case
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
        let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
        fl += rs.lo;
        fh = rs.hi;
        if frac != 0 {
            let ub = fh + (fl + EPS);
            fh += fl - EPS;
            if ub != fh {
                return exp2_accurate(x);
            }
        }
        // when 2^x is exact, no underflow should be raised
        fh = to_denormal(fh);
    }
    fh
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn exp2_fma_impl(x: f64) -> f64 {
    use crate::exponents::expf::FmaBackend;
    exp2_gen(x, FmaBackend {})
}

/// Computes exp2 (2^x)
///
/// Max found ULP: 0.5
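///
/// # Example
/// ```
/// // minimal usage sketch; assumes f_exp2 is re-exported at the crate root
/// assert_eq!(pxfm::f_exp2(3.0), 8.0);
/// ```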
pub fn f_exp2(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        exp2_gen(x, GenericExpfBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                exp2_fma_impl
            } else {
                fn def_exp2(x: f64) -> f64 {
                    exp2_gen(x, GenericExpfBackend {})
                }
                def_exp2
            }
        });
        unsafe { q(x) }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_exp2d() {
        assert_eq!(f_exp2(2.0), 4.0);
        assert_eq!(f_exp2(3.0), 8.0);
        assert_eq!(f_exp2(4.0), 16.0);
        assert_eq!(f_exp2(0.35f64), 1.2745606273192622);
        assert_eq!(f_exp2(-0.6f64), 0.6597539553864471);
        assert_eq!(f_exp2(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_exp2(f64::NEG_INFINITY), 0.);
        assert!(f_exp2(f64::NAN).is_nan());
    }
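
    // A few extra edge-case spot checks, added as a sketch; they assume the
    // correctly-rounded behaviour documented on f_exp2.
    #[test]
    fn test_exp2d_edges() {
        assert_eq!(f_exp2(1024.0), f64::INFINITY); // overflow
        assert_eq!(f_exp2(-1.0), 0.5); // exact power of two
        assert_eq!(f_exp2(-1074.0), f64::from_bits(1)); // smallest subnormal
        assert_eq!(f_exp2(-1075.0), 0.0); // halfway case; ties-to-even gives 0
    }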
}