/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/exponents/exp2.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dd_fmla, f_fmla}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::exponents::auxiliary::fast_ldexp; |
32 | | use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal}; |
33 | | use crate::rounding::CpuRoundTiesEven; |
34 | | |
35 | | #[inline] |
36 | 0 | fn exp2_poly_dd(z: f64) -> DoubleDouble { |
37 | | const C: [(u64, u64); 6] = [ |
38 | | (0x3bbabc9e3b39873e, 0x3f262e42fefa39ef), |
39 | | (0xbae5e43a53e44950, 0x3e4ebfbdff82c58f), |
40 | | (0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0), |
41 | | (0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77), |
42 | | (0xb83dc47e47beb9dd, 0x3b95d87fe7a66459), |
43 | | (0xb744fcd51fcb7640, 0x3aa430912f9fb79d), |
44 | | ]; |
45 | | |
46 | 0 | let mut r = DoubleDouble::quick_mul_f64_add( |
47 | 0 | DoubleDouble::from_bit_pair(C[5]), |
48 | 0 | z, |
49 | 0 | DoubleDouble::from_bit_pair(C[4]), |
50 | | ); |
51 | 0 | r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[3])); |
52 | 0 | r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[2])); |
53 | 0 | r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[1])); |
54 | 0 | DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[0])) |
55 | 0 | } |
56 | | |
57 | | #[cold] |
58 | 0 | fn exp2_accurate(x: f64) -> f64 { |
59 | 0 | let mut ix = x.to_bits(); |
60 | 0 | let sx = 4096.0 * x; |
61 | 0 | let fx = sx.cpu_round_ties_even(); |
62 | 0 | let z = sx - fx; |
63 | 0 | let k: i64 = unsafe { |
64 | 0 | fx.to_int_unchecked::<i64>() // this is already finite here |
65 | | }; |
66 | 0 | let i1 = k & 0x3f; |
67 | 0 | let i0 = (k >> 6) & 0x3f; |
68 | 0 | let ie = k >> 12; |
69 | | |
70 | 0 | let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]); |
71 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]); |
72 | 0 | let dt = DoubleDouble::quick_mult(t0, t1); |
73 | | |
74 | 0 | let mut f = exp2_poly_dd(z); |
75 | 0 | f = DoubleDouble::quick_mult_f64(f, z); |
76 | 0 | if ix <= 0xc08ff00000000000u64 { |
77 | | // x >= -1022 |
78 | | // for -0x1.71547652b82fep-54 <= x <= 0x1.71547652b82fdp-53, |
79 | | // exp2(x) round to x to nearest |
80 | 0 | if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) { |
81 | 0 | return dd_fmla(x, 0.5, 1.0); |
82 | 0 | } else if (k & 0xfff) == 0 { |
83 | | // 4096*x rounds to 4096*integer |
84 | 0 | let zf = DoubleDouble::from_exact_add(dt.hi, f.hi); |
85 | 0 | let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo); |
86 | 0 | f.hi = zf.hi; |
87 | 0 | f.lo = zfl.hi; |
88 | 0 | ix = zfl.hi.to_bits(); |
89 | 0 | if ix & 0x000fffffffffffff == 0 { |
90 | | // fl is a power of 2 |
91 | 0 | if ((ix >> 52) & 0x7ff) != 0 { |
92 | 0 | // |fl| is Inf |
93 | 0 | let v = zfl.lo.to_bits(); |
94 | 0 | let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64) |
95 | 0 | .wrapping_shl(1) |
96 | 0 | .wrapping_add(1) as i64; |
97 | 0 | ix = ix.wrapping_add(d as u64); |
98 | 0 | f.lo = f64::from_bits(ix); |
99 | 0 | } |
100 | 0 | } |
101 | 0 | } else { |
102 | 0 | f = DoubleDouble::quick_mult(f, dt); |
103 | 0 | f = DoubleDouble::add(dt, f); |
104 | 0 | } |
105 | 0 | let hf = DoubleDouble::from_exact_add(f.hi, f.lo); |
106 | | |
107 | 0 | fast_ldexp(hf.hi, ie as i32) |
108 | | } else { |
109 | 0 | ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52); |
110 | 0 | f = DoubleDouble::quick_mult(f, dt); |
111 | 0 | f = DoubleDouble::add(dt, f); |
112 | 0 | let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi); |
113 | 0 | f.hi = zve.hi; |
114 | 0 | f.lo += zve.lo; |
115 | | |
116 | 0 | to_denormal(f.to_f64()) |
117 | | } |
118 | 0 | } |
119 | | |
120 | | /// Computes exp2 |
121 | | /// |
122 | | /// Max found ULP 0.5 |
123 | 0 | pub fn f_exp2(x: f64) -> f64 { |
124 | 0 | let mut ix = x.to_bits(); |
125 | 0 | let ax = ix.wrapping_shl(1); |
126 | 0 | if ax == 0 { |
127 | 0 | return 1.0; |
128 | 0 | } |
129 | 0 | if ax >= 0x8120000000000000u64 { |
130 | | // |x| >= 1024 |
131 | 0 | if ax > 0xffe0000000000000u64 { |
132 | 0 | return x + x; // nan |
133 | 0 | } |
134 | 0 | if ax == 0xffe0000000000000u64 { |
135 | 0 | return if (ix >> 63) != 0 { 0.0 } else { x }; |
136 | 0 | } |
137 | | // +/-inf |
138 | 0 | if (ix >> 63) != 0 { |
139 | | // x <= -1024 |
140 | 0 | if ix >= 0xc090cc0000000000u64 { |
141 | | // x <= -1075 |
142 | | const Z: f64 = f64::from_bits(0x0010000000000000); |
143 | 0 | return Z * Z; |
144 | 0 | } |
145 | | } else { |
146 | | // x >= 1024 |
147 | 0 | return f64::from_bits(0x7fe0000000000000) * x; |
148 | | } |
149 | 0 | } |
150 | | |
151 | | // for |x| <= 0x1.71547652b82fep-54, 2^x rounds to 1 to nearest |
152 | | // this avoids a spurious underflow in z*z below |
153 | 0 | if ax <= 0x792e2a8eca5705fcu64 { |
154 | 0 | return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x); |
155 | 0 | } |
156 | | |
157 | 0 | let m = ix.wrapping_shl(12); |
158 | 0 | let ex = (ax >> 53).wrapping_sub(0x3ff); |
159 | 0 | let frac = ex >> 63 | m << (ex & 63); |
160 | 0 | let sx = 4096.0 * x; |
161 | 0 | let fx = sx.cpu_round_ties_even(); |
162 | 0 | let z = sx - fx; |
163 | 0 | let z2 = z * z; |
164 | 0 | let k = unsafe { |
165 | 0 | fx.to_int_unchecked::<i64>() // this already finite here |
166 | | }; |
167 | 0 | let i1 = k & 0x3f; |
168 | 0 | let i0 = (k >> 6) & 0x3f; |
169 | 0 | let ie = k >> 12; |
170 | 0 | let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]); |
171 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]); |
172 | 0 | let ti0 = DoubleDouble::quick_mult(t0, t1); |
173 | | const C: [u64; 4] = [ |
174 | | 0x3f262e42fefa39ef, |
175 | | 0x3e4ebfbdff82c58f, |
176 | | 0x3d6c6b08d73b3e01, |
177 | | 0x3c83b2ab6fdda001, |
178 | | ]; |
179 | 0 | let tz = ti0.hi * z; |
180 | 0 | let mut fh = ti0.hi; |
181 | | |
182 | 0 | let p0 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0])); |
183 | 0 | let p1 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2])); |
184 | 0 | let p2 = f_fmla(z2, p1, p0); |
185 | | |
186 | 0 | let mut fl = f_fmla(tz, p2, ti0.lo); |
187 | | |
188 | | const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe); |
189 | | |
190 | 0 | if ix <= 0xc08ff00000000000u64 { |
191 | | // x >= -1022 |
192 | 0 | if frac != 0 { |
193 | 0 | let ub = fh + (fl + EPS); |
194 | 0 | fh += fl - EPS; |
195 | 0 | if ub != fh { |
196 | 0 | return exp2_accurate(x); |
197 | 0 | } |
198 | 0 | } |
199 | 0 | fh = fast_ldexp(fh, ie as i32); |
200 | | } else { |
201 | | // subnormal case |
202 | 0 | ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52); |
203 | 0 | let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh); |
204 | 0 | fl += rs.lo; |
205 | 0 | fh = rs.hi; |
206 | 0 | if frac != 0 { |
207 | 0 | let ub = fh + (fl + EPS); |
208 | 0 | fh += fl - EPS; |
209 | 0 | if ub != fh { |
210 | 0 | return exp2_accurate(x); |
211 | 0 | } |
212 | 0 | } |
213 | | // when 2^x is exact, no underflow should be raised |
214 | 0 | fh = to_denormal(fh); |
215 | | } |
216 | 0 | fh |
217 | 0 | } |
218 | | |
219 | | #[cfg(test)] |
220 | | mod tests { |
221 | | use super::*; |
222 | | |
223 | | #[test] |
224 | | fn test_exp2d() { |
225 | | assert_eq!(f_exp2(2.0), 4.0); |
226 | | assert_eq!(f_exp2(3.0), 8.0); |
227 | | assert_eq!(f_exp2(4.0), 16.0); |
228 | | assert_eq!(f_exp2(0.35f64), 1.2745606273192622); |
229 | | assert_eq!(f_exp2(-0.6f64), 0.6597539553864471); |
230 | | assert_eq!(f_exp2(f64::INFINITY), f64::INFINITY); |
231 | | assert_eq!(f_exp2(f64::NEG_INFINITY), 0.); |
232 | | assert!(f_exp2(f64::NAN).is_nan()); |
233 | | } |
234 | | } |