/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/exponents/exp2f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{f_fmla, f_fmlaf, pow2if}; |
30 | | use crate::polyeval::f_polyeval6; |
31 | | use crate::rounding::CpuRound; |
32 | | use std::hint::black_box; |
33 | | |
34 | | const TBLSIZE: usize = 64; |
35 | | |
36 | | #[repr(align(64))] |
37 | | struct Exp2Table([(u32, u32); TBLSIZE]); |
38 | | |
39 | | #[rustfmt::skip] |
40 | | static EXP2FT: Exp2Table = Exp2Table([(0x3F3504F3, 0xB2D4175E),(0x3F36FD92, 0x3268D5EF),(0x3F38FBAF, 0xB30E8719),(0x3F3AFF5B, 0x3319E7DA),(0x3F3D08A4, 0x333CD82F),(0x3F3F179A, 0x330E1902),(0x3F412C4D, 0x32CCF4D7),(0x3F4346CD, 0x328F330E),(0x3F45672A, 0xB201B5B7),(0x3F478D75, 0x32CCCE34),(0x3F49B9BE, 0x335E937C),(0x3F4BEC15, 0x2FF41909),(0x3F4E248C, 0xB21760EA),(0x3F506334, 0x3283628B),(0x3F52A81E, 0x3340F500),(0x3F54F35B, 0x331202BD),(0x3F5744FD, 0x32B66A3E),(0x3F599D16, 0x32D0D9B1),(0x3F5BFBB8, 0x332ED93F),(0x3F5E60F5, 0x3350A709),(0x3F60CCDF, 0x32025744),(0x3F633F89, 0xB33A7C4D),(0x3F65B907, 0x321DA4E9),(0x3F68396A, 0xB2FF36A7),(0x3F6AC0C7, 0x3217E40E),(0x3F6D4F30, 0xB2400CBB),(0x3F6FE4BA, 0x331A2ACC),(0x3F728177, 0xB2B7D3E5),(0x3F75257D, 0xB1FED2BE),(0x3F77D0DF, 0xB32B73BA),(0x3F7A83B3, 0x32579081),(0x3F7D3E0C, 0xB19726B5),(0x3F800000, 0x00000000),(0x3F8164D2, 0x320C09FB),(0x3F82CD87, 0x3391E031),(0x3F843A29, 0x33287EEF),(0x3F85AAC3, 0xB38F6665),(0x3F871F62, 0x339004AB),(0x3F88980F, 0x33AC4561),(0x3F8A14D5, 0xB39CDAEA),(0x3F8B95C2, 0x32949D5C),(0x3F8D1ADF, 0xB36F79FA),(0x3F8EA43A, 0x33971DC2),(0x3F9031DC, 0xB32BD022),(0x3F91C3D3, 0xB3928952),(0x3F935A2B, 0xB2EBFECF),(0x3F94F4F0, 0x3357B8BB),(0x3F96942D, 0xB307353B),(0x3F9837F0, 0xB345DFE9),(0x3F99E046, 0x3382A804),(0x3F9B8D3A, 0x3326993E),(0x3F9D3EDA, 0x3350A029),(0x3F9EF532, 0xB3605F62),(0x3FA0B051, 0xB210909B),(0x3FA27043, 0xB0DDC369),(0x3FA43516, 0x33385844),(0x3FA5FED7, 0x33400757),(0x3FA7CD94, 0x3325446E),(0x3FA9A15B, 0x33237A50),(0x3FAB7A3A, 0x33201CA4),(0x3FAD583F, 0x32394687),(0x3FAF3B79, 0x332E1225),(0x3FB123F6, 0x33838969),(0x3FB311C4, 0xB219F2BA)]); |
41 | | |
42 | | /** |
43 | | Generated by SageMath: |
44 | | ```python |
45 | | print("[") |
46 | | for k in range(64): |
47 | | k = RealField(150)(2)**(RealField(150)(k) / RealField(150)(64)) |
48 | | print(double_to_hex(k) + ",") |
49 | | print("];") |
50 | | ``` |
51 | | **/ |
52 | | pub(crate) static EXP2F_TABLE: [u64; 64] = [ |
53 | | 0x3ff0000000000000, |
54 | | 0x3ff02c9a3e778061, |
55 | | 0x3ff059b0d3158574, |
56 | | 0x3ff0874518759bc8, |
57 | | 0x3ff0b5586cf9890f, |
58 | | 0x3ff0e3ec32d3d1a2, |
59 | | 0x3ff11301d0125b51, |
60 | | 0x3ff1429aaea92de0, |
61 | | 0x3ff172b83c7d517b, |
62 | | 0x3ff1a35beb6fcb75, |
63 | | 0x3ff1d4873168b9aa, |
64 | | 0x3ff2063b88628cd6, |
65 | | 0x3ff2387a6e756238, |
66 | | 0x3ff26b4565e27cdd, |
67 | | 0x3ff29e9df51fdee1, |
68 | | 0x3ff2d285a6e4030b, |
69 | | 0x3ff306fe0a31b715, |
70 | | 0x3ff33c08b26416ff, |
71 | | 0x3ff371a7373aa9cb, |
72 | | 0x3ff3a7db34e59ff7, |
73 | | 0x3ff3dea64c123422, |
74 | | 0x3ff4160a21f72e2a, |
75 | | 0x3ff44e086061892d, |
76 | | 0x3ff486a2b5c13cd0, |
77 | | 0x3ff4bfdad5362a27, |
78 | | 0x3ff4f9b2769d2ca7, |
79 | | 0x3ff5342b569d4f82, |
80 | | 0x3ff56f4736b527da, |
81 | | 0x3ff5ab07dd485429, |
82 | | 0x3ff5e76f15ad2148, |
83 | | 0x3ff6247eb03a5585, |
84 | | 0x3ff6623882552225, |
85 | | 0x3ff6a09e667f3bcd, |
86 | | 0x3ff6dfb23c651a2f, |
87 | | 0x3ff71f75e8ec5f74, |
88 | | 0x3ff75feb564267c9, |
89 | | 0x3ff7a11473eb0187, |
90 | | 0x3ff7e2f336cf4e62, |
91 | | 0x3ff82589994cce13, |
92 | | 0x3ff868d99b4492ed, |
93 | | 0x3ff8ace5422aa0db, |
94 | | 0x3ff8f1ae99157736, |
95 | | 0x3ff93737b0cdc5e5, |
96 | | 0x3ff97d829fde4e50, |
97 | | 0x3ff9c49182a3f090, |
98 | | 0x3ffa0c667b5de565, |
99 | | 0x3ffa5503b23e255d, |
100 | | 0x3ffa9e6b5579fdbf, |
101 | | 0x3ffae89f995ad3ad, |
102 | | 0x3ffb33a2b84f15fb, |
103 | | 0x3ffb7f76f2fb5e47, |
104 | | 0x3ffbcc1e904bc1d2, |
105 | | 0x3ffc199bdd85529c, |
106 | | 0x3ffc67f12e57d14b, |
107 | | 0x3ffcb720dcef9069, |
108 | | 0x3ffd072d4a07897c, |
109 | | 0x3ffd5818dcfba487, |
110 | | 0x3ffda9e603db3285, |
111 | | 0x3ffdfc97337b9b5f, |
112 | | 0x3ffe502ee78b3ff6, |
113 | | 0x3ffea4afa2a490da, |
114 | | 0x3ffefa1bee615a27, |
115 | | 0x3fff50765b6e4540, |
116 | | 0x3fffa7c1819e90d8, |
117 | | ]; |
118 | | |
119 | | /* ULP 0.508 method |
120 | | let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; |
121 | | |
122 | | let ui = f32::to_bits(d + redux); |
123 | | let mut i0 = ui; |
124 | | i0 = i0.wrapping_add(TBLSIZE as u32 / 2); |
125 | | let k = i0 / TBLSIZE as u32; |
126 | | i0 &= TBLSIZE as u32 - 1; |
127 | | let mut uf = f32::from_bits(ui); |
128 | | uf -= redux; |
129 | | |
130 | | let item = EXP2FT.0[i0 as usize]; |
131 | | let z0: f32 = f32::from_bits(item.0); |
132 | | let z1: f32 = f32::from_bits(item.1); |
133 | | |
134 | | let f: f32 = d - uf - z1; |
135 | | |
136 | | let mut u = 0.055504108664458832; |
137 | | u = f_fmlaf(u, f, 0.24022650695908768); |
138 | | u = f_fmlaf(u, f, 0.69314718055994973); |
139 | | u *= f; |
140 | | |
141 | | let i2 = pow2if(k as i32); |
142 | | f_fmlaf(u, z0, z0) * i2 |
143 | | */ |
144 | | |
145 | | /// Computing exp2f |
146 | | /// |
147 | | /// ULP 0.4999994 |
148 | | #[inline] |
149 | 0 | pub fn f_exp2f(x: f32) -> f32 { |
150 | 0 | let mut t = x.to_bits(); |
151 | | |
152 | 0 | if (t & 0xffff) == 0 { |
153 | | // x maybe integer |
154 | 0 | let k: i32 = (((t >> 23) & 0xff) as i32).wrapping_sub(127); // 2^k <= |x| < 2^(k+1) |
155 | 0 | if k >= 0 && k < 9 && (t << (9i32.wrapping_add(k))) == 0 { |
156 | | // x integer, with 1 <= |x| < 2^9 |
157 | 0 | let msk = (t as i32) >> 31; |
158 | 0 | let mut m: i32 = (((t & 0x7fffff) | (1 << 23)) >> (23 - k)) as i32; |
159 | 0 | m = (m ^ msk).wrapping_sub(msk).wrapping_add(127); |
160 | 0 | if m > 0 && m < 255 { |
161 | 0 | t = (m as u32).wrapping_shl(23); |
162 | 0 | return f32::from_bits(t); |
163 | 0 | } else if m <= 0 && m > -23 { |
164 | 0 | t = 1i32.wrapping_shl(22i32.wrapping_add(m) as u32) as u32; |
165 | 0 | return f32::from_bits(t); |
166 | 0 | } |
167 | 0 | } |
168 | 0 | } |
169 | 0 | let ux = t.wrapping_shl(1); |
170 | | |
171 | 0 | if ux >= 0x86000000u32 || ux < 0x65000000u32 { |
172 | | // |x| >= 128 or x=nan or |x| < 0x1p-26 |
173 | 0 | if ux < 0x65000000u32 { |
174 | 0 | return 1.0 + x; |
175 | 0 | } // |x| < 0x1p-26 |
176 | | // if x < -149 or 128 <= x is special |
177 | 0 | if !(t >= 0xc3000000u32 && t < 0xc3150000u32) { |
178 | 0 | if ux >= 0xffu32 << 24 { |
179 | | // x is inf or nan |
180 | 0 | if ux > (0xffu32 << 24) { |
181 | 0 | return x + x; |
182 | 0 | } // x = nan |
183 | | static IR: [f32; 2] = [f32::INFINITY, 0.]; |
184 | 0 | return IR[(t >> 31) as usize]; // x = +-inf |
185 | 0 | } |
186 | 0 | if t >= 0xc3150000u32 { |
187 | | // x < -149 |
188 | 0 | let z = x; |
189 | 0 | let mut y = f_fmla( |
190 | 0 | z as f64 + 149., |
191 | 0 | f64::from_bits(0x3690000000000000), |
192 | 0 | f64::from_bits(0x36a0000000000000), |
193 | | ); |
194 | 0 | y = y.max(f64::from_bits(0x3680000000000000)); |
195 | 0 | return y as f32; |
196 | 0 | } |
197 | | // now x >= 128 |
198 | 0 | let r = black_box(f64::from_bits(0x47e0000000000000)) |
199 | 0 | * black_box(f64::from_bits(0x47e0000000000000)); |
200 | 0 | return r as f32; |
201 | 0 | } |
202 | 0 | } |
203 | | |
204 | 0 | if ux <= 0x7a000000u32 { |
205 | | // |x| < 1/32 |
206 | | |
207 | | // Generated by Sollya exp2 on range [-1/32;1/32]: |
208 | | // d = [-1/32, 1/32]; |
209 | | // f_exp2f = (2^y - 1)/y; |
210 | | // Q = fpminimax(f_exp2f, 5, [|D...|], d, relative, floating); |
211 | | |
212 | | // See ./notes/exp2f_small.sollya |
213 | | const C: [u64; 6] = [ |
214 | | 0x3fe62e42fefa39f3, |
215 | | 0x3fcebfbdff82c57b, |
216 | | 0x3fac6b08d6f2d7aa, |
217 | | 0x3f83b2ab6fc92f5d, |
218 | | 0x3f55d897cfe27125, |
219 | | 0x3f243090e61e6af1, |
220 | | ]; |
221 | 0 | let xd = x as f64; |
222 | 0 | let p = f_polyeval6( |
223 | 0 | xd, |
224 | 0 | f64::from_bits(C[0]), |
225 | 0 | f64::from_bits(C[1]), |
226 | 0 | f64::from_bits(C[2]), |
227 | 0 | f64::from_bits(C[3]), |
228 | 0 | f64::from_bits(C[4]), |
229 | 0 | f64::from_bits(C[5]), |
230 | | ); |
231 | 0 | return f_fmla(p, xd, 1.) as f32; |
232 | 0 | } |
233 | | |
234 | 0 | let x_d = x as f64; |
235 | 0 | let kf = (x_d * 64.).cpu_round(); |
236 | 0 | let k = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate. |
237 | | // dx = lo = x - (hi + mid) = x - kf * 2^(-6) |
238 | 0 | let dx = f_fmla(f64::from_bits(0xbf90000000000000), kf, x_d); |
239 | | |
240 | | const TABLE_BITS: u32 = 6; |
241 | | const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1; |
242 | | |
243 | | // hi = floor(kf * 2^(-5)) |
244 | | // exp_hi = shift hi to the exponent field of double precision. |
245 | 0 | let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52); |
246 | | |
247 | | // mh = 2^hi * 2^mid |
248 | | // mh_bits = bit field of mh |
249 | 0 | let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi); |
250 | 0 | let mh = f64::from_bits(mh_bits as u64); |
251 | | |
252 | | // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with: |
253 | | // > P = fpminimax((2^y - 1)/y, 4, [|D...|], [-1/64. 1/64]); |
254 | | // see ./notes/exp2f.sollya |
255 | | const C: [u64; 5] = [ |
256 | | 0x3fe62e42fefa39ef, |
257 | | 0x3fcebfbdff8131c4, |
258 | | 0x3fac6b08d7061695, |
259 | | 0x3f83b2b1bee74b2a, |
260 | | 0x3f55d88091198529, |
261 | | ]; |
262 | 0 | let dx_sq = dx * dx; |
263 | 0 | let c1 = f_fmla(dx, f64::from_bits(C[0]), 1.0); |
264 | 0 | let c2 = f_fmla(dx, f64::from_bits(C[2]), f64::from_bits(C[1])); |
265 | 0 | let c3 = f_fmla(dx, f64::from_bits(C[4]), f64::from_bits(C[3])); |
266 | 0 | let p = f_fmla(dx_sq, c3, c2); |
267 | | // 2^x = 2^(hi + mid + lo) |
268 | | // = 2^(hi + mid) * 2^lo |
269 | | // ~ mh * (1 + lo * P(lo)) |
270 | | // = mh + (mh*lo) * P(lo) |
271 | 0 | f_fmla(p, dx_sq * mh, c1 * mh) as f32 |
272 | 0 | } |
273 | | |
274 | | #[inline] |
275 | 0 | pub(crate) fn dirty_exp2f(d: f32) -> f32 { |
276 | 0 | let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; |
277 | | |
278 | 0 | let ui = f32::to_bits(d + redux); |
279 | 0 | let mut i0 = ui; |
280 | 0 | i0 = i0.wrapping_add(TBLSIZE as u32 / 2); |
281 | 0 | let k = i0 / TBLSIZE as u32; |
282 | 0 | i0 &= TBLSIZE as u32 - 1; |
283 | 0 | let mut uf = f32::from_bits(ui); |
284 | 0 | uf -= redux; |
285 | | |
286 | 0 | let item = EXP2FT.0[i0 as usize]; |
287 | 0 | let z0: f32 = f32::from_bits(item.0); |
288 | | |
289 | 0 | let f: f32 = d - uf; |
290 | | |
291 | 0 | let mut u = 0.24022650695908768; |
292 | 0 | u = f_fmlaf(u, f, 0.69314718055994973); |
293 | 0 | u *= f; |
294 | | |
295 | 0 | let i2 = pow2if(k as i32); |
296 | 0 | f_fmlaf(u, z0, z0) * i2 |
297 | 0 | } Unexecuted instantiation: pxfm::exponents::exp2f::dirty_exp2f Unexecuted instantiation: pxfm::exponents::exp2f::dirty_exp2f |
298 | | |
299 | | #[cfg(test)] |
300 | | mod tests { |
301 | | use super::*; |
302 | | |
303 | | #[test] |
304 | | fn test_exp2f() { |
305 | | assert_eq!(f_exp2f(1. / 64.), 1.0108893); |
306 | | assert_eq!(f_exp2f(2.0), 4.0); |
307 | | assert_eq!(f_exp2f(3.0), 8.0); |
308 | | assert_eq!(f_exp2f(4.0), 16.0); |
309 | | assert_eq!(f_exp2f(10.0), 1024.0); |
310 | | assert_eq!(f_exp2f(-10.0), 0.0009765625); |
311 | | assert!(f_exp2f(f32::NAN).is_nan()); |
312 | | assert_eq!(f_exp2f(-0.35), 0.7845841); |
313 | | assert_eq!(f_exp2f(0.35), 1.2745606); |
314 | | assert!(f_exp2f(f32::INFINITY).is_infinite()); |
315 | | assert_eq!(f_exp2f(f32::NEG_INFINITY), 0.0); |
316 | | } |
317 | | |
318 | | #[test] |
319 | | fn test_dirty_exp2f() { |
320 | | assert!((dirty_exp2f(0.35f32) - 0.35f32.exp2()).abs() < 1e-5); |
321 | | assert!((dirty_exp2f(-0.6f32) - (-0.6f32).exp2()).abs() < 1e-5); |
322 | | } |
323 | | } |