/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/exponents/exp.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{f_fmla, fmla, pow2i, rintk}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::exponents::auxiliary::fast_ldexp; |
32 | | use crate::rounding::CpuRound; |
33 | | |
34 | | /// Exp for given value for const context. |
35 | | /// This is simplified version just to make a good approximation on const context. |
36 | | #[inline] |
37 | 0 | pub const fn exp(d: f64) -> f64 { |
38 | | const EXP_POLY_1_D: f64 = 2f64; |
39 | | const EXP_POLY_2_D: f64 = 0.16666666666666674f64; |
40 | | const EXP_POLY_3_D: f64 = -0.0027777777777777614f64; |
41 | | const EXP_POLY_4_D: f64 = 6.613756613755705e-5f64; |
42 | | const EXP_POLY_5_D: f64 = -1.6534391534392554e-6f64; |
43 | | const EXP_POLY_6_D: f64 = 4.17535139757361979584e-8f64; |
44 | | |
45 | | const L2_U: f64 = 0.693_147_180_559_662_956_511_601_805_686_950_683_593_75; |
46 | | const L2_L: f64 = 0.282_352_905_630_315_771_225_884_481_750_134_360_255_254_120_68_e-12; |
47 | | const R_LN2: f64 = |
48 | | 1.442_695_040_888_963_407_359_924_681_001_892_137_426_645_954_152_985_934_135_449_406_931; |
49 | | |
50 | 0 | let qf = rintk(d * R_LN2); |
51 | 0 | let q = qf as i32; |
52 | | |
53 | 0 | let mut r = fmla(qf, -L2_U, d); |
54 | 0 | r = fmla(qf, -L2_L, r); |
55 | | |
56 | 0 | let f = r * r; |
57 | | // Poly for u = r*(exp(r)+1)/(exp(r)-1) |
58 | 0 | let mut u = EXP_POLY_6_D; |
59 | 0 | u = fmla(u, f, EXP_POLY_5_D); |
60 | 0 | u = fmla(u, f, EXP_POLY_4_D); |
61 | 0 | u = fmla(u, f, EXP_POLY_3_D); |
62 | 0 | u = fmla(u, f, EXP_POLY_2_D); |
63 | 0 | u = fmla(u, f, EXP_POLY_1_D); |
64 | 0 | let u = 1f64 + 2f64 * r / (u - r); |
65 | 0 | let i2 = pow2i(q); |
66 | 0 | u * i2 |
67 | | // if d < -964f64 { |
68 | | // r = 0f64; |
69 | | // } |
70 | | // if d > 709f64 { |
71 | | // r = f64::INFINITY; |
72 | | // } |
73 | 0 | } |
74 | | |
75 | | pub(crate) static EXP_REDUCE_T0: [(u64, u64); 64] = [ |
76 | | (0x0000000000000000, 0x3ff0000000000000), |
77 | | (0xbc719083535b085e, 0x3ff02c9a3e778061), |
78 | | (0x3c8d73e2a475b466, 0x3ff059b0d3158574), |
79 | | (0x3c6186be4bb28500, 0x3ff0874518759bc8), |
80 | | (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f), |
81 | | (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2), |
82 | | (0xbc96c51039449b3a, 0x3ff11301d0125b51), |
83 | | (0xbc932fbf9af1369e, 0x3ff1429aaea92de0), |
84 | | (0xbc819041b9d78a76, 0x3ff172b83c7d517b), |
85 | | (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75), |
86 | | (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa), |
87 | | (0x3c8dc775814a8494, 0x3ff2063b88628cd6), |
88 | | (0x3c99b07eb6c70572, 0x3ff2387a6e756238), |
89 | | (0x3c82bd339940e9da, 0x3ff26b4565e27cdd), |
90 | | (0x3c8612e8afad1256, 0x3ff29e9df51fdee1), |
91 | | (0x3c90024754db41d4, 0x3ff2d285a6e4030b), |
92 | | (0x3c86f46ad23182e4, 0x3ff306fe0a31b715), |
93 | | (0x3c932721843659a6, 0x3ff33c08b26416ff), |
94 | | (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb), |
95 | | (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7), |
96 | | (0x3c8ada0911f09ebc, 0x3ff3dea64c123422), |
97 | | (0xbc5ef3691c309278, 0x3ff4160a21f72e2a), |
98 | | (0x3c489b7a04ef80d0, 0x3ff44e086061892d), |
99 | | (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0), |
100 | | (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27), |
101 | | (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7), |
102 | | (0xbc807abe1db13cac, 0x3ff5342b569d4f82), |
103 | | (0x3c99bb2c011d93ac, 0x3ff56f4736b527da), |
104 | | (0x3c96324c054647ac, 0x3ff5ab07dd485429), |
105 | | (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148), |
106 | | (0xbc9383c17e40b496, 0x3ff6247eb03a5585), |
107 | | (0xbc9bb60987591c34, 0x3ff6623882552225), |
108 | | (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd), |
109 | | (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f), |
110 | | (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74), |
111 | | (0xbc90245957316dd4, 0x3ff75feb564267c9), |
112 | | (0xbc841577ee049930, 0x3ff7a11473eb0187), |
113 | | (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62), |
114 | | (0xbc9d4c1dd41532d8, 0x3ff82589994cce13), |
115 | | (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed), |
116 | | (0x3c96e9f156864b26, 0x3ff8ace5422aa0db), |
117 | | (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736), |
118 | | (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5), |
119 | | (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50), |
120 | | (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090), |
121 | | (0xbc9359495d1cd532, 0x3ffa0c667b5de565), |
122 | | (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d), |
123 | | (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf), |
124 | | (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad), |
125 | | (0xbc62805e3084d708, 0x3ffb33a2b84f15fb), |
126 | | (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47), |
127 | | (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2), |
128 | | (0x3c811065895048de, 0x3ffc199bdd85529c), |
129 | | (0x3c92884dff483cac, 0x3ffc67f12e57d14b), |
130 | | (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069), |
131 | | (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c), |
132 | | (0x3c82ed02d75b3706, 0x3ffd5818dcfba487), |
133 | | (0x3c9c2300696db532, 0x3ffda9e603db3285), |
134 | | (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f), |
135 | | (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6), |
136 | | (0xbc9e9c23179c2894, 0x3ffea4afa2a490da), |
137 | | (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27), |
138 | | (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540), |
139 | | (0x3c874853f3a5931e, 0x3fffa7c1819e90d8), |
140 | | ]; |
141 | | |
142 | | pub(crate) static EXP_REDUCE_T1: [(u64, u64); 64] = [ |
143 | | (0x0000000000000000, 0x3ff0000000000000), |
144 | | (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7), |
145 | | (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052), |
146 | | (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6), |
147 | | (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf), |
148 | | (0x3c984711d4c35ea0, 0x3ff003779a95f959), |
149 | | (0xbc80484245243778, 0x3ff0042936faa3d8), |
150 | | (0xbc94b237da2025fa, 0x3ff004dadb113da0), |
151 | | (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a), |
152 | | (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473), |
153 | | (0xbc94acf197a00142, 0x3ff006eff583fc3d), |
154 | | (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca), |
155 | | (0x3c7da93f90835f76, 0x3ff0085382faef83), |
156 | | (0xbc86a79084ab093c, 0x3ff00905554425d4), |
157 | | (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b), |
158 | | (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd), |
159 | | (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf), |
160 | | (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec), |
161 | | (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02), |
162 | | (0x3c9095a56c919d02, 0x3ff00d30e4d0c483), |
163 | | (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5), |
164 | | (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0), |
165 | | (0xbc991b2060859320, 0x3ff00f4714af41d3), |
166 | | (0x3c8427068ab22306, 0x3ff00ff93412315c), |
167 | | (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11), |
168 | | (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b), |
169 | | (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63), |
170 | | (0xbc734104ee7edae8, 0x3ff012c1fecd613b), |
171 | | (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5), |
172 | | (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278), |
173 | | (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f), |
174 | | (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88), |
175 | | (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335), |
176 | | (0xbc990565902c5f44, 0x3ff016f0169949ed), |
177 | | (0x3c870fc41c5c2d54, 0x3ff017a28af25567), |
178 | | (0x3c94b9a6e145d76c, 0x3ff018550706ab62), |
179 | | (0xbc7008eff5142bfa, 0x3ff019078ad6a19f), |
180 | | (0xbc977669f033c7de, 0x3ff019ba16628de2), |
181 | | (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3), |
182 | | (0x3c9371231477ece6, 0x3ff01b1f44af9f9e), |
183 | | (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4), |
184 | | (0xbc9bc72b100828a4, 0x3ff01c8491f08f08), |
185 | | (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070), |
186 | | (0x3c816996709da2e2, 0x3ff01de9fe280ac8), |
187 | | (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef), |
188 | | (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6), |
189 | | (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35), |
190 | | (0xbc98f06d8a148a32, 0x3ff020b533856324), |
191 | | (0xbc82bf310fc54eb6, 0x3ff02168143b0281), |
192 | | (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e), |
193 | | (0xbc9491793e46834c, 0x3ff022cdece68c4f), |
194 | | (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad), |
195 | | (0xbc9314aa16278aa4, 0x3ff02433e494b755), |
196 | | (0x3c848daf888e9650, 0x3ff024e6ec0da046), |
197 | | (0x3c856dc8046821f4, 0x3ff02599fb483385), |
198 | | (0x3c945b42356b9d46, 0x3ff0264d1244c719), |
199 | | (0xbc7082ef51b61d7e, 0x3ff027003103b10e), |
200 | | (0x3c72106ed0920a34, 0x3ff027b357854772), |
201 | | (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059), |
202 | | (0xbc909f8775e78084, 0x3ff02919bbd1d1d8), |
203 | | (0x3c564cbba902ca28, 0x3ff029ccf99d720a), |
204 | | (0x3c94383ef231d206, 0x3ff02a803f2d170d), |
205 | | (0x3c94a47a505b3a46, 0x3ff02b338c811703), |
206 | | (0x3c9e471202234680, 0x3ff02be6e199c811), |
207 | | ]; |
208 | | |
209 | | // sets the exponent of a binary64 number to 0 (subnormal range) |
210 | | #[inline] |
211 | 0 | pub(crate) fn to_denormal(x: f64) -> f64 { |
212 | 0 | let mut ix = x.to_bits(); |
213 | 0 | ix &= 0x000fffffffffffff; |
214 | 0 | f64::from_bits(ix) |
215 | 0 | } |
216 | | |
217 | | #[inline] |
218 | 0 | fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble { |
219 | | const C: [(u64, u64); 7] = [ |
220 | | (0x0000000000000000, 0x3ff0000000000000), |
221 | | (0x39c712f72ecec2cf, 0x3fe0000000000000), |
222 | | (0x3c65555555554d07, 0x3fc5555555555555), |
223 | | (0x3c455194d28275da, 0x3fa5555555555555), |
224 | | (0x3c012faa0e1c0f7b, 0x3f81111111111111), |
225 | | (0xbbf4ba45ab25d2a3, 0x3f56c16c16da6973), |
226 | | (0xbbc9091d845ecd36, 0x3f2a01a019eb7f31), |
227 | | ]; |
228 | 0 | let mut r = DoubleDouble::quick_mul_add( |
229 | 0 | DoubleDouble::from_bit_pair(C[6]), |
230 | 0 | z, |
231 | 0 | DoubleDouble::from_bit_pair(C[5]), |
232 | | ); |
233 | 0 | r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[4])); |
234 | 0 | r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[3])); |
235 | 0 | r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[2])); |
236 | 0 | r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[1])); |
237 | 0 | DoubleDouble::quick_mul_add_f64(r, z, f64::from_bits(0x3ff0000000000000)) |
238 | 0 | } |
239 | | |
240 | | #[cold] |
241 | 0 | fn as_exp_accurate(x: f64, t: f64, tz: DoubleDouble, ie: i64) -> f64 { |
242 | 0 | let mut ix = x.to_bits(); |
243 | 0 | if ((ix >> 52) & 0x7ff) < 0x3c9 { |
244 | 0 | return 1. + x; |
245 | 0 | } |
246 | | |
247 | | /* Use Cody-Waite argument reduction: since |x| < 745, we have |t| < 2^23, |
248 | | thus since l2h is exactly representable on 29 bits, l2h*t is exact. */ |
249 | | const L2: DoubleDouble = DoubleDouble::new( |
250 | | f64::from_bits(0x3d0718432a1b0e26), |
251 | | f64::from_bits(0x3f262e42ff000000), |
252 | | ); |
253 | | const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3); |
254 | 0 | let dx = f_fmla(-L2.hi, t, x); |
255 | 0 | let dx_dd = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2LL, L2.lo), t); |
256 | 0 | let dz = DoubleDouble::full_add_f64(dx_dd, dx); |
257 | 0 | let mut f = exp_poly_dd(dz); |
258 | 0 | f = DoubleDouble::quick_mult(dz, f); |
259 | 0 | if ix > 0xc086232bdd7abcd2u64 { |
260 | 0 | // x < -708.396 |
261 | 0 | ix = 1i64.wrapping_sub(ie).wrapping_shl(52) as u64; |
262 | 0 | f = DoubleDouble::quick_mult(f, tz); |
263 | 0 | f = DoubleDouble::add(tz, f); |
264 | 0 |
|
265 | 0 | let new_f = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi); |
266 | 0 | f.lo += new_f.lo; |
267 | 0 | f.hi = to_denormal(f.hi + f.lo); |
268 | 0 | } else { |
269 | 0 | if tz.hi == 1.0 { |
270 | 0 | let fhe = DoubleDouble::from_exact_add(tz.hi, f.hi); |
271 | 0 | let fhl = DoubleDouble::from_exact_add(fhe.lo, f.lo); |
272 | 0 | f.hi = fhe.hi; |
273 | 0 | f.lo = fhl.hi; |
274 | 0 | ix = f.lo.to_bits(); |
275 | 0 | if (ix & 0x000fffffffffffff) == 0 { |
276 | 0 | let v = fhl.lo.to_bits(); |
277 | 0 | let d: i64 = (((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64).wrapping_shl(1) |
278 | 0 | as i64) |
279 | 0 | .wrapping_add(1); |
280 | 0 | ix = ix.wrapping_add(d as u64); |
281 | 0 | f.lo = f64::from_bits(ix); |
282 | 0 | } |
283 | 0 | } else { |
284 | 0 | f = DoubleDouble::quick_mult(f, tz); |
285 | 0 | f = DoubleDouble::add(tz, f); |
286 | 0 | } |
287 | 0 | f = DoubleDouble::from_exact_add(f.hi, f.lo); |
288 | 0 | f.hi = fast_ldexp(f.hi, ie as i32); |
289 | | } |
290 | 0 | f.hi |
291 | 0 | } |
292 | | |
293 | | /// Computes exponent |
294 | | /// |
295 | | /// Max found ULP 0.5 |
296 | 0 | pub fn f_exp(x: f64) -> f64 { |
297 | 0 | let mut ix = x.to_bits(); |
298 | 0 | let aix = ix & 0x7fffffffffffffff; |
299 | | // exp(x) rounds to 1 to nearest for |x| <= 5.55112e-17 |
300 | 0 | if aix <= 0x3c90000000000000u64 { |
301 | | // |x| <= 5.55112e-17 |
302 | 0 | return 1.0 + x; |
303 | 0 | } |
304 | 0 | if aix >= 0x40862e42fefa39f0u64 { |
305 | | // |x| >= 709.783 |
306 | 0 | if aix > 0x7ff0000000000000u64 { |
307 | 0 | return x + x; |
308 | 0 | } // nan |
309 | 0 | if aix == 0x7ff0000000000000u64 { |
310 | | // |x| = inf |
311 | 0 | return if (ix >> 63) != 0 { |
312 | 0 | 0.0 // x = -inf |
313 | | } else { |
314 | 0 | x // x = inf |
315 | | }; |
316 | 0 | } |
317 | 0 | if (ix >> 63) == 0 { |
318 | | // x >= 709.783 |
319 | 0 | let z = std::hint::black_box(f64::from_bits(0x7fe0000000000000)); |
320 | 0 | return z * z; |
321 | 0 | } |
322 | 0 | if aix >= 0x40874910d52d3052u64 { |
323 | | // x <= -745.133 |
324 | 0 | return f64::from_bits(0x18000000000000) * f64::from_bits(0x3c80000000000000); |
325 | 0 | } |
326 | 0 | } |
327 | | const S: f64 = f64::from_bits(0x40b71547652b82fe); |
328 | 0 | let t = (x * S).cpu_round(); |
329 | 0 | let jt: i64 = unsafe { |
330 | 0 | t.to_int_unchecked::<i64>() // this is already finite here |
331 | | }; |
332 | 0 | let i0: i64 = (jt >> 6) & 0x3f; |
333 | 0 | let i1 = jt & 0x3f; |
334 | 0 | let ie: i64 = jt >> 12; |
335 | 0 | let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]); |
336 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]); |
337 | 0 | let tz = DoubleDouble::quick_mult(t0, t1); |
338 | | |
339 | | const L2: DoubleDouble = DoubleDouble::new( |
340 | | f64::from_bits(0x3d0718432a1b0e26), |
341 | | f64::from_bits(0x3f262e42ff000000), |
342 | | ); |
343 | | |
344 | | /* Use Cody-Waite argument reduction: since |x| < 745, we have |t| < 2^23, |
345 | | thus since l2h is exactly representable on 29 bits, l2h*t is exact. */ |
346 | 0 | let dx = f_fmla(L2.lo, t, f_fmla(-L2.hi, t, x)); |
347 | 0 | let dx2 = dx * dx; |
348 | | const CH: [u64; 4] = [ |
349 | | 0x3ff0000000000000, |
350 | | 0x3fe0000000000000, |
351 | | 0x3fc55555557e54ff, |
352 | | 0x3fa55555553a12f4, |
353 | | ]; |
354 | | |
355 | 0 | let pw0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2])); |
356 | 0 | let pw1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0])); |
357 | | |
358 | 0 | let p = f_fmla(dx2, pw0, pw1); |
359 | 0 | let mut f = DoubleDouble::new(f_fmla(tz.hi * dx, p, tz.lo), tz.hi); |
360 | | const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe); |
361 | 0 | if ix > 0xc086232bdd7abcd2u64 { |
362 | | // subnormal case: x < -708.396 |
363 | 0 | ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52); |
364 | 0 | let sums = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi); |
365 | 0 | f.hi = sums.hi; |
366 | 0 | f.lo += sums.lo; |
367 | 0 | let ub = f.hi + (f.lo + EPS); |
368 | 0 | let lb = f.hi + (f.lo - EPS); |
369 | 0 | if ub != lb { |
370 | 0 | return as_exp_accurate(x, t, tz, ie); |
371 | 0 | } |
372 | 0 | f.hi = to_denormal(lb); |
373 | | } else { |
374 | 0 | let ub = f.hi + (f.lo + EPS); |
375 | 0 | let lb = f.hi + (f.lo - EPS); |
376 | 0 | if ub != lb { |
377 | 0 | return as_exp_accurate(x, t, tz, ie); |
378 | 0 | } |
379 | 0 | f.hi = fast_ldexp(lb, ie as i32); |
380 | | } |
381 | 0 | f.hi |
382 | 0 | } |
383 | | |
384 | | #[cfg(test)] |
385 | | mod tests { |
386 | | use super::*; |
387 | | |
388 | | #[test] |
389 | | fn exp_test() { |
390 | | assert!( |
391 | | (exp(0f64) - 1f64).abs() < 1e-8, |
392 | | "Invalid result {}", |
393 | | exp(0f64) |
394 | | ); |
395 | | assert!( |
396 | | (exp(5f64) - 148.4131591025766034211155800405522796f64).abs() < 1e-8, |
397 | | "Invalid result {}", |
398 | | exp(5f64) |
399 | | ); |
400 | | } |
401 | | |
402 | | #[test] |
403 | | fn f_exp_test() { |
404 | | assert_eq!(f_exp(0.000000014901161193847656), 1.0000000149011614); |
405 | | assert_eq!(f_exp(0.), 1.); |
406 | | assert_eq!(f_exp(5f64), 148.4131591025766034211155800405522796f64); |
407 | | assert_eq!(f_exp(f64::INFINITY), f64::INFINITY); |
408 | | assert_eq!(f_exp(f64::NEG_INFINITY), 0.); |
409 | | assert!(f_exp(f64::NAN).is_nan()); |
410 | | } |
411 | | } |