/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/exponents/exp2m1.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dd_fmla, dyad_fmla, f_fmla}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::exponents::fast_ldexp; |
32 | | use crate::rounding::CpuFloor; |
33 | | use crate::rounding::CpuRoundTiesEven; |
34 | | |
35 | | const LN2H: f64 = f64::from_bits(0x3fe62e42fefa39ef); |
36 | | const LN2L: f64 = f64::from_bits(0x3c7abc9e3b39803f); |
37 | | |
38 | | struct Exp2m1 { |
39 | | exp: DoubleDouble, |
40 | | err: f64, |
41 | | } |
42 | | |
43 | | /* For 0 <= i < 64, T1[i] = (h,l) such that h+l is the best double-double |
44 | | approximation of 2^(i/64). The approximation error is bounded as follows: |
45 | | |h + l - 2^(i/64)| < 2^-107. */ |
46 | | pub(crate) static EXP_M1_2_TABLE1: [(u64, u64); 64] = [ |
47 | | (0x0000000000000000, 0x3ff0000000000000), |
48 | | (0xbc719083535b085d, 0x3ff02c9a3e778061), |
49 | | (0x3c8d73e2a475b465, 0x3ff059b0d3158574), |
50 | | (0x3c6186be4bb284ff, 0x3ff0874518759bc8), |
51 | | (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f), |
52 | | (0x3c403a1727c57b53, 0x3ff0e3ec32d3d1a2), |
53 | | (0xbc96c51039449b3a, 0x3ff11301d0125b51), |
54 | | (0xbc932fbf9af1369e, 0x3ff1429aaea92de0), |
55 | | (0xbc819041b9d78a76, 0x3ff172b83c7d517b), |
56 | | (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75), |
57 | | (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa), |
58 | | (0x3c8dc775814a8495, 0x3ff2063b88628cd6), |
59 | | (0x3c99b07eb6c70573, 0x3ff2387a6e756238), |
60 | | (0x3c82bd339940e9d9, 0x3ff26b4565e27cdd), |
61 | | (0x3c8612e8afad1255, 0x3ff29e9df51fdee1), |
62 | | (0x3c90024754db41d5, 0x3ff2d285a6e4030b), |
63 | | (0x3c86f46ad23182e4, 0x3ff306fe0a31b715), |
64 | | (0x3c932721843659a6, 0x3ff33c08b26416ff), |
65 | | (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb), |
66 | | (0xbc75e436d661f5e3, 0x3ff3a7db34e59ff7), |
67 | | (0x3c8ada0911f09ebc, 0x3ff3dea64c123422), |
68 | | (0xbc5ef3691c309278, 0x3ff4160a21f72e2a), |
69 | | (0x3c489b7a04ef80d0, 0x3ff44e086061892d), |
70 | | (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0), |
71 | | (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27), |
72 | | (0xbc94b309d25957e3, 0x3ff4f9b2769d2ca7), |
73 | | (0xbc807abe1db13cad, 0x3ff5342b569d4f82), |
74 | | (0x3c99bb2c011d93ad, 0x3ff56f4736b527da), |
75 | | (0x3c96324c054647ad, 0x3ff5ab07dd485429), |
76 | | (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148), |
77 | | (0xbc9383c17e40b497, 0x3ff6247eb03a5585), |
78 | | (0xbc9bb60987591c34, 0x3ff6623882552225), |
79 | | (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd), |
80 | | (0xbc6bbe3a683c88ab, 0x3ff6dfb23c651a2f), |
81 | | (0xbc816e4786887a99, 0x3ff71f75e8ec5f74), |
82 | | (0xbc90245957316dd3, 0x3ff75feb564267c9), |
83 | | (0xbc841577ee04992f, 0x3ff7a11473eb0187), |
84 | | (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62), |
85 | | (0xbc9d4c1dd41532d8, 0x3ff82589994cce13), |
86 | | (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed), |
87 | | (0x3c96e9f156864b27, 0x3ff8ace5422aa0db), |
88 | | (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736), |
89 | | (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5), |
90 | | (0xbc9d185b7c1b85d1, 0x3ff97d829fde4e50), |
91 | | (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090), |
92 | | (0xbc9359495d1cd533, 0x3ffa0c667b5de565), |
93 | | (0xbc9d2f6edb8d41e1, 0x3ffa5503b23e255d), |
94 | | (0x3c90fac90ef7fd31, 0x3ffa9e6b5579fdbf), |
95 | | (0x3c97a1cd345dcc81, 0x3ffae89f995ad3ad), |
96 | | (0xbc62805e3084d708, 0x3ffb33a2b84f15fb), |
97 | | (0xbc75584f7e54ac3b, 0x3ffb7f76f2fb5e47), |
98 | | (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2), |
99 | | (0x3c811065895048dd, 0x3ffc199bdd85529c), |
100 | | (0x3c92884dff483cad, 0x3ffc67f12e57d14b), |
101 | | (0x3c7503cbd1e949db, 0x3ffcb720dcef9069), |
102 | | (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c), |
103 | | (0x3c82ed02d75b3707, 0x3ffd5818dcfba487), |
104 | | (0x3c9c2300696db532, 0x3ffda9e603db3285), |
105 | | (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f), |
106 | | (0x3c839e8980a9cc8f, 0x3ffe502ee78b3ff6), |
107 | | (0xbc9e9c23179c2893, 0x3ffea4afa2a490da), |
108 | | (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27), |
109 | | (0x3c99d3e12dd8a18b, 0x3fff50765b6e4540), |
110 | | (0x3c874853f3a5931e, 0x3fffa7c1819e90d8), |
111 | | ]; |
112 | | |
113 | | /* For 0 <= i < 64, T2[i] = (h,l) such that h+l is the best double-double |
114 | | approximation of 2^(i/2^12). The approximation error is bounded as follows: |
115 | | |h + l - 2^(i/2^12)| < 2^-107. */ |
116 | | pub(crate) static EXP_M1_2_TABLE2: [(u64, u64); 64] = [ |
117 | | (0x0000000000000000, 0x3ff0000000000000), |
118 | | (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7), |
119 | | (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052), |
120 | | (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6), |
121 | | (0xbc8d7c96f201bb2f, 0x3ff002c605e2e8cf), |
122 | | (0x3c984711d4c35e9f, 0x3ff003779a95f959), |
123 | | (0xbc80484245243777, 0x3ff0042936faa3d8), |
124 | | (0xbc94b237da2025f9, 0x3ff004dadb113da0), |
125 | | (0xbc75e00e62d6b30d, 0x3ff0058c86da1c0a), |
126 | | (0x3c9a1d6cedbb9481, 0x3ff0063e3a559473), |
127 | | (0xbc94acf197a00142, 0x3ff006eff583fc3d), |
128 | | (0xbc6eaf2ea42391a5, 0x3ff007a1b865a8ca), |
129 | | (0x3c7da93f90835f75, 0x3ff0085382faef83), |
130 | | (0xbc86a79084ab093c, 0x3ff00905554425d4), |
131 | | (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b), |
132 | | (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd), |
133 | | (0xbc84f6b2a7609f71, 0x3ff00b1afa5abcbf), |
134 | | (0xbc7e1a258ea8f71b, 0x3ff00bcceb7707ec), |
135 | | (0x3c74362ca5bc26f1, 0x3ff00c7ee448ee02), |
136 | | (0x3c9095a56c919d02, 0x3ff00d30e4d0c483), |
137 | | (0xbc6406ac4e81a645, 0x3ff00de2ed0ee0f5), |
138 | | (0x3c9b5a6902767e09, 0x3ff00e94fd0398e0), |
139 | | (0xbc991b2060859321, 0x3ff00f4714af41d3), |
140 | | (0x3c8427068ab22306, 0x3ff00ff93412315c), |
141 | | (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11), |
142 | | (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b), |
143 | | (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63), |
144 | | (0xbc734104ee7edae9, 0x3ff012c1fecd613b), |
145 | | (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5), |
146 | | (0x3c7a8cd33b8a1bb3, 0x3ff01426927f5278), |
147 | | (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f), |
148 | | (0x3c857ba2dc7e0c73, 0x3ff0158b4517bb88), |
149 | | (0x3c9b61299ab8cdb7, 0x3ff0163da9fb3335), |
150 | | (0xbc990565902c5f44, 0x3ff016f0169949ed), |
151 | | (0x3c870fc41c5c2d53, 0x3ff017a28af25567), |
152 | | (0x3c94b9a6e145d76c, 0x3ff018550706ab62), |
153 | | (0xbc7008eff5142bf9, 0x3ff019078ad6a19f), |
154 | | (0xbc977669f033c7de, 0x3ff019ba16628de2), |
155 | | (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3), |
156 | | (0x3c9371231477ece5, 0x3ff01b1f44af9f9e), |
157 | | (0x3c75e7626621eb5b, 0x3ff01bd1e77170b4), |
158 | | (0xbc9bc72b100828a5, 0x3ff01c8491f08f08), |
159 | | (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070), |
160 | | (0x3c816996709da2e2, 0x3ff01de9fe280ac8), |
161 | | (0xbc8c11f5239bf535, 0x3ff01e9cbfe113ef), |
162 | | (0x3c8e1d4eb5edc6b3, 0x3ff01f4f8958c1c6), |
163 | | (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35), |
164 | | (0xbc98f06d8a148a32, 0x3ff020b533856324), |
165 | | (0xbc82bf310fc54eb6, 0x3ff02168143b0281), |
166 | | (0xbc9c95a035eb4175, 0x3ff0221afcb09e3e), |
167 | | (0xbc9491793e46834d, 0x3ff022cdece68c4f), |
168 | | (0xbc73e8d0d9c49091, 0x3ff02380e4dd22ad), |
169 | | (0xbc9314aa16278aa3, 0x3ff02433e494b755), |
170 | | (0x3c848daf888e9651, 0x3ff024e6ec0da046), |
171 | | (0x3c856dc8046821f4, 0x3ff02599fb483385), |
172 | | (0x3c945b42356b9d47, 0x3ff0264d1244c719), |
173 | | (0xbc7082ef51b61d7e, 0x3ff027003103b10e), |
174 | | (0x3c72106ed0920a34, 0x3ff027b357854772), |
175 | | (0xbc9fd4cf26ea5d0f, 0x3ff0286685c9e059), |
176 | | (0xbc909f8775e78084, 0x3ff02919bbd1d1d8), |
177 | | (0x3c564cbba902ca27, 0x3ff029ccf99d720a), |
178 | | (0x3c94383ef231d207, 0x3ff02a803f2d170d), |
179 | | (0x3c94a47a505b3a47, 0x3ff02b338c811703), |
180 | | (0x3c9e47120223467f, 0x3ff02be6e199c811), |
181 | | ]; |
182 | | |
183 | | // Approximation for the fast path of exp(z) for z=zh+zl, |
184 | | // with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6 |
185 | | // (assuming x^y does not overflow or underflow) |
186 | | #[inline] |
187 | 0 | fn q_1(dz: DoubleDouble) -> DoubleDouble { |
188 | | const Q_1: [u64; 5] = [ |
189 | | 0x3ff0000000000000, |
190 | | 0x3ff0000000000000, |
191 | | 0x3fe0000000000000, |
192 | | 0x3fc5555555995d37, |
193 | | 0x3fa55555558489dc, |
194 | | ]; |
195 | 0 | let z = dz.to_f64(); |
196 | 0 | let mut q = f_fmla(f64::from_bits(Q_1[4]), dz.hi, f64::from_bits(Q_1[3])); |
197 | 0 | q = f_fmla(q, z, f64::from_bits(Q_1[2])); |
198 | | |
199 | 0 | let mut p0 = DoubleDouble::from_exact_add(f64::from_bits(Q_1[1]), q * z); |
200 | 0 | p0 = DoubleDouble::quick_mult(dz, p0); |
201 | 0 | p0 = DoubleDouble::f64_add(f64::from_bits(Q_1[0]), p0); |
202 | 0 | p0 |
203 | 0 | } |
204 | | |
205 | | #[inline] |
206 | 0 | fn exp1(x: DoubleDouble) -> DoubleDouble { |
207 | | const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */ |
208 | 0 | let k = (x.hi * INVLOG2).cpu_round_ties_even(); |
209 | | |
210 | | const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef); |
211 | | const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f); |
212 | | const LOG2DD: DoubleDouble = DoubleDouble::new(LOG2L, LOG2H); |
213 | 0 | let zk = DoubleDouble::quick_mult_f64(LOG2DD, k); |
214 | | |
215 | 0 | let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo); |
216 | 0 | yz.lo -= zk.lo; |
217 | | |
218 | 0 | let ik: i64 = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */ |
219 | 0 | let im: i64 = (ik >> 12).wrapping_add(0x3ff); |
220 | 0 | let i2: i64 = (ik >> 6) & 0x3f; |
221 | 0 | let i1: i64 = ik & 0x3f; |
222 | | |
223 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]); |
224 | 0 | let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]); |
225 | | |
226 | 0 | let p0 = DoubleDouble::quick_mult(t2, t1); |
227 | | |
228 | 0 | let mut q = q_1(yz); |
229 | 0 | q = DoubleDouble::quick_mult(p0, q); |
230 | | |
231 | | /* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus |
232 | | M = 2047, which encodes Inf */ |
233 | 0 | let mut du = (im as u64).wrapping_shl(52); |
234 | 0 | if im == 0x7ff { |
235 | 0 | q.hi *= 2.0; |
236 | 0 | q.lo *= 2.0; |
237 | 0 | du = (im.wrapping_sub(1) as u64).wrapping_shl(52); |
238 | 0 | } |
239 | 0 | q.hi *= f64::from_bits(du); |
240 | 0 | q.lo *= f64::from_bits(du); |
241 | 0 | q |
242 | 0 | } |
243 | | |
244 | | #[inline] |
245 | 0 | fn exp2m1_fast(x: f64, tiny: bool) -> Exp2m1 { |
246 | 0 | if tiny { |
247 | 0 | return exp2m1_fast_tiny(x); |
248 | 0 | } |
249 | | /* now -54 < x < -0.125 or 0.125 < x < 1024: we approximate exp(x*log(2)) |
250 | | and subtract 1 */ |
251 | 0 | let mut v = DoubleDouble::from_exact_mult(LN2H, x); |
252 | 0 | v.lo = f_fmla(x, LN2L, v.lo); |
253 | | /* |
254 | | The a_mul() call is exact, and the error of the fma() is bounded by |
255 | | ulp(l). |
256 | | We have |t| <= ulp(h) <= ulp(LN2H*1024) = 2^-43, |
257 | | |t+x*LN2L| <= 2^-43 * 1024*LN2L < 2^-42.7, |
258 | | thus |l| <= |t| + |x*LN2L| + ulp(t+x*LN2L) |
259 | | <= 2^-42.7 + 2^-95 <= 2^-42.6, and ulp(l) <= 2^-95. |
260 | | Thus: |
261 | | |h + l - x*log(2)| <= |h + l - x*(LN2H+LN2L)| + |x|*|LN2H+LN2L-log(2)| |
262 | | <= 2^-95 + 1024*2^-110.4 < 2^-94.9 */ |
263 | | |
264 | 0 | let mut p = exp1(v); |
265 | | |
266 | 0 | let zf: DoubleDouble = if x >= 0. { |
267 | | // implies h >= 1 and the fast_two_sum pre-condition holds |
268 | 0 | DoubleDouble::from_exact_add(p.hi, -1.0) |
269 | | } else { |
270 | 0 | DoubleDouble::from_exact_add(-1.0, p.hi) |
271 | | }; |
272 | 0 | p.lo += zf.lo; |
273 | 0 | p.hi = zf.hi; |
274 | | /* The error in the above fast_two_sum is bounded by 2^-105*|h|, |
275 | | with the new value of h, thus the total absolute error is bounded |
276 | | by eps1*|h_in|+2^-105*|h|. |
277 | | Relatively to h this yields eps1*|h_in/h| + 2^-105, where the maximum |
278 | | of |h_in/h| is obtained for x near -0.125, with |2^x/(2^x-1)| < 11.05. |
279 | | We get a relative error bound of 2^-74.138*11.05 + 2^-105 < 2^-70.67. */ |
280 | 0 | Exp2m1 { |
281 | 0 | exp: p, |
282 | 0 | err: f64::from_bits(0x3b84200000000000) * p.hi, /* 2^-70.67 < 0x1.42p-71 */ |
283 | 0 | } |
284 | 0 | } |
285 | | |
286 | | // Approximation for the accurate path of exp(z) for z=zh+zl, |
287 | | // with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6 |
288 | | // (assuming x^y does not overflow or underflow) |
289 | | #[inline] |
290 | 0 | fn q_2(dz: DoubleDouble) -> DoubleDouble { |
291 | | /* Let q[0]..q[7] be the coefficients of degree 0..7 of Q_2. |
292 | | The ulp of q[7]*z^7 is at most 2^-155, thus we can compute q[7]*z^7 |
293 | | in double precision only. |
294 | | The ulp of q[6]*z^6 is at most 2^-139, thus we can compute q[6]*z^6 |
295 | | in double precision only. |
296 | | The ulp of q[5]*z^5 is at most 2^-124, thus we can compute q[5]*z^5 |
297 | | in double precision only. */ |
298 | | |
299 | | /* The following is a degree-7 polynomial generated by Sollya for exp(z) |
300 | | over [-0.000130273,0.000130273] with absolute error < 2^-113.218 |
301 | | (see file exp_accurate.sollya). Since we use this code only for |
302 | | |x| > 0.125 in exp2m1(x), the corresponding relative error for exp2m1 |
303 | | is about 2^-113.218/|exp2m1(-0.125)| which is about 2^-110. */ |
304 | | const Q_2: [u64; 9] = [ |
305 | | 0x3ff0000000000000, |
306 | | 0x3ff0000000000000, |
307 | | 0x3fe0000000000000, |
308 | | 0x3fc5555555555555, |
309 | | 0x3c655555555c4d26, |
310 | | 0x3fa5555555555555, |
311 | | 0x3f81111111111111, |
312 | | 0x3f56c16c3fbb4213, |
313 | | 0x3f2a01a023ede0d7, |
314 | | ]; |
315 | | |
316 | 0 | let z = dz.to_f64(); |
317 | 0 | let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7])); |
318 | 0 | q = dd_fmla(q, z, f64::from_bits(Q_2[6])); |
319 | 0 | q = dd_fmla(q, z, f64::from_bits(Q_2[5])); |
320 | | |
321 | | // multiply q by z and add Q_2[3] + Q_2[4] |
322 | | |
323 | 0 | let mut p = DoubleDouble::from_exact_mult(q, z); |
324 | 0 | let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi); |
325 | 0 | p.hi = r0.hi; |
326 | 0 | p.lo += r0.lo + f64::from_bits(Q_2[4]); |
327 | | |
328 | | // multiply hi+lo by zh+zl and add Q_2[2] |
329 | | |
330 | 0 | p = DoubleDouble::quick_mult(p, dz); |
331 | 0 | let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi); |
332 | 0 | p.hi = r1.hi; |
333 | 0 | p.lo += r1.lo; |
334 | | |
335 | | // multiply hi+lo by zh+zl and add Q_2[1] |
336 | 0 | p = DoubleDouble::quick_mult(p, dz); |
337 | 0 | let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi); |
338 | 0 | p.hi = r1.hi; |
339 | 0 | p.lo += r1.lo; |
340 | | |
341 | | // multiply hi+lo by zh+zl and add Q_2[0] |
342 | 0 | p = DoubleDouble::quick_mult(p, dz); |
343 | 0 | let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi); |
344 | 0 | p.hi = r1.hi; |
345 | 0 | p.lo += r1.lo; |
346 | 0 | p |
347 | 0 | } |
348 | | |
349 | | // returns a double-double approximation hi+lo of exp(x*log(2)) for |x| < 745 |
350 | | #[inline] |
351 | 0 | fn exp_2(x: f64) -> DoubleDouble { |
352 | 0 | let k = (x * f64::from_bits(0x40b0000000000000)).cpu_round_ties_even(); |
353 | | // since |x| <= 745 we have k <= 3051520 |
354 | | |
355 | 0 | let yhh = f_fmla(-k, f64::from_bits(0x3f30000000000000), x); // exact, |yh| <= 2^-13 |
356 | | |
357 | | /* now x = k + yh, thus 2^x = 2^k * 2^yh, and we multiply yh by log(2) |
358 | | to use the accurate path of exp() */ |
359 | 0 | let ky = DoubleDouble::quick_f64_mult(yhh, DoubleDouble::new(LN2L, LN2H)); |
360 | | |
361 | 0 | let ik: i64 = unsafe { |
362 | 0 | k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion |
363 | | }; |
364 | 0 | let im = (ik >> 12).wrapping_add(0x3ff); |
365 | 0 | let i2 = (ik >> 6) & 0x3f; |
366 | 0 | let i1 = ik & 0x3f; |
367 | | |
368 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]); |
369 | 0 | let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]); |
370 | | |
371 | 0 | let p = DoubleDouble::quick_mult(t2, t1); |
372 | | |
373 | 0 | let mut q = q_2(ky); |
374 | 0 | q = DoubleDouble::quick_mult(p, q); |
375 | 0 | let mut ud: u64 = (im as u64).wrapping_shl(52); |
376 | | |
377 | 0 | if im == 0x7ff { |
378 | 0 | q.hi *= 2.0; |
379 | 0 | q.lo *= 2.0; |
380 | 0 | ud = (im.wrapping_sub(1) as u64).wrapping_shl(52); |
381 | 0 | } |
382 | 0 | q.hi *= f64::from_bits(ud); |
383 | 0 | q.lo *= f64::from_bits(ud); |
384 | 0 | q |
385 | 0 | } |
386 | | |
387 | | #[cold] |
388 | 0 | pub(crate) fn exp2m1_accurate_tiny(x: f64) -> f64 { |
389 | 0 | let x2 = x * x; |
390 | 0 | let x4 = x2 * x2; |
391 | | const Q: [u64; 22] = [ |
392 | | 0x3fe62e42fefa39ef, |
393 | | 0x3c7abc9e3b398040, |
394 | | 0x3fcebfbdff82c58f, |
395 | | 0xbc65e43a53e44dcf, |
396 | | 0x3fac6b08d704a0c0, |
397 | | 0xbc4d331627517168, |
398 | | 0x3f83b2ab6fba4e77, |
399 | | 0x3c14e65df0779f8c, |
400 | | 0x3f55d87fe78a6731, |
401 | | 0x3bd0717fbf4bd050, |
402 | | 0x3f2430912f86c787, |
403 | | 0x3bcbd2bdec9bcd42, |
404 | | 0x3eeffcbfc588b0c7, |
405 | | 0xbb8e60aa6d5e4aa9, |
406 | | 0x3eb62c0223a5c824, |
407 | | 0x3e7b5253d395e7d4, |
408 | | 0x3e3e4cf5158b9160, |
409 | | 0x3dfe8cac734c6058, |
410 | | 0x3dbc3bd64f17199d, |
411 | | 0x3d78161a17e05651, |
412 | | 0x3d33150b3d792231, |
413 | | 0x3cec184260bfad7e, |
414 | | ]; |
415 | 0 | let mut c13 = dd_fmla(f64::from_bits(Q[20]), x, f64::from_bits(Q[19])); // degree 13 |
416 | 0 | let c11 = dd_fmla(f64::from_bits(Q[18]), x, f64::from_bits(Q[17])); // degree 11 |
417 | 0 | c13 = dd_fmla(f64::from_bits(Q[21]), x2, c13); // degree 13 |
418 | | // add Q[16]*x+c11*x2+c13*x4 to Q[15] (degree 9) |
419 | 0 | let mut p = DoubleDouble::from_exact_add( |
420 | 0 | f64::from_bits(Q[15]), |
421 | 0 | f_fmla(f64::from_bits(Q[16]), x, f_fmla(c11, x2, c13 * x4)), |
422 | | ); |
423 | | // multiply h+l by x and add Q[14] (degree 8) |
424 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
425 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi); |
426 | 0 | p.lo += p0.lo; |
427 | 0 | p.hi = p0.hi; |
428 | | |
429 | | // multiply h+l by x and add Q[12]+Q[13] (degree 7) |
430 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
431 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi); |
432 | 0 | p.lo += p0.lo + f64::from_bits(Q[13]); |
433 | 0 | p.hi = p0.hi; |
434 | | // multiply h+l by x and add Q[10]+Q[11] (degree 6) |
435 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
436 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi); |
437 | 0 | p.lo += p0.lo + f64::from_bits(Q[11]); |
438 | 0 | p.hi = p0.hi; |
439 | | // multiply h+l by x and add Q[8]+Q[9] (degree 5) |
440 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
441 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi); |
442 | 0 | p.lo += p0.lo + f64::from_bits(Q[9]); |
443 | 0 | p.hi = p0.hi; |
444 | | // multiply h+l by x and add Q[6]+Q[7] (degree 4) |
445 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
446 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi); |
447 | 0 | p.lo += p0.lo + f64::from_bits(Q[7]); |
448 | 0 | p.hi = p0.hi; |
449 | | // multiply h+l by x and add Q[4]+Q[5] (degree 3) |
450 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
451 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi); |
452 | 0 | p.lo += p0.lo + f64::from_bits(Q[5]); |
453 | 0 | p.hi = p0.hi; |
454 | | // multiply h+l by x and add Q[2]+Q[3] (degree 2) |
455 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
456 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi); |
457 | 0 | p.lo += p0.lo + f64::from_bits(Q[3]); |
458 | 0 | p.hi = p0.hi; |
459 | | // multiply h+l by x and add Q[0]+Q[1] (degree 2) |
460 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
461 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi); |
462 | 0 | p.lo += p0.lo + f64::from_bits(Q[1]); |
463 | 0 | p.hi = p0.hi; |
464 | | // multiply h+l by x |
465 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
466 | 0 | p.to_f64() |
467 | 0 | } |
468 | | |
469 | | #[cold] |
470 | 0 | fn exp2m1_accurate(x: f64) -> f64 { |
471 | 0 | let t = x.to_bits(); |
472 | 0 | let ux = t; |
473 | 0 | let ax = ux & 0x7fffffffffffffffu64; |
474 | | |
475 | 0 | if ax <= 0x3fc0000000000000u64 { |
476 | | // |x| <= 0.125 |
477 | 0 | return exp2m1_accurate_tiny(x); |
478 | 0 | } |
479 | | |
480 | 0 | let mut p = exp_2(x); |
481 | | |
482 | 0 | let zf: DoubleDouble = DoubleDouble::from_full_exact_add(p.hi, -1.0); |
483 | 0 | p.lo += zf.lo; |
484 | 0 | p.hi = zf.hi; |
485 | 0 | p.to_f64() |
486 | 0 | } |
487 | | |
488 | | /* |x| <= 0.125, put in h + l a double-double approximation of exp2m1(x), |
489 | | and return the maximal corresponding absolute error. |
490 | | We also have |x| > 0x1.0527dbd87e24dp-51. |
491 | | With xmin=RR("0x1.0527dbd87e24dp-51",16), the routine |
492 | | exp2m1_fast_tiny_all(xmin,0.125,2^-65.73) in exp2m1.sage returns |
493 | | 1.63414352331297e-20 < 2^-65.73, and |
494 | | exp2m1_fast_tiny_all(-0.125,-xmin,2^-65.62) returns |
495 | | 1.76283772822891e-20 < 2^-65.62, which proves the relative |
496 | | error is bounded by 2^-65.62. */ |
497 | | #[inline] |
498 | 0 | fn exp2m1_fast_tiny(x: f64) -> Exp2m1 { |
499 | | /* The maximal value of |c4*x^4/exp2m1(x)| over [-0.125,0.125] |
500 | | is less than 2^-15.109, where c4 is the degree-4 coefficient, |
501 | | thus we can compute the coefficients of degree 4 or higher |
502 | | using double precision only. */ |
503 | | const P: [u64; 12] = [ |
504 | | 0x3fe62e42fefa39ef, |
505 | | 0x3c7abd1697afcaf8, |
506 | | 0x3fcebfbdff82c58f, |
507 | | 0xbc65e5a1d09e1599, |
508 | | 0x3fac6b08d704a0bf, |
509 | | 0x3f83b2ab6fba4e78, |
510 | | 0x3f55d87fe78a84e6, |
511 | | 0x3f2430912f86a480, |
512 | | 0x3eeffcbfbc1f2b36, |
513 | | 0x3eb62c0226c7f6d1, |
514 | | 0x3e7b539529819e63, |
515 | | 0x3e3e4d552bed5b9c, |
516 | | ]; |
517 | 0 | let x2 = x * x; |
518 | 0 | let x4 = x2 * x2; |
519 | 0 | let mut c8 = dd_fmla(f64::from_bits(P[10]), x, f64::from_bits(P[9])); // degree 8 |
520 | 0 | let c6 = dd_fmla(f64::from_bits(P[8]), x, f64::from_bits(P[7])); // degree 6 |
521 | 0 | let mut c4 = dd_fmla(f64::from_bits(P[6]), x, f64::from_bits(P[5])); // degree 4 |
522 | 0 | c8 = dd_fmla(f64::from_bits(P[11]), x2, c8); // degree 8 |
523 | 0 | c4 = dd_fmla(c6, x2, c4); // degree 4 |
524 | 0 | c4 = dd_fmla(c8, x4, c4); // degree 4 |
525 | | |
526 | 0 | let mut p = DoubleDouble::from_exact_mult(c4, x); |
527 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(P[4]), p.hi); |
528 | 0 | p.lo += p0.lo; |
529 | 0 | p.hi = p0.hi; |
530 | | |
531 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
532 | | |
533 | 0 | let p1 = DoubleDouble::from_exact_add(f64::from_bits(P[2]), p.hi); |
534 | 0 | p.lo += p1.lo + f64::from_bits(P[3]); |
535 | 0 | p.hi = p1.hi; |
536 | | |
537 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
538 | 0 | let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[0]), p.hi); |
539 | 0 | p.lo += p2.lo + f64::from_bits(P[1]); |
540 | 0 | p.hi = p2.hi; |
541 | | |
542 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
543 | | |
544 | 0 | Exp2m1 { |
545 | 0 | exp: p, |
546 | 0 | err: f64::from_bits(0x3bd4e00000000000) * p.hi, // 2^-65.62 < 0x1.4ep-66 |
547 | 0 | } |
548 | 0 | } |
549 | | |
550 | | /// Computes 2^x - 1 |
551 | | /// |
552 | | /// Max found ULP 0.5 |
553 | 0 | pub fn f_exp2m1(d: f64) -> f64 { |
554 | 0 | let mut x = d; |
555 | 0 | let t = x.to_bits(); |
556 | 0 | let ux = t; |
557 | 0 | let ax = ux & 0x7fffffffffffffffu64; |
558 | | |
559 | 0 | if ux >= 0xc04b000000000000u64 { |
560 | | // x = -NaN or x <= -54 |
561 | 0 | if (ux >> 52) == 0xfff { |
562 | | // -NaN or -Inf |
563 | 0 | return if ux > 0xfff0000000000000u64 { |
564 | 0 | x + x |
565 | | } else { |
566 | 0 | -1.0 |
567 | | }; |
568 | 0 | } |
569 | | // for x <= -54, exp2m1(x) rounds to -1 to nearest |
570 | 0 | return -1.0 + f64::from_bits(0x3c90000000000000); |
571 | 0 | } else if ax >= 0x4090000000000000u64 { |
572 | | // x = +NaN or x >= 1024 |
573 | 0 | if (ux >> 52) == 0x7ff { |
574 | | // +NaN |
575 | 0 | return x + x; |
576 | 0 | } |
577 | | /* for x >= 1024, exp2m1(x) rounds to +Inf to nearest, |
578 | | but for RNDZ/RNDD, we should have no overflow for x=1024 */ |
579 | 0 | return f_fmla( |
580 | 0 | x, |
581 | 0 | f64::from_bits(0x7bffffffffffffff), |
582 | 0 | f64::from_bits(0x7fefffffffffffff), |
583 | | ); |
584 | 0 | } else if ax <= 0x3cc0527dbd87e24du64 |
585 | | // |x| <= 0x1.0527dbd87e24dp-51 |
586 | | /* then the second term of the Taylor expansion of 2^x-1 at x=0 is |
587 | | smaller in absolute value than 1/2 ulp(first term): |
588 | | log(2)*x + log(2)^2*x^2/2 + ... */ |
589 | | { |
590 | | /* we use special code when log(2)*|x| is very small, in which case |
591 | | the double-double approximation h+l has its lower part l |
592 | | "truncated" */ |
593 | 0 | return if ax <= 0x3970000000000000u64 |
594 | | // |x| <= 2^-104 |
595 | | { |
596 | | // special case for 0 |
597 | 0 | if x == 0. { |
598 | 0 | return x; |
599 | 0 | } |
600 | | // scale x by 2^106 |
601 | 0 | x *= f64::from_bits(0x4690000000000000); |
602 | 0 | let z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN2L, LN2H), x); |
603 | 0 | let mut h2 = z.to_f64(); // round to 53-bit precision |
604 | | // scale back, hoping to avoid double rounding |
605 | 0 | h2 *= f64::from_bits(0x3950000000000000); |
606 | | // now subtract back h2 * 2^106 from h to get the correction term |
607 | 0 | let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi); |
608 | | // add l |
609 | 0 | h += z.lo; |
610 | | /* add h2 + h * 2^-106. Warning: when h=0, 2^-106*h2 might be exact, |
611 | | thus no underflow will be raised. We have underflow for |
612 | | 0 < x <= 0x1.71547652b82fep-1022 for RNDZ, and for |
613 | | 0 < x <= 0x1.71547652b82fdp-1022 for RNDN/RNDU. */ |
614 | 0 | dyad_fmla(h, f64::from_bits(0x3950000000000000), h2) |
615 | | } else { |
616 | | const C2: f64 = f64::from_bits(0x3fcebfbdff82c58f); // log(2)^2/2 |
617 | 0 | let mut z = DoubleDouble::from_exact_mult(LN2H, x); |
618 | 0 | z.lo = dyad_fmla(LN2L, x, z.lo); |
619 | | /* h+l approximates the first term x*log(2) */ |
620 | | /* we add C2*x^2 last, so that in case there is a cancellation in |
621 | | LN2L*x+l, it will contribute more bits */ |
622 | 0 | z.lo += C2 * x * x; |
623 | 0 | z.to_f64() |
624 | | }; |
625 | 0 | } |
626 | | |
627 | | /* now -54 < x < -0x1.0527dbd87e24dp-51 |
628 | | or 0x1.0527dbd87e24dp-51 < x < 1024 */ |
629 | | |
630 | | /* 2^x-1 is exact for x integer, -53 <= x <= 53 */ |
631 | 0 | if ux.wrapping_shl(17) == 0 { |
632 | 0 | let i = unsafe { x.cpu_floor().to_int_unchecked::<i32>() }; |
633 | 0 | if x == i as f64 && -53 <= i && i <= 53 { |
634 | 0 | return if i >= 0 { |
635 | 0 | ((1u64 << i) - 1) as f64 |
636 | | } else { |
637 | 0 | -1.0 + fast_ldexp(1.0, i) |
638 | | }; |
639 | 0 | } |
640 | 0 | } |
641 | | |
642 | 0 | let result = exp2m1_fast(x, ax <= 0x3fc0000000000000u64); |
643 | 0 | let left = result.exp.hi + (result.exp.lo - result.err); |
644 | 0 | let right = result.exp.hi + (result.exp.lo + result.err); |
645 | 0 | if left != right { |
646 | 0 | return exp2m1_accurate(x); |
647 | 0 | } |
648 | 0 | left |
649 | 0 | } |
650 | | |
651 | | #[cfg(test)] |
652 | | mod tests { |
653 | | use super::*; |
654 | | |
655 | | #[test] |
656 | | fn test_exp2m1() { |
657 | | assert_eq!(f_exp2m1(5.4172231599824623E-312), 3.75493295981e-312); |
658 | | assert_eq!(f_exp2m1( 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017800593653177087), 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012338431302992956); |
659 | | assert_eq!(3., f_exp2m1(2.0)); |
660 | | assert_eq!(4.656854249492381, f_exp2m1(2.5)); |
661 | | assert_eq!(-0.30801352040368324, f_exp2m1(-0.5311842449009418)); |
662 | | } |
663 | | } |