/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp10m1.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dd_fmla, dyad_fmla, f_fmla}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::exponents::exp2m1::{EXP_M1_2_TABLE1, EXP_M1_2_TABLE2}; |
32 | | use crate::rounding::CpuFloor; |
33 | | use crate::rounding::CpuRoundTiesEven; |
34 | | |
35 | | const LN10H: f64 = f64::from_bits(0x40026bb1bbb55516); |
36 | | const LN10L: f64 = f64::from_bits(0xbcaf48ad494ea3e9); |
37 | | |
38 | | struct Exp10m1 { |
39 | | exp: DoubleDouble, |
40 | | err: f64, |
41 | | } |
42 | | |
43 | | // Approximation for the fast path of exp(z) for z=zh+zl, |
44 | | // with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6 |
45 | | // (assuming x^y does not overflow or underflow) |
46 | | #[inline] |
47 | 0 | fn q_1(dz: DoubleDouble) -> DoubleDouble { |
48 | | const Q_1: [u64; 5] = [ |
49 | | 0x3ff0000000000000, |
50 | | 0x3ff0000000000000, |
51 | | 0x3fe0000000000000, |
52 | | 0x3fc5555555995d37, |
53 | | 0x3fa55555558489dc, |
54 | | ]; |
55 | 0 | let z = dz.to_f64(); |
56 | 0 | let mut q = f_fmla(f64::from_bits(Q_1[4]), dz.hi, f64::from_bits(Q_1[3])); |
57 | 0 | q = f_fmla(q, z, f64::from_bits(Q_1[2])); |
58 | | |
59 | 0 | let mut p0 = DoubleDouble::from_exact_add(f64::from_bits(Q_1[1]), q * z); |
60 | 0 | p0 = DoubleDouble::quick_mult(dz, p0); |
61 | 0 | p0 = DoubleDouble::f64_add(f64::from_bits(Q_1[0]), p0); |
62 | 0 | p0 |
63 | 0 | } |
64 | | |
65 | | #[inline] |
66 | 0 | fn exp1(x: DoubleDouble) -> DoubleDouble { |
67 | | const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */ |
68 | 0 | let k = (x.hi * INVLOG2).cpu_round_ties_even(); |
69 | | |
70 | | const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef); |
71 | | const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f); |
72 | 0 | let mut zk = DoubleDouble::from_exact_mult(LOG2H, k); |
73 | 0 | zk.lo = f_fmla(k, LOG2L, zk.lo); |
74 | | |
75 | 0 | let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo); |
76 | 0 | yz.lo -= zk.lo; |
77 | | |
78 | 0 | let ik: i64 = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */ |
79 | 0 | let im: i64 = (ik >> 12).wrapping_add(0x3ff); |
80 | 0 | let i2: i64 = (ik >> 6) & 0x3f; |
81 | 0 | let i1: i64 = ik & 0x3f; |
82 | | |
83 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]); |
84 | 0 | let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]); |
85 | | |
86 | 0 | let p0 = DoubleDouble::quick_mult(t2, t1); |
87 | | |
88 | 0 | let mut q = q_1(yz); |
89 | 0 | q = DoubleDouble::quick_mult(p0, q); |
90 | | |
91 | | /* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus |
92 | | M = 2047, which encodes Inf */ |
93 | 0 | let mut du = (im as u64).wrapping_shl(52); |
94 | 0 | if im == 0x7ff { |
95 | 0 | q.hi *= 2.0; |
96 | 0 | q.lo *= 2.0; |
97 | 0 | du = (im.wrapping_sub(1) as u64).wrapping_shl(52); |
98 | 0 | } |
99 | 0 | q.hi *= f64::from_bits(du); |
100 | 0 | q.lo *= f64::from_bits(du); |
101 | 0 | q |
102 | 0 | } |
103 | | |
104 | | #[inline] |
105 | 0 | fn exp10m1_fast(x: f64, tiny: bool) -> Exp10m1 { |
106 | 0 | if tiny { |
107 | 0 | return exp10m1_fast_tiny(x); |
108 | 0 | } |
109 | | /* now -54 < x < -0.125 or 0.125 < x < 1024: we approximate exp(x*log(2)) |
110 | | and subtract 1 */ |
111 | 0 | let v = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x); |
112 | | /* |
113 | | The a_mul() call is exact, and the error of the fma() is bounded by |
114 | | ulp(l). |
115 | | We have |t| <= ulp(h) <= ulp(LN2H*1024) = 2^-43, |
116 | | |t+x*LN2L| <= 2^-43 * 1024*LN2L < 2^-42.7, |
117 | | thus |l| <= |t| + |x*LN2L| + ulp(t+x*LN2L) |
118 | | <= 2^-42.7 + 2^-95 <= 2^-42.6, and ulp(l) <= 2^-95. |
119 | | Thus: |
120 | | |h + l - x*log(2)| <= |h + l - x*(LN2H+LN2L)| + |x|*|LN2H+LN2L-log(2)| |
121 | | <= 2^-95 + 1024*2^-110.4 < 2^-94.9 */ |
122 | | |
123 | 0 | let mut p = exp1(v); |
124 | | |
125 | 0 | let zf: DoubleDouble = if x >= 0. { |
126 | | // implies h >= 1 and the fast_two_sum pre-condition holds |
127 | 0 | DoubleDouble::from_exact_add(p.hi, -1.0) |
128 | | } else { |
129 | 0 | DoubleDouble::from_exact_add(-1.0, p.hi) |
130 | | }; |
131 | 0 | p.lo += zf.lo; |
132 | 0 | p.hi = zf.hi; |
133 | | /* The error in the above fast_two_sum is bounded by 2^-105*|h|, |
134 | | with the new value of h, thus the total absolute error is bounded |
135 | | by eps1*|h_in|+2^-105*|h|. |
136 | | Relatively to h this yields eps1*|h_in/h| + 2^-105, where the maximum |
137 | | of |h_in/h| is obtained for x near -0.125, with |2^x/(2^x-1)| < 11.05. |
138 | | We get a relative error bound of 2^-74.138*11.05 + 2^-105 < 2^-70.67. */ |
139 | 0 | Exp10m1 { |
140 | 0 | exp: p, |
141 | 0 | err: f64::from_bits(0x3b77a00000000000) * p.hi, /* 2^-70.67 < 0x1.42p-71 */ |
142 | 0 | } |
143 | 0 | } |
144 | | |
145 | | // Approximation for the accurate path of exp(z) for z=zh+zl, |
146 | | // with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6 |
147 | | // (assuming x^y does not overflow or underflow) |
148 | | #[inline] |
149 | 0 | fn q_2(dz: DoubleDouble) -> DoubleDouble { |
150 | | /* Let q[0]..q[7] be the coefficients of degree 0..7 of Q_2. |
151 | | The ulp of q[7]*z^7 is at most 2^-155, thus we can compute q[7]*z^7 |
152 | | in double precision only. |
153 | | The ulp of q[6]*z^6 is at most 2^-139, thus we can compute q[6]*z^6 |
154 | | in double precision only. |
155 | | The ulp of q[5]*z^5 is at most 2^-124, thus we can compute q[5]*z^5 |
156 | | in double precision only. */ |
157 | | |
158 | | /* The following is a degree-7 polynomial generated by Sollya for exp(z) |
159 | | over [-0.000130273,0.000130273] with absolute error < 2^-113.218 |
160 | | (see file exp_accurate.sollya). Since we use this code only for |
161 | | |x| > 0.125 in exp2m1(x), the corresponding relative error for exp2m1 |
162 | | is about 2^-113.218/|exp2m1(-0.125)| which is about 2^-110. */ |
163 | | const Q_2: [u64; 9] = [ |
164 | | 0x3ff0000000000000, |
165 | | 0x3ff0000000000000, |
166 | | 0x3fe0000000000000, |
167 | | 0x3fc5555555555555, |
168 | | 0x3c655555555c4d26, |
169 | | 0x3fa5555555555555, |
170 | | 0x3f81111111111111, |
171 | | 0x3f56c16c3fbb4213, |
172 | | 0x3f2a01a023ede0d7, |
173 | | ]; |
174 | | |
175 | 0 | let z = dz.to_f64(); |
176 | 0 | let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7])); |
177 | 0 | q = dd_fmla(q, z, f64::from_bits(Q_2[6])); |
178 | 0 | q = dd_fmla(q, z, f64::from_bits(Q_2[5])); |
179 | | |
180 | | // multiply q by z and add Q_2[3] + Q_2[4] |
181 | | |
182 | 0 | let mut p = DoubleDouble::from_exact_mult(q, z); |
183 | 0 | let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi); |
184 | 0 | p.hi = r0.hi; |
185 | 0 | p.lo += r0.lo + f64::from_bits(Q_2[4]); |
186 | | |
187 | | // multiply hi+lo by zh+zl and add Q_2[2] |
188 | | |
189 | 0 | p = DoubleDouble::quick_mult(p, dz); |
190 | 0 | let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi); |
191 | 0 | p.hi = r1.hi; |
192 | 0 | p.lo += r1.lo; |
193 | | |
194 | | // multiply hi+lo by zh+zl and add Q_2[1] |
195 | 0 | p = DoubleDouble::quick_mult(p, dz); |
196 | 0 | let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi); |
197 | 0 | p.hi = r1.hi; |
198 | 0 | p.lo += r1.lo; |
199 | | |
200 | | // multiply hi+lo by zh+zl and add Q_2[0] |
201 | 0 | p = DoubleDouble::quick_mult(p, dz); |
202 | 0 | let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi); |
203 | 0 | p.hi = r1.hi; |
204 | 0 | p.lo += r1.lo; |
205 | 0 | p |
206 | 0 | } |
207 | | |
208 | | // returns a double-double approximation hi+lo of exp(x*log(10)) |
209 | | // assumes -0x1.041704c068efp+4 < x <= 0x1.34413509f79fep+8 |
210 | | #[inline] |
211 | 0 | fn exp_2(x: f64) -> DoubleDouble { |
212 | 0 | let mut k = (x * f64::from_bits(0x40ca934f0979a371)).cpu_round_ties_even(); |
213 | 0 | if k == 4194304. { |
214 | 0 | k = 4194303.; // ensures M < 2047 below |
215 | 0 | } |
216 | | // since |x| <= 745 we have k <= 3051520 |
217 | | |
218 | | const LOG2_10H: f64 = f64::from_bits(0x3f134413509f79ff); |
219 | | const LOG2_10M: f64 = f64::from_bits(0xbb89dc1da9800000); |
220 | | const LOG2_10L: f64 = f64::from_bits(0xb984fd20dba1f655); |
221 | | |
222 | 0 | let yhh = dd_fmla(-k, LOG2_10H, x); // exact, |yh| <= 2^-13 |
223 | | |
224 | 0 | let mut ky0 = DoubleDouble::from_exact_add(yhh, -k * LOG2_10M); |
225 | 0 | ky0.lo = dd_fmla(-k, LOG2_10L, ky0.lo); |
226 | | |
227 | | /* now x = k + yh, thus 2^x = 2^k * 2^yh, and we multiply yh by log(10) |
228 | | to use the accurate path of exp() */ |
229 | | |
230 | 0 | let ky = DoubleDouble::quick_mult(ky0, DoubleDouble::new(LN10L, LN10H)); |
231 | | |
232 | 0 | let ik = unsafe { |
233 | 0 | k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion |
234 | | }; |
235 | 0 | let im = (ik >> 12).wrapping_add(0x3ff); |
236 | 0 | let i2 = (ik >> 6) & 0x3f; |
237 | 0 | let i1 = ik & 0x3f; |
238 | | |
239 | 0 | let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]); |
240 | 0 | let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]); |
241 | | |
242 | 0 | let p = DoubleDouble::quick_mult(t2, t1); |
243 | | |
244 | 0 | let mut q = q_2(ky); |
245 | 0 | q = DoubleDouble::quick_mult(p, q); |
246 | 0 | let mut ud: u64 = (im as u64).wrapping_shl(52); |
247 | | |
248 | 0 | if im == 0x7ff { |
249 | 0 | q.hi *= 2.0; |
250 | 0 | q.lo *= 2.0; |
251 | 0 | ud = (im.wrapping_sub(1) as u64).wrapping_shl(52); |
252 | 0 | } |
253 | 0 | q.hi *= f64::from_bits(ud); |
254 | 0 | q.lo *= f64::from_bits(ud); |
255 | 0 | q |
256 | 0 | } |
257 | | |
258 | | #[cold] |
259 | 0 | fn exp10m1_accurate_tiny(x: f64) -> f64 { |
260 | 0 | let x2 = x * x; |
261 | 0 | let x4 = x2 * x2; |
262 | | /* The following is a degree-17 polynomial generated by Sollya |
263 | | (file exp10m1_accurate.sollya), |
264 | | which approximates exp10m1(x) with relative error bounded by 2^-107.506 |
265 | | for |x| <= 0.0625. */ |
266 | | |
267 | | const Q: [u64; 25] = [ |
268 | | 0x40026bb1bbb55516, |
269 | | 0xbcaf48ad494ea3e9, |
270 | | 0x40053524c73cea69, |
271 | | 0xbcae2bfab318d696, |
272 | | 0x4000470591de2ca4, |
273 | | 0x3ca823527cebf918, |
274 | | 0x3ff2bd7609fd98c4, |
275 | | 0x3c931ea51f6641df, |
276 | | 0x3fe1429ffd1d4d76, |
277 | | 0x3c7117195be7f232, |
278 | | 0x3fca7ed70847c8b6, |
279 | | 0xbc54260c5e23d0c8, |
280 | | 0x3fb16e4dfc333a87, |
281 | | 0xbc533fd284110905, |
282 | | 0x3f94116b05fdaa5d, |
283 | | 0xbc20721de44d79a8, |
284 | | 0x3f74897c45d93d43, |
285 | | 0x3f52ea52b2d182ac, |
286 | | 0x3f2facfd5d905b22, |
287 | | 0x3f084fe12df8bde3, |
288 | | 0x3ee1398ad75d01bf, |
289 | | 0x3eb6a9e96fbf6be7, |
290 | | 0x3e8bd456a29007c2, |
291 | | 0x3e6006cf8378cf9b, |
292 | | 0x3e368862b132b6e2, |
293 | | ]; |
294 | 0 | let mut c13 = dd_fmla(f64::from_bits(Q[23]), x, f64::from_bits(Q[22])); // degree 15 |
295 | 0 | let c11 = dd_fmla(f64::from_bits(Q[21]), x, f64::from_bits(Q[20])); // degree 14 |
296 | 0 | c13 = dd_fmla(f64::from_bits(Q[24]), x2, c13); // degree 15 |
297 | | // add Q[19]*x+c13*x2+c15*x4 to Q[18] (degree 11) |
298 | 0 | let mut p = DoubleDouble::from_exact_add( |
299 | 0 | f64::from_bits(Q[18]), |
300 | 0 | f_fmla(f64::from_bits(Q[19]), x, f_fmla(c11, x2, c13 * x4)), |
301 | | ); |
302 | | // multiply h+l by x and add Q[17] (degree 10) |
303 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
304 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[17]), p.hi); |
305 | 0 | p.lo += p0.lo; |
306 | 0 | p.hi = p0.hi; |
307 | | |
308 | | // multiply h+l by x and add Q[16] (degree 9) |
309 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
310 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[16]), p.hi); |
311 | 0 | p.lo += p0.lo; |
312 | 0 | p.hi = p0.hi; |
313 | | // multiply h+l by x and add Q[14]+Q[15] (degree 8) |
314 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
315 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi); |
316 | 0 | p.lo += p0.lo + f64::from_bits(Q[15]); |
317 | 0 | p.hi = p0.hi; |
318 | | // multiply h+l by x and add Q[12]+Q[13] (degree 7) |
319 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
320 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi); |
321 | 0 | p.lo += p0.lo + f64::from_bits(Q[13]); |
322 | 0 | p.hi = p0.hi; |
323 | | |
324 | | // multiply h+l by x and add Q[10]+Q[11] (degree 6) |
325 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
326 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi); |
327 | 0 | p.lo += p0.lo + f64::from_bits(Q[11]); |
328 | 0 | p.hi = p0.hi; |
329 | | |
330 | | // multiply h+l by x and add Q[8]+Q[9] (degree 5) |
331 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
332 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi); |
333 | 0 | p.lo += p0.lo + f64::from_bits(Q[9]); |
334 | 0 | p.hi = p0.hi; |
335 | | |
336 | | // multiply h+l by x and add Q[6]+Q[7] (degree 4) |
337 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
338 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi); |
339 | 0 | p.lo += p0.lo + f64::from_bits(Q[7]); |
340 | 0 | p.hi = p0.hi; |
341 | | |
342 | | // multiply h+l by x and add Q[4]+Q[5] (degree 3) |
343 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
344 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi); |
345 | 0 | p.lo += p0.lo + f64::from_bits(Q[5]); |
346 | 0 | p.hi = p0.hi; |
347 | | |
348 | | // multiply h+l by x and add Q[2]+Q[3] (degree 2) |
349 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
350 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi); |
351 | 0 | p.lo += p0.lo + f64::from_bits(Q[3]); |
352 | 0 | p.hi = p0.hi; |
353 | | |
354 | | // multiply h+l by x and add Q[0]+Q[1] (degree 2) |
355 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
356 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi); |
357 | 0 | p.lo += p0.lo + f64::from_bits(Q[1]); |
358 | 0 | p.hi = p0.hi; |
359 | | |
360 | | // multiply h+l by x |
361 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
362 | 0 | p.to_f64() |
363 | 0 | } |
364 | | |
365 | | #[cold] |
366 | 0 | fn exp10m1_accurate(x: f64) -> f64 { |
367 | 0 | let t = x.to_bits(); |
368 | 0 | let ux = t; |
369 | 0 | let ax = ux & 0x7fffffffffffffffu64; |
370 | | |
371 | 0 | if ax <= 0x3fc0000000000000u64 { |
372 | | // |x| <= 0.125 |
373 | 0 | return exp10m1_accurate_tiny(x); |
374 | 0 | } |
375 | | |
376 | 0 | let mut p = exp_2(x); |
377 | | |
378 | 0 | let zf: DoubleDouble = DoubleDouble::from_full_exact_add(p.hi, -1.0); |
379 | 0 | p.lo += zf.lo; |
380 | 0 | p.hi = zf.hi; |
381 | 0 | p.to_f64() |
382 | 0 | } |
383 | | |
384 | | /* |x| <= 0.125, put in h + l a double-double approximation of exp2m1(x), |
385 | | and return the maximal corresponding absolute error. |
386 | | We also have |x| > 0x1.0527dbd87e24dp-51. |
387 | | With xmin=RR("0x1.0527dbd87e24dp-51",16), the routine |
388 | | exp2m1_fast_tiny_all(xmin,0.125,2^-65.73) in exp2m1.sage returns |
389 | | 1.63414352331297e-20 < 2^-65.73, and |
390 | | exp2m1_fast_tiny_all(-0.125,-xmin,2^-65.62) returns |
391 | | 1.76283772822891e-20 < 2^-65.62, which proves the relative |
392 | | error is bounded by 2^-65.62. */ |
393 | | #[inline] |
394 | 0 | fn exp10m1_fast_tiny(x: f64) -> Exp10m1 { |
395 | | /* The following is a degree-11 polynomial generated by Sollya |
396 | | (file exp10m1_fast.sollya), |
397 | | which approximates exp10m1(x) with relative error bounded by 2^-69.58 |
398 | | for |x| <= 0.0625. */ |
399 | | const P: [u64; 14] = [ |
400 | | 0x40026bb1bbb55516, |
401 | | 0xbcaf48abcf79e094, |
402 | | 0x40053524c73cea69, |
403 | | 0xbcae1badf796d704, |
404 | | 0x4000470591de2ca4, |
405 | | 0x3ca7db8caacb2cea, |
406 | | 0x3ff2bd7609fd98ba, |
407 | | 0x3fe1429ffd1d4d98, |
408 | | 0x3fca7ed7084998e1, |
409 | | 0x3fb16e4dfc30944b, |
410 | | 0x3f94116ae4b57526, |
411 | | 0x3f74897c6a90f61c, |
412 | | 0x3f52ec689c32b3a0, |
413 | | 0x3f2faced20d698fe, |
414 | | ]; |
415 | 0 | let x2 = x * x; |
416 | 0 | let x4 = x2 * x2; |
417 | 0 | let mut c9 = dd_fmla(f64::from_bits(P[12]), x, f64::from_bits(P[11])); // degree 9 |
418 | 0 | let c7 = dd_fmla(f64::from_bits(P[10]), x, f64::from_bits(P[9])); // degree 7 |
419 | 0 | let mut c5 = dd_fmla(f64::from_bits(P[8]), x, f64::from_bits(P[7])); // degree 5 |
420 | 0 | c9 = dd_fmla(f64::from_bits(P[13]), x2, c9); // degree 9 |
421 | 0 | c5 = dd_fmla(c7, x2, c5); // degree 5 |
422 | 0 | c5 = dd_fmla(c9, x4, c5); // degree 5 |
423 | | |
424 | 0 | let mut p = DoubleDouble::from_exact_mult(c5, x); |
425 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(P[6]), p.hi); |
426 | 0 | p.lo += p0.lo; |
427 | 0 | p.hi = p0.hi; |
428 | | |
429 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
430 | | |
431 | 0 | let p1 = DoubleDouble::from_exact_add(f64::from_bits(P[4]), p.hi); |
432 | 0 | p.lo += p1.lo + f64::from_bits(P[5]); |
433 | 0 | p.hi = p1.hi; |
434 | | |
435 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
436 | | |
437 | 0 | let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[2]), p.hi); |
438 | 0 | p.lo += p2.lo + f64::from_bits(P[3]); |
439 | 0 | p.hi = p2.hi; |
440 | | |
441 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
442 | | |
443 | 0 | let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[0]), p.hi); |
444 | 0 | p.lo += p2.lo + f64::from_bits(P[1]); |
445 | 0 | p.hi = p2.hi; |
446 | | |
447 | 0 | p = DoubleDouble::quick_f64_mult(x, p); |
448 | | |
449 | 0 | Exp10m1 { |
450 | 0 | exp: p, |
451 | 0 | err: f64::from_bits(0x3bb0a00000000000) * p.hi, // 2^-65.62 < 0x1.4ep-66 |
452 | 0 | } |
453 | 0 | } |
454 | | |
455 | | /// Computes 10^x - 1 |
456 | | /// |
457 | | /// Max found ULP 0.5 |
458 | 0 | pub fn f_exp10m1(d: f64) -> f64 { |
459 | 0 | let mut x = d; |
460 | 0 | let t = x.to_bits(); |
461 | 0 | let ux = t; |
462 | 0 | let ax = ux & 0x7fffffffffffffffu64; |
463 | | |
464 | 0 | if ux >= 0xc03041704c068ef0u64 { |
465 | | // x = -NaN or x <= -0x1.041704c068efp+4 |
466 | 0 | if (ux >> 52) == 0xfff { |
467 | | // -NaN or -Inf |
468 | 0 | return if ux > 0xfff0000000000000u64 { |
469 | 0 | x + x |
470 | | } else { |
471 | 0 | -1.0 |
472 | | }; |
473 | 0 | } |
474 | | // for x <= -0x1.041704c068efp+4, exp10m1(x) rounds to -1 to nearest |
475 | 0 | return -1.0 + f64::from_bits(0x3c90000000000000); |
476 | 0 | } else if ax > 0x40734413509f79feu64 { |
477 | | // x = +NaN or x >= 1024 |
478 | 0 | if (ux >> 52) == 0x7ff { |
479 | | // +NaN |
480 | 0 | return x + x; |
481 | 0 | } |
482 | 0 | return f64::from_bits(0x7fefffffffffffff) * x; |
483 | 0 | } else if ax <= 0x3c90000000000000u64 |
484 | | // |x| <= 0x1.0527dbd87e24dp-51 |
485 | | /* then the second term of the Taylor expansion of 2^x-1 at x=0 is |
486 | | smaller in absolute value than 1/2 ulp(first term): |
487 | | log(2)*x + log(2)^2*x^2/2 + ... */ |
488 | | { |
489 | | /* we use special code when log(2)*|x| is very small, in which case |
490 | | the double-double approximation h+l has its lower part l |
491 | | "truncated" */ |
492 | 0 | return if ax <= 0x3970000000000000u64 |
493 | | // |x| <= 2^-104 |
494 | | { |
495 | | // special case for 0 |
496 | 0 | if x == 0. { |
497 | 0 | return x; |
498 | 0 | } |
499 | 0 | if x.abs() == f64::from_bits(0x000086c73059343c) { |
500 | 0 | return dd_fmla( |
501 | 0 | -f64::copysign(f64::from_bits(0x1e60010000000000), x), |
502 | 0 | f64::from_bits(0x1e50000000000000), |
503 | 0 | f64::copysign(f64::from_bits(0x000136568740cb56), x), |
504 | | ); |
505 | 0 | } |
506 | 0 | if x.abs() == f64::from_bits(0x00013a7b70d0248c) { |
507 | 0 | return dd_fmla( |
508 | 0 | f64::copysign(f64::from_bits(0x1e5ffe0000000000), x), |
509 | 0 | f64::from_bits(0x1e50000000000000), |
510 | 0 | f64::copysign(f64::from_bits(0x0002d41f3b972fc7), x), |
511 | | ); |
512 | 0 | } |
513 | | |
514 | | // scale x by 2^106 |
515 | 0 | x *= f64::from_bits(0x4690000000000000); |
516 | 0 | let mut z = DoubleDouble::from_exact_mult(LN10H, x); |
517 | 0 | z.lo = dd_fmla(LN10L, x, z.lo); |
518 | 0 | let mut h2 = z.to_f64(); // round to 53-bit precision |
519 | | // scale back, hoping to avoid double rounding |
520 | 0 | h2 *= f64::from_bits(0x3950000000000000); |
521 | | // now subtract back h2 * 2^106 from h to get the correction term |
522 | 0 | let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi); |
523 | | // add l |
524 | 0 | h += z.lo; |
525 | | /* add h2 + h * 2^-106. Warning: when h=0, 2^-106*h2 might be exact, |
526 | | thus no underflow will be raised. We have underflow for |
527 | | 0 < x <= 0x1.71547652b82fep-1022 for RNDZ, and for |
528 | | 0 < x <= 0x1.71547652b82fdp-1022 for RNDN/RNDU. */ |
529 | 0 | dyad_fmla(h, f64::from_bits(0x3950000000000000), h2) |
530 | | } else { |
531 | | const C2: f64 = f64::from_bits(0x40053524c73cea69); // log(2)^2/2 |
532 | 0 | let mut z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x); |
533 | | /* h+l approximates the first term x*log(2) */ |
534 | | /* we add C2*x^2 last, so that in case there is a cancellation in |
535 | | LN10L*x+l, it will contribute more bits */ |
536 | 0 | z.lo = dd_fmla(C2 * x, x, z.lo); |
537 | 0 | z.to_f64() |
538 | | }; |
539 | 0 | } |
540 | | |
541 | | /* now -0x1.041704c068efp+4 < x < -2^-54 |
542 | | or 2^-54 < x <= 0x1.34413509f79fep+8 */ |
543 | | |
544 | | /* 10^x-1 is exact for x integer, 1 <= x <= 15 */ |
545 | 0 | if ux << 15 == 0 { |
546 | 0 | let i = unsafe { x.cpu_floor().to_int_unchecked::<i32>() }; |
547 | 0 | if x == i as f64 && 1 <= i && i <= 15 { |
548 | | static EXP10_1_15: [u64; 16] = [ |
549 | | 0x0000000000000000, |
550 | | 0x4022000000000000, |
551 | | 0x4058c00000000000, |
552 | | 0x408f380000000000, |
553 | | 0x40c3878000000000, |
554 | | 0x40f869f000000000, |
555 | | 0x412e847e00000000, |
556 | | 0x416312cfe0000000, |
557 | | 0x4197d783fc000000, |
558 | | 0x41cdcd64ff800000, |
559 | | 0x4202a05f1ff80000, |
560 | | 0x42374876e7ff0000, |
561 | | 0x426d1a94a1ffe000, |
562 | | 0x42a2309ce53ffe00, |
563 | | 0x42d6bcc41e8fffc0, |
564 | | 0x430c6bf52633fff8, |
565 | | ]; |
566 | 0 | return f64::from_bits(EXP10_1_15[i as usize]); |
567 | 0 | } |
568 | 0 | } |
569 | | |
570 | 0 | let result = exp10m1_fast(x, ax <= 0x3fb0000000000000u64); |
571 | 0 | let left = result.exp.hi + (result.exp.lo - result.err); |
572 | 0 | let right = result.exp.hi + (result.exp.lo + result.err); |
573 | 0 | if left != right { |
574 | 0 | return exp10m1_accurate(x); |
575 | 0 | } |
576 | 0 | left |
577 | 0 | } |
578 | | |
579 | | #[cfg(test)] |
580 | | mod tests { |
581 | | use super::*; |
582 | | |
583 | | #[test] |
584 | | fn test_exp10m1() { |
585 | | assert_eq!(f_exp10m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002364140972981833), |
586 | | 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005443635762124408); |
587 | | assert_eq!(99., f_exp10m1(2.0)); |
588 | | assert_eq!(315.22776601683796, f_exp10m1(2.5)); |
589 | | assert_eq!(-0.7056827241416722, f_exp10m1(-0.5311842449009418)); |
590 | | } |
591 | | } |