Coverage Report

Created: 2025-12-20 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp10m1.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30
use crate::double_double::DoubleDouble;
31
use crate::exponents::exp2m1::{EXP_M1_2_TABLE1, EXP_M1_2_TABLE2};
32
use crate::rounding::CpuFloor;
33
use crate::rounding::CpuRoundTiesEven;
34
35
const LN10H: f64 = f64::from_bits(0x40026bb1bbb55516);
36
const LN10L: f64 = f64::from_bits(0xbcaf48ad494ea3e9);
37
38
struct Exp10m1 {
39
    exp: DoubleDouble,
40
    err: f64,
41
}
42
43
// Approximation for the fast path of exp(z) for z=zh+zl,
44
// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
45
// (assuming x^y does not overflow or underflow)
46
#[inline]
47
0
fn q_1(dz: DoubleDouble) -> DoubleDouble {
48
    const Q_1: [u64; 5] = [
49
        0x3ff0000000000000,
50
        0x3ff0000000000000,
51
        0x3fe0000000000000,
52
        0x3fc5555555995d37,
53
        0x3fa55555558489dc,
54
    ];
55
0
    let z = dz.to_f64();
56
0
    let mut q = f_fmla(f64::from_bits(Q_1[4]), dz.hi, f64::from_bits(Q_1[3]));
57
0
    q = f_fmla(q, z, f64::from_bits(Q_1[2]));
58
59
0
    let mut p0 = DoubleDouble::from_exact_add(f64::from_bits(Q_1[1]), q * z);
60
0
    p0 = DoubleDouble::quick_mult(dz, p0);
61
0
    p0 = DoubleDouble::f64_add(f64::from_bits(Q_1[0]), p0);
62
0
    p0
63
0
}
64
65
#[inline]
66
0
fn exp1(x: DoubleDouble) -> DoubleDouble {
67
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
68
0
    let k = (x.hi * INVLOG2).cpu_round_ties_even();
69
70
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
71
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
72
0
    let mut zk = DoubleDouble::from_exact_mult(LOG2H, k);
73
0
    zk.lo = f_fmla(k, LOG2L, zk.lo);
74
75
0
    let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo);
76
0
    yz.lo -= zk.lo;
77
78
0
    let ik: i64 = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
79
0
    let im: i64 = (ik >> 12).wrapping_add(0x3ff);
80
0
    let i2: i64 = (ik >> 6) & 0x3f;
81
0
    let i1: i64 = ik & 0x3f;
82
83
0
    let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
84
0
    let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
85
86
0
    let p0 = DoubleDouble::quick_mult(t2, t1);
87
88
0
    let mut q = q_1(yz);
89
0
    q = DoubleDouble::quick_mult(p0, q);
90
91
    /* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus
92
    M = 2047, which encodes Inf */
93
0
    let mut du = (im as u64).wrapping_shl(52);
94
0
    if im == 0x7ff {
95
0
        q.hi *= 2.0;
96
0
        q.lo *= 2.0;
97
0
        du = (im.wrapping_sub(1) as u64).wrapping_shl(52);
98
0
    }
99
0
    q.hi *= f64::from_bits(du);
100
0
    q.lo *= f64::from_bits(du);
101
0
    q
102
0
}
103
104
#[inline]
105
0
fn exp10m1_fast(x: f64, tiny: bool) -> Exp10m1 {
106
0
    if tiny {
107
0
        return exp10m1_fast_tiny(x);
108
0
    }
109
    /* now -54 < x < -0.125 or 0.125 < x < 1024: we approximate exp(x*log(2))
110
    and subtract 1 */
111
0
    let v = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
112
    /*
113
    The a_mul() call is exact, and the error of the fma() is bounded by
114
     ulp(l).
115
     We have |t| <= ulp(h) <= ulp(LN2H*1024) = 2^-43,
116
     |t+x*LN2L| <= 2^-43 * 1024*LN2L < 2^-42.7,
117
     thus |l| <= |t| + |x*LN2L| + ulp(t+x*LN2L)
118
              <= 2^-42.7 + 2^-95 <= 2^-42.6, and ulp(l) <= 2^-95.
119
     Thus:
120
     |h + l - x*log(2)| <= |h + l - x*(LN2H+LN2L)| + |x|*|LN2H+LN2L-log(2)|
121
                        <= 2^-95 + 1024*2^-110.4 < 2^-94.9 */
122
123
0
    let mut p = exp1(v);
124
125
0
    let zf: DoubleDouble = if x >= 0. {
126
        // implies h >= 1 and the fast_two_sum pre-condition holds
127
0
        DoubleDouble::from_exact_add(p.hi, -1.0)
128
    } else {
129
0
        DoubleDouble::from_exact_add(-1.0, p.hi)
130
    };
131
0
    p.lo += zf.lo;
132
0
    p.hi = zf.hi;
133
    /* The error in the above fast_two_sum is bounded by 2^-105*|h|,
134
    with the new value of h, thus the total absolute error is bounded
135
    by eps1*|h_in|+2^-105*|h|.
136
    Relatively to h this yields eps1*|h_in/h| + 2^-105, where the maximum
137
    of |h_in/h| is obtained for x near -0.125, with |2^x/(2^x-1)| < 11.05.
138
    We get a relative error bound of 2^-74.138*11.05 + 2^-105 < 2^-70.67. */
139
0
    Exp10m1 {
140
0
        exp: p,
141
0
        err: f64::from_bits(0x3b77a00000000000) * p.hi, /* 2^-70.67 < 0x1.42p-71 */
142
0
    }
143
0
}
144
145
// Approximation for the accurate path of exp(z) for z=zh+zl,
146
// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
147
// (assuming x^y does not overflow or underflow)
148
#[inline]
149
0
fn q_2(dz: DoubleDouble) -> DoubleDouble {
150
    /* Let q[0]..q[7] be the coefficients of degree 0..7 of Q_2.
151
    The ulp of q[7]*z^7 is at most 2^-155, thus we can compute q[7]*z^7
152
    in double precision only.
153
    The ulp of q[6]*z^6 is at most 2^-139, thus we can compute q[6]*z^6
154
    in double precision only.
155
    The ulp of q[5]*z^5 is at most 2^-124, thus we can compute q[5]*z^5
156
    in double precision only. */
157
158
    /* The following is a degree-7 polynomial generated by Sollya for exp(z)
159
    over [-0.000130273,0.000130273] with absolute error < 2^-113.218
160
    (see file exp_accurate.sollya). Since we use this code only for
161
    |x| > 0.125 in exp2m1(x), the corresponding relative error for exp2m1
162
    is about 2^-113.218/|exp2m1(-0.125)| which is about 2^-110. */
163
    const Q_2: [u64; 9] = [
164
        0x3ff0000000000000,
165
        0x3ff0000000000000,
166
        0x3fe0000000000000,
167
        0x3fc5555555555555,
168
        0x3c655555555c4d26,
169
        0x3fa5555555555555,
170
        0x3f81111111111111,
171
        0x3f56c16c3fbb4213,
172
        0x3f2a01a023ede0d7,
173
    ];
174
175
0
    let z = dz.to_f64();
176
0
    let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7]));
177
0
    q = dd_fmla(q, z, f64::from_bits(Q_2[6]));
178
0
    q = dd_fmla(q, z, f64::from_bits(Q_2[5]));
179
180
    // multiply q by z and add Q_2[3] + Q_2[4]
181
182
0
    let mut p = DoubleDouble::from_exact_mult(q, z);
183
0
    let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi);
184
0
    p.hi = r0.hi;
185
0
    p.lo += r0.lo + f64::from_bits(Q_2[4]);
186
187
    // multiply hi+lo by zh+zl and add Q_2[2]
188
189
0
    p = DoubleDouble::quick_mult(p, dz);
190
0
    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi);
191
0
    p.hi = r1.hi;
192
0
    p.lo += r1.lo;
193
194
    // multiply hi+lo by zh+zl and add Q_2[1]
195
0
    p = DoubleDouble::quick_mult(p, dz);
196
0
    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi);
197
0
    p.hi = r1.hi;
198
0
    p.lo += r1.lo;
199
200
    // multiply hi+lo by zh+zl and add Q_2[0]
201
0
    p = DoubleDouble::quick_mult(p, dz);
202
0
    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi);
203
0
    p.hi = r1.hi;
204
0
    p.lo += r1.lo;
205
0
    p
206
0
}
207
208
// returns a double-double approximation hi+lo of exp(x*log(10))
209
// assumes -0x1.041704c068efp+4 < x <= 0x1.34413509f79fep+8
210
#[inline]
211
0
fn exp_2(x: f64) -> DoubleDouble {
212
0
    let mut k = (x * f64::from_bits(0x40ca934f0979a371)).cpu_round_ties_even();
213
0
    if k == 4194304. {
214
0
        k = 4194303.; // ensures M < 2047 below
215
0
    }
216
    // since |x| <= 745 we have k <= 3051520
217
218
    const LOG2_10H: f64 = f64::from_bits(0x3f134413509f79ff);
219
    const LOG2_10M: f64 = f64::from_bits(0xbb89dc1da9800000);
220
    const LOG2_10L: f64 = f64::from_bits(0xb984fd20dba1f655);
221
222
0
    let yhh = dd_fmla(-k, LOG2_10H, x); // exact, |yh| <= 2^-13
223
224
0
    let mut ky0 = DoubleDouble::from_exact_add(yhh, -k * LOG2_10M);
225
0
    ky0.lo = dd_fmla(-k, LOG2_10L, ky0.lo);
226
227
    /* now x = k + yh, thus 2^x = 2^k * 2^yh, and we multiply yh by log(10)
228
    to use the accurate path of exp() */
229
230
0
    let ky = DoubleDouble::quick_mult(ky0, DoubleDouble::new(LN10L, LN10H));
231
232
0
    let ik = unsafe {
233
0
        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
234
    };
235
0
    let im = (ik >> 12).wrapping_add(0x3ff);
236
0
    let i2 = (ik >> 6) & 0x3f;
237
0
    let i1 = ik & 0x3f;
238
239
0
    let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
240
0
    let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
241
242
0
    let p = DoubleDouble::quick_mult(t2, t1);
243
244
0
    let mut q = q_2(ky);
245
0
    q = DoubleDouble::quick_mult(p, q);
246
0
    let mut ud: u64 = (im as u64).wrapping_shl(52);
247
248
0
    if im == 0x7ff {
249
0
        q.hi *= 2.0;
250
0
        q.lo *= 2.0;
251
0
        ud = (im.wrapping_sub(1) as u64).wrapping_shl(52);
252
0
    }
253
0
    q.hi *= f64::from_bits(ud);
254
0
    q.lo *= f64::from_bits(ud);
255
0
    q
256
0
}
257
258
#[cold]
259
0
fn exp10m1_accurate_tiny(x: f64) -> f64 {
260
0
    let x2 = x * x;
261
0
    let x4 = x2 * x2;
262
    /* The following is a degree-17 polynomial generated by Sollya
263
    (file exp10m1_accurate.sollya),
264
    which approximates exp10m1(x) with relative error bounded by 2^-107.506
265
    for |x| <= 0.0625. */
266
267
    const Q: [u64; 25] = [
268
        0x40026bb1bbb55516,
269
        0xbcaf48ad494ea3e9,
270
        0x40053524c73cea69,
271
        0xbcae2bfab318d696,
272
        0x4000470591de2ca4,
273
        0x3ca823527cebf918,
274
        0x3ff2bd7609fd98c4,
275
        0x3c931ea51f6641df,
276
        0x3fe1429ffd1d4d76,
277
        0x3c7117195be7f232,
278
        0x3fca7ed70847c8b6,
279
        0xbc54260c5e23d0c8,
280
        0x3fb16e4dfc333a87,
281
        0xbc533fd284110905,
282
        0x3f94116b05fdaa5d,
283
        0xbc20721de44d79a8,
284
        0x3f74897c45d93d43,
285
        0x3f52ea52b2d182ac,
286
        0x3f2facfd5d905b22,
287
        0x3f084fe12df8bde3,
288
        0x3ee1398ad75d01bf,
289
        0x3eb6a9e96fbf6be7,
290
        0x3e8bd456a29007c2,
291
        0x3e6006cf8378cf9b,
292
        0x3e368862b132b6e2,
293
    ];
294
0
    let mut c13 = dd_fmla(f64::from_bits(Q[23]), x, f64::from_bits(Q[22])); // degree 15
295
0
    let c11 = dd_fmla(f64::from_bits(Q[21]), x, f64::from_bits(Q[20])); // degree 14
296
0
    c13 = dd_fmla(f64::from_bits(Q[24]), x2, c13); // degree 15
297
    // add Q[19]*x+c13*x2+c15*x4 to Q[18] (degree 11)
298
0
    let mut p = DoubleDouble::from_exact_add(
299
0
        f64::from_bits(Q[18]),
300
0
        f_fmla(f64::from_bits(Q[19]), x, f_fmla(c11, x2, c13 * x4)),
301
    );
302
    // multiply h+l by x and add Q[17] (degree 10)
303
0
    p = DoubleDouble::quick_f64_mult(x, p);
304
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[17]), p.hi);
305
0
    p.lo += p0.lo;
306
0
    p.hi = p0.hi;
307
308
    // multiply h+l by x and add Q[16] (degree 9)
309
0
    p = DoubleDouble::quick_f64_mult(x, p);
310
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[16]), p.hi);
311
0
    p.lo += p0.lo;
312
0
    p.hi = p0.hi;
313
    // multiply h+l by x and add Q[14]+Q[15] (degree 8)
314
0
    p = DoubleDouble::quick_f64_mult(x, p);
315
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi);
316
0
    p.lo += p0.lo + f64::from_bits(Q[15]);
317
0
    p.hi = p0.hi;
318
    // multiply h+l by x and add Q[12]+Q[13] (degree 7)
319
0
    p = DoubleDouble::quick_f64_mult(x, p);
320
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi);
321
0
    p.lo += p0.lo + f64::from_bits(Q[13]);
322
0
    p.hi = p0.hi;
323
324
    // multiply h+l by x and add Q[10]+Q[11] (degree 6)
325
0
    p = DoubleDouble::quick_f64_mult(x, p);
326
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi);
327
0
    p.lo += p0.lo + f64::from_bits(Q[11]);
328
0
    p.hi = p0.hi;
329
330
    // multiply h+l by x and add Q[8]+Q[9] (degree 5)
331
0
    p = DoubleDouble::quick_f64_mult(x, p);
332
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi);
333
0
    p.lo += p0.lo + f64::from_bits(Q[9]);
334
0
    p.hi = p0.hi;
335
336
    // multiply h+l by x and add Q[6]+Q[7] (degree 4)
337
0
    p = DoubleDouble::quick_f64_mult(x, p);
338
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi);
339
0
    p.lo += p0.lo + f64::from_bits(Q[7]);
340
0
    p.hi = p0.hi;
341
342
    // multiply h+l by x and add Q[4]+Q[5] (degree 3)
343
0
    p = DoubleDouble::quick_f64_mult(x, p);
344
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi);
345
0
    p.lo += p0.lo + f64::from_bits(Q[5]);
346
0
    p.hi = p0.hi;
347
348
    // multiply h+l by x and add Q[2]+Q[3] (degree 2)
349
0
    p = DoubleDouble::quick_f64_mult(x, p);
350
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi);
351
0
    p.lo += p0.lo + f64::from_bits(Q[3]);
352
0
    p.hi = p0.hi;
353
354
    // multiply h+l by x and add Q[0]+Q[1] (degree 2)
355
0
    p = DoubleDouble::quick_f64_mult(x, p);
356
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi);
357
0
    p.lo += p0.lo + f64::from_bits(Q[1]);
358
0
    p.hi = p0.hi;
359
360
    // multiply h+l by x
361
0
    p = DoubleDouble::quick_f64_mult(x, p);
362
0
    p.to_f64()
363
0
}
364
365
#[cold]
366
0
fn exp10m1_accurate(x: f64) -> f64 {
367
0
    let t = x.to_bits();
368
0
    let ux = t;
369
0
    let ax = ux & 0x7fffffffffffffffu64;
370
371
0
    if ax <= 0x3fc0000000000000u64 {
372
        // |x| <= 0.125
373
0
        return exp10m1_accurate_tiny(x);
374
0
    }
375
376
0
    let mut p = exp_2(x);
377
378
0
    let zf: DoubleDouble = DoubleDouble::from_full_exact_add(p.hi, -1.0);
379
0
    p.lo += zf.lo;
380
0
    p.hi = zf.hi;
381
0
    p.to_f64()
382
0
}
383
384
/* |x| <= 0.125, put in h + l a double-double approximation of exp2m1(x),
385
and return the maximal corresponding absolute error.
386
We also have |x| > 0x1.0527dbd87e24dp-51.
387
With xmin=RR("0x1.0527dbd87e24dp-51",16), the routine
388
exp2m1_fast_tiny_all(xmin,0.125,2^-65.73) in exp2m1.sage returns
389
1.63414352331297e-20 < 2^-65.73, and
390
exp2m1_fast_tiny_all(-0.125,-xmin,2^-65.62) returns
391
1.76283772822891e-20 < 2^-65.62, which proves the relative
392
error is bounded by 2^-65.62. */
393
#[inline]
394
0
fn exp10m1_fast_tiny(x: f64) -> Exp10m1 {
395
    /* The following is a degree-11 polynomial generated by Sollya
396
    (file exp10m1_fast.sollya),
397
    which approximates exp10m1(x) with relative error bounded by 2^-69.58
398
    for |x| <= 0.0625. */
399
    const P: [u64; 14] = [
400
        0x40026bb1bbb55516,
401
        0xbcaf48abcf79e094,
402
        0x40053524c73cea69,
403
        0xbcae1badf796d704,
404
        0x4000470591de2ca4,
405
        0x3ca7db8caacb2cea,
406
        0x3ff2bd7609fd98ba,
407
        0x3fe1429ffd1d4d98,
408
        0x3fca7ed7084998e1,
409
        0x3fb16e4dfc30944b,
410
        0x3f94116ae4b57526,
411
        0x3f74897c6a90f61c,
412
        0x3f52ec689c32b3a0,
413
        0x3f2faced20d698fe,
414
    ];
415
0
    let x2 = x * x;
416
0
    let x4 = x2 * x2;
417
0
    let mut c9 = dd_fmla(f64::from_bits(P[12]), x, f64::from_bits(P[11])); // degree 9
418
0
    let c7 = dd_fmla(f64::from_bits(P[10]), x, f64::from_bits(P[9])); // degree 7
419
0
    let mut c5 = dd_fmla(f64::from_bits(P[8]), x, f64::from_bits(P[7])); // degree 5
420
0
    c9 = dd_fmla(f64::from_bits(P[13]), x2, c9); // degree 9
421
0
    c5 = dd_fmla(c7, x2, c5); // degree 5
422
0
    c5 = dd_fmla(c9, x4, c5); // degree 5
423
424
0
    let mut p = DoubleDouble::from_exact_mult(c5, x);
425
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(P[6]), p.hi);
426
0
    p.lo += p0.lo;
427
0
    p.hi = p0.hi;
428
429
0
    p = DoubleDouble::quick_f64_mult(x, p);
430
431
0
    let p1 = DoubleDouble::from_exact_add(f64::from_bits(P[4]), p.hi);
432
0
    p.lo += p1.lo + f64::from_bits(P[5]);
433
0
    p.hi = p1.hi;
434
435
0
    p = DoubleDouble::quick_f64_mult(x, p);
436
437
0
    let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[2]), p.hi);
438
0
    p.lo += p2.lo + f64::from_bits(P[3]);
439
0
    p.hi = p2.hi;
440
441
0
    p = DoubleDouble::quick_f64_mult(x, p);
442
443
0
    let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[0]), p.hi);
444
0
    p.lo += p2.lo + f64::from_bits(P[1]);
445
0
    p.hi = p2.hi;
446
447
0
    p = DoubleDouble::quick_f64_mult(x, p);
448
449
0
    Exp10m1 {
450
0
        exp: p,
451
0
        err: f64::from_bits(0x3bb0a00000000000) * p.hi, // 2^-65.62 < 0x1.4ep-66
452
0
    }
453
0
}
454
455
/// Computes 10^x - 1
456
///
457
/// Max found ULP 0.5
458
0
pub fn f_exp10m1(d: f64) -> f64 {
459
0
    let mut x = d;
460
0
    let t = x.to_bits();
461
0
    let ux = t;
462
0
    let ax = ux & 0x7fffffffffffffffu64;
463
464
0
    if ux >= 0xc03041704c068ef0u64 {
465
        // x = -NaN or x <= -0x1.041704c068efp+4
466
0
        if (ux >> 52) == 0xfff {
467
            // -NaN or -Inf
468
0
            return if ux > 0xfff0000000000000u64 {
469
0
                x + x
470
            } else {
471
0
                -1.0
472
            };
473
0
        }
474
        // for x <= -0x1.041704c068efp+4, exp10m1(x) rounds to -1 to nearest
475
0
        return -1.0 + f64::from_bits(0x3c90000000000000);
476
0
    } else if ax > 0x40734413509f79feu64 {
477
        // x = +NaN or x >= 1024
478
0
        if (ux >> 52) == 0x7ff {
479
            // +NaN
480
0
            return x + x;
481
0
        }
482
0
        return f64::from_bits(0x7fefffffffffffff) * x;
483
0
    } else if ax <= 0x3c90000000000000u64
484
    // |x| <= 0x1.0527dbd87e24dp-51
485
    /* then the second term of the Taylor expansion of 2^x-1 at x=0 is
486
    smaller in absolute value than 1/2 ulp(first term):
487
    log(2)*x + log(2)^2*x^2/2 + ... */
488
    {
489
        /* we use special code when log(2)*|x| is very small, in which case
490
        the double-double approximation h+l has its lower part l
491
        "truncated" */
492
0
        return if ax <= 0x3970000000000000u64
493
        // |x| <= 2^-104
494
        {
495
            // special case for 0
496
0
            if x == 0. {
497
0
                return x;
498
0
            }
499
0
            if x.abs() == f64::from_bits(0x000086c73059343c) {
500
0
                return dd_fmla(
501
0
                    -f64::copysign(f64::from_bits(0x1e60010000000000), x),
502
0
                    f64::from_bits(0x1e50000000000000),
503
0
                    f64::copysign(f64::from_bits(0x000136568740cb56), x),
504
                );
505
0
            }
506
0
            if x.abs() == f64::from_bits(0x00013a7b70d0248c) {
507
0
                return dd_fmla(
508
0
                    f64::copysign(f64::from_bits(0x1e5ffe0000000000), x),
509
0
                    f64::from_bits(0x1e50000000000000),
510
0
                    f64::copysign(f64::from_bits(0x0002d41f3b972fc7), x),
511
                );
512
0
            }
513
514
            // scale x by 2^106
515
0
            x *= f64::from_bits(0x4690000000000000);
516
0
            let mut z = DoubleDouble::from_exact_mult(LN10H, x);
517
0
            z.lo = dd_fmla(LN10L, x, z.lo);
518
0
            let mut h2 = z.to_f64(); // round to 53-bit precision
519
            // scale back, hoping to avoid double rounding
520
0
            h2 *= f64::from_bits(0x3950000000000000);
521
            // now subtract back h2 * 2^106 from h to get the correction term
522
0
            let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi);
523
            // add l
524
0
            h += z.lo;
525
            /* add h2 + h * 2^-106. Warning: when h=0, 2^-106*h2 might be exact,
526
            thus no underflow will be raised. We have underflow for
527
            0 < x <= 0x1.71547652b82fep-1022 for RNDZ, and for
528
            0 < x <= 0x1.71547652b82fdp-1022 for RNDN/RNDU. */
529
0
            dyad_fmla(h, f64::from_bits(0x3950000000000000), h2)
530
        } else {
531
            const C2: f64 = f64::from_bits(0x40053524c73cea69); // log(2)^2/2
532
0
            let mut z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
533
            /* h+l approximates the first term x*log(2) */
534
            /* we add C2*x^2 last, so that in case there is a cancellation in
535
            LN10L*x+l, it will contribute more bits */
536
0
            z.lo = dd_fmla(C2 * x, x, z.lo);
537
0
            z.to_f64()
538
        };
539
0
    }
540
541
    /* now -0x1.041704c068efp+4 < x < -2^-54
542
    or 2^-54 < x <= 0x1.34413509f79fep+8 */
543
544
    /* 10^x-1 is exact for x integer, 1 <= x <= 15 */
545
0
    if ux << 15 == 0 {
546
0
        let i = unsafe { x.cpu_floor().to_int_unchecked::<i32>() };
547
0
        if x == i as f64 && 1 <= i && i <= 15 {
548
            static EXP10_1_15: [u64; 16] = [
549
                0x0000000000000000,
550
                0x4022000000000000,
551
                0x4058c00000000000,
552
                0x408f380000000000,
553
                0x40c3878000000000,
554
                0x40f869f000000000,
555
                0x412e847e00000000,
556
                0x416312cfe0000000,
557
                0x4197d783fc000000,
558
                0x41cdcd64ff800000,
559
                0x4202a05f1ff80000,
560
                0x42374876e7ff0000,
561
                0x426d1a94a1ffe000,
562
                0x42a2309ce53ffe00,
563
                0x42d6bcc41e8fffc0,
564
                0x430c6bf52633fff8,
565
            ];
566
0
            return f64::from_bits(EXP10_1_15[i as usize]);
567
0
        }
568
0
    }
569
570
0
    let result = exp10m1_fast(x, ax <= 0x3fb0000000000000u64);
571
0
    let left = result.exp.hi + (result.exp.lo - result.err);
572
0
    let right = result.exp.hi + (result.exp.lo + result.err);
573
0
    if left != right {
574
0
        return exp10m1_accurate(x);
575
0
    }
576
0
    left
577
0
}
578
579
#[cfg(test)]
580
mod tests {
581
    use super::*;
582
583
    #[test]
584
    fn test_exp10m1() {
585
        assert_eq!(f_exp10m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002364140972981833),
586
                   0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005443635762124408);
587
        assert_eq!(99., f_exp10m1(2.0));
588
        assert_eq!(315.22776601683796, f_exp10m1(2.5));
589
        assert_eq!(-0.7056827241416722, f_exp10m1(-0.5311842449009418));
590
    }
591
}