Coverage Report

Created: 2025-11-24 07:30

/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/pow_exec.rs
Source (execution count 0 on every instrumented line)
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::dd_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1, ldexp};
use crate::exponents::{EXPM1_T0, EXPM1_T1};
use crate::polyeval::f_polyeval6;
use crate::pow_tables::{EXP_T1_2_DYADIC, EXP_T2_2_DYADIC, POW_INVERSE, POW_LOG_INV};
use crate::rounding::CpuRound;
use crate::rounding::CpuRoundTiesEven;
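
// Illustrative sketch, not from the original pxfm source: the double-double helpers
// used below (dd_fmla, DoubleDouble::from_exact_mult) rely on a fused multiply-add
// recovering the exact rounding error of a product: hi = RN(a*b), lo = fma(a, b, -hi),
// and hi + lo == a*b exactly. The test name is hypothetical and only std is assumed.
#[cfg(test)]
#[test]
fn two_prod_via_fma_sketch() {
    let a = 1.0000000000000002_f64; // 1 + 2^-52
    let b = std::f64::consts::PI;
    let hi = a * b; // rounded product
    let lo = f64::mul_add(a, b, -hi); // exact residual a*b - hi
    // |lo| never exceeds half an ulp of hi, so (hi, lo) is an exact split of a*b.
    assert!(lo.abs() <= 0.5 * hi.abs() * f64::EPSILON);
}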

#[inline(always)]
pub(crate) fn log_poly_1(z: f64) -> DoubleDouble {
    /* The following is a degree-8 polynomial generated by Sollya for
    log(1+x)-x+x^2/2 over [-0.0040283203125,0.0040283203125]
    with absolute error < 2^-81.63
    and relative error < 2^-72.423 (see sollya/P_1.sollya).
    The relative error is for x - x^2/2 + P(x) with respect to log(1+x). */
    const P_1: [u64; 6] = [
        0x3fd5555555555558,
        0xbfd0000000000003,
        0x3fc999999981f535,
        0xbfc55555553d1eb4,
        0x3fc2494526fd4a06,
        0xbfc0001f0c80e8ce,
    ];
    let w = DoubleDouble::from_exact_mult(z, z);
    let t = dd_fmla(f64::from_bits(P_1[5]), z, f64::from_bits(P_1[4]));
    let mut u = dd_fmla(f64::from_bits(P_1[3]), z, f64::from_bits(P_1[2]));
    let mut v = dd_fmla(f64::from_bits(P_1[1]), z, f64::from_bits(P_1[0]));
    u = dd_fmla(t, w.hi, u);
    v = dd_fmla(u, w.hi, v);
    u = v * w.hi;
    DoubleDouble::new(dd_fmla(u, z, -0.5 * w.lo), -0.5 * w.hi)
}
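
// Illustrative sketch, not from the original pxfm source: in plain f64 the scheme in
// log_poly_1 amounts to log(1+z) ~= z - z^2/2 + z^3*P(z), where P is the degree-5
// correction polynomial stored in P_1 (coefficients close to 1/3, -1/4, 1/5, ...).
// The test name, the sample point and the tolerance are illustrative only.
#[cfg(test)]
#[test]
fn log_poly_1_plain_f64_sketch() {
    const P_1: [u64; 6] = [
        0x3fd5555555555558,
        0xbfd0000000000003,
        0x3fc999999981f535,
        0xbfc55555553d1eb4,
        0x3fc2494526fd4a06,
        0xbfc0001f0c80e8ce,
    ];
    let z = 0.003_f64; // inside [-0.0040283203125, 0.0040283203125]
    // Horner evaluation of P(z), highest coefficient first.
    let mut p = f64::from_bits(P_1[5]);
    for c in P_1.iter().rev().skip(1) {
        p = f64::mul_add(p, z, f64::from_bits(*c));
    }
    let approx = z - 0.5 * z * z + z * z * z * p;
    assert!((approx - z.ln_1p()).abs() < 1e-16);
}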

/* Given 2^-1074 <= x <= 0x1.fffffffffffffp+1023, this routine puts in h+l
   an approximation of log(x) such that |l| < 2^-23.89*|h| and

   | h + l - log(x) | <= elog * |log x|

   with elog = 2^-73.527  if x < 1/sqrt(2) or sqrt(2) < x,
   and  elog = 2^-67.0544 if 1/sqrt(2) < x < sqrt(2)
   (note that x cannot equal 1/sqrt(2) nor sqrt(2)).
*/
#[inline]
pub(crate) fn pow_log_1(x: f64) -> (DoubleDouble, bool) {
    /* for 181 <= i <= 362, r[i] = POW_INVERSE[i-181] is a 9-bit approximation of
    1/x[i], where i*2^-8 <= x[i] < (i+1)*2^-8.
    More precisely r[i] is a 9-bit value such that r[i]*y-1 is representable
    exactly on 53 bits for any y, i*2^-8 <= y < (i+1)*2^-8.
    Moreover |r[i]*y-1| < 0.0040283203125.
    Table generated with the accompanying pow.sage file,
    with l=inverse_centered(k=8,prec=9,maxbits=53,verbose=false) */
    let x_u = x.to_bits();
    let mut m = x_u & 0xfffffffffffff;
    let mut e: i64 = ((x_u >> 52) & 0x7ff) as i64;

    let t;
    if e != 0 {
        t = m | (0x3ffu64 << 52);
        m = m.wrapping_add(1u64 << 52);
        e -= 0x3ff;
    } else {
        /* x is a subnormal double  */
        let k = m.leading_zeros() - 11;

        e = -0x3fei64 - k as i64;
        m = m.wrapping_shl(k);
        t = m | (0x3ffu64 << 52);
    }

    /* now |x| = 2^e*t = 2^(e-52)*m with 1 <= t < 2,
    and 2^52 <= m < 2^53 */

    //   log(x) = log(t) + E * log(2)
    let mut t = f64::from_bits(t);

    // If t > sqrt(2) we divide it by 2 to ensure 1/sqrt(2) < t < sqrt(2)
    let c: usize = (m >= 0x16a09e667f3bcd) as usize;
    static CY: [f64; 2] = [1.0, 0.5];
    static CM: [u64; 2] = [44, 45];

    e = e.wrapping_add(c as i64);
    let be = e;
    let i = m >> CM[c]; /* i/2^8 <= t < (i+1)/2^8 */
    /* when c=1, we have 0x16a09e667f3bcd <= m < 2^53, thus 181 <= i <= 255;
    when c=0, we have 2^52 <= m < 0x16a09e667f3bcd, thus 256 <= i <= 362 */
    t *= CY[c];
    /* now 0x1.6a09e667f3bcdp-1 <= t < 0x1.6a09e667f3bcdp+0,
    and log(x) = E * log(2) + log(t) */

    let r = f64::from_bits(POW_INVERSE[(i - 181) as usize]);
    let l1 = f64::from_bits(POW_LOG_INV[(i - 181) as usize].1);
    let l2 = f64::from_bits(POW_LOG_INV[(i - 181) as usize].0);

    let z = dd_fmla(r, t, -1.0);

    const LOG2_DD: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d2ef35793c76730),
        f64::from_bits(0x3fe62e42fefa3800),
    );

    let th = dd_fmla(be as f64, LOG2_DD.hi, l1);
    let tl = dd_fmla(be as f64, LOG2_DD.lo, l2);

    let mut v = DoubleDouble::f64_add(th, DoubleDouble::new(tl, z));
    let p = log_poly_1(z);
    v = DoubleDouble::f64_add(v.hi, DoubleDouble::new(v.lo + p.lo, p.hi));

    if e == 0 && v.lo.abs() > (v.hi.abs()) * f64::from_bits(0x3e70000000000000) {
        v = DoubleDouble::from_exact_add(v.hi, v.lo);
        return (v, true);
    }

    (v, false)
}
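
// Illustrative sketch, not from the original pxfm source: pow_log_1 uses the classic
// decomposition log(x) = e*log(2) - log(r) + log(1 + z) with z = r*t - 1, where
// x = 2^e * t and r is a short (~9-bit) approximation of 1/t taken from POW_INVERSE,
// so that z stays tiny and log(1+z) is handled by log_poly_1. Below the table lookup is
// replaced by a crude 2^-9 rounding of 1/t and the log terms by std calls; the test
// name, sample point and tolerance are illustrative only.
#[cfg(test)]
#[test]
fn pow_log_range_reduction_sketch() {
    let x = 123.456_f64;
    let e = x.log2().floor(); // exponent of x (fine away from powers of two)
    let t = x / e.exp2(); // 1 <= t < 2
    let r = (512.0 / t).round() / 512.0; // coarse reciprocal of t
    let z = f64::mul_add(r, t, -1.0); // |z| is small
    let log_x = e * std::f64::consts::LN_2 - r.ln() + z.ln_1p();
    assert!((log_x - x.ln()).abs() < 1e-13);
}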

/* Given z such that |z| < 2^-12.905,
   this routine puts in qh+ql an approximation of exp(z) such that

   | (qh+ql) / exp(z) - 1 | < 2^-64.902632

   and |ql| <= 2^-51.999.
*/
#[inline(always)]
fn exp_poly_1(z: f64) -> DoubleDouble {
    /* The following is a degree-4 polynomial generated by Sollya for exp(x)
    over [-2^-12.905,2^-12.905]
    with absolute error < 2^-74.34 (see sollya/Q_1.sollya). */
    const Q_1: [u64; 5] = [
        0x3ff0000000000000,
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc5555555997996,
        0x3fa5555555849d8d,
    ];
    let mut q = dd_fmla(f64::from_bits(Q_1[4]), z, f64::from_bits(Q_1[3]));
    q = dd_fmla(q, z, f64::from_bits(Q_1[2]));
    let h0 = dd_fmla(q, z, f64::from_bits(Q_1[1]));

    let v1 = DoubleDouble::from_exact_mult(z, h0);
    DoubleDouble::f64_add(f64::from_bits(Q_1[0]), v1)
}
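
// Illustrative sketch, not from the original pxfm source: Q_1 is essentially the start
// of the Taylor series of exp (1, 1, 1/2, ~1/6, ~1/24), which is plenty for
// |z| < 2^-12.905. A plain-f64 Horner pass over the same coefficients; the test name,
// sample point and tolerance are illustrative only.
#[cfg(test)]
#[test]
fn exp_poly_1_plain_f64_sketch() {
    const Q_1: [u64; 5] = [
        0x3ff0000000000000,
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc5555555997996,
        0x3fa5555555849d8d,
    ];
    let z = 1.0e-4_f64; // |z| < 2^-12.905 ~= 1.3e-4
    let mut q = f64::from_bits(Q_1[4]);
    for c in Q_1.iter().rev().skip(1) {
        q = f64::mul_add(q, z, f64::from_bits(*c));
    }
    assert!((q - z.exp()).abs() < 1e-15);
}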

/* Given z such that |z| < 2^-12.905,
   this routine puts in qh+ql an approximation of exp(z) such that

   | (qh+ql) / exp(z) - 1 | < 2^-64.902632

   and |ql| <= 2^-51.999.
*/

// #[inline(always)]
// fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
//     /* The following is a degree-4 polynomial generated by Sollya for exp(x)
//     over [-2^-12.905,2^-12.905] */
//     const Q_1: [(u64, u64); 7] = [
//         (0x0000000000000000, 0x3ff0000000000000),
//         (0x3a20e40000000000, 0x3ff0000000000000),
//         (0x3a04820000000000, 0x3fe0000000000000),
//         (0xbc756423c5338a66, 0x3fc5555555555556),
//         (0xbc5560f74db5556c, 0x3fa5555555555556),
//         (0x3c3648eca89bc6ac, 0x3f8111111144fbee),
//         (0xbbd53d924ae90c8c, 0x3f56c16c16ffeecc),
//     ];
//     let mut p = DoubleDouble::mult(z, DoubleDouble::from_bit_pair(Q_1[6]));
//     p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[5]));
//     p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[4]));
//     p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[3]));
//     p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[2]));
//     p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[1]));
//     p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[0]));
//     p
// }

#[inline]
pub(crate) fn pow_exp_1(r: DoubleDouble, s: f64) -> DoubleDouble {
    const RHO0: f64 = f64::from_bits(0xc0874910ee4e8a27);
    // #define RHO1 -0x1.577453f1799a6p+9
    /* We increase the initial value of RHO1 to avoid spurious underflow in
    the result value el. However, it is not possible to obtain a lower
    bound on |el| from the input value rh, thus this modified value of RHO1
    is obtained experimentally. */
    const RHO1: f64 = f64::from_bits(0xc08483b8cca421af);
    const RHO2: f64 = f64::from_bits(0x40862e42e709a95b);
    const RHO3: f64 = f64::from_bits(0x40862e4316ea5df9);

    // use !(rh <= RHO2) instead of rh > RHO2 to catch rh = NaN too
    #[allow(clippy::neg_cmp_op_on_partial_ord)]
    if !(r.hi <= RHO2) {
        return if r.hi > RHO3 {
            /* If rh > RHO3, we are sure there is overflow.
            For s=1 we return eh = el = DBL_MAX, which yields
            res_min = res_max = +Inf for rounding up or to nearest,
            and res_min = res_max = DBL_MAX for rounding down or toward zero,
            which will yield the correct rounding.
            For s=-1 we return eh = el = -DBL_MAX, which similarly gives
            res_min = res_max = -Inf or res_min = res_max = -DBL_MAX,
            which is the correct rounding. */
            DoubleDouble::new(
                f64::from_bits(0x7fefffffffffffff) * s,
                f64::from_bits(0x7fefffffffffffff) * s,
            )
        } else {
            /* If RHO2 < rh <= RHO3, we are in the intermediate region
            where there might be overflow or not, thus we set eh = el = NaN,
            which will set res_min = res_max = NaN, the comparison
            res_min == res_max will fail: we defer to the 2nd phase. */
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }

    if r.hi < RHO1 {
        return if r.hi < RHO0 {
            /* For s=1, we have eh=el=+0 except for rounding up,
               thus res_min=+0 or -0, res_max=+0 in the main code,
               the rounding test succeeds, and we return res_max which is the
               expected result in the underflow case.
               For s=1 and rounding up, we have eh=+0, el=2^-1074,
               thus res_min = res_max = 2^-1074, which is the expected result too.
               For s=-1, we have eh=el=-0 except for rounding down,
               thus res_min=-0 or +0, res_max=-0 in the main code,
               the rounding test succeeds, and we return res_max which is the
               expected result in the underflow case.
               For s=-1 and rounding down, we have eh=-0, el=-2^-1074,
               thus res_min = res_max = -2^-1074, which is the expected result too.
            */
            DoubleDouble::new(f64::from_bits(0x0000000000000001) * (0.5 * s), 0.0 * s)
        } else {
            /* RHO0 <= rh < RHO1 or s < 0: we defer to the 2nd phase */
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let k = (r.hi * INVLOG2).cpu_round();

    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);

    let zh = dd_fmla(LOG2H, -k, r.hi);
    let zl = dd_fmla(LOG2L, -k, r.lo);

    let bk = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q = exp_poly_1(zh + zl);
    de = DoubleDouble::quick_mult(de, q);
    /* we should have 1 < M < 2047 here, since we filtered out
    potential underflow/overflow cases at the beginning of this function */

    let mut du = (mk as u64).wrapping_shl(52);
    du = (f64::from_bits(du) * s).to_bits();
    de.hi *= f64::from_bits(du);
    de.lo *= f64::from_bits(du);
    de
}
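
// Illustrative sketch, not from the original pxfm source: pow_exp_1 reduces its argument
// with k = round(x * 2^12 / log(2)), splits k = M*2^12 + i2*2^6 + i1, and rebuilds
// exp(x) = 2^M * 2^(i2/64) * 2^(i1/4096) * exp(z) with z = x - k*log(2)/2^12.
// EXP_REDUCE_T0/T1 store the two middle factors as double-doubles; here they are
// recomputed with exp2 and the last factor with std exp. The test name, sample point
// and tolerance are illustrative only.
#[cfg(test)]
#[test]
fn exp_argument_reduction_sketch() {
    let x = 3.7_f64;
    let k = (x * (4096.0 / std::f64::consts::LN_2)).round();
    let z = f64::mul_add(-k, std::f64::consts::LN_2 / 4096.0, x); // |z| <= log(2)/2^13
    let bk = k as i64;
    let m = bk >> 12;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;
    let t0 = ((i2 as f64) / 64.0).exp2();
    let t1 = ((i1 as f64) / 4096.0).exp2();
    let rebuilt = (m as f64).exp2() * t0 * t1 * z.exp();
    assert!((rebuilt - x.exp()).abs() / x.exp() < 1e-12);
}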

#[inline]
pub(crate) fn exp_dd_fast(r: DoubleDouble) -> DoubleDouble {
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let k = (r.hi * INVLOG2).cpu_round();

    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);

    let mut z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
    z = DoubleDouble::from_exact_add(z.hi, z.lo);

    let bk = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    // exp(hi + lo) = exp(hi) * exp(lo)
    let q_hi = exp_poly_1(z.hi);
    // Taylor series exp(x) ~ 1 + x since |z.lo| < ulp(z.hi)
    let q_lo = DoubleDouble::from_exact_add(1., z.lo);
    let q = DoubleDouble::quick_mult(q_hi, q_lo);
    de = DoubleDouble::quick_mult(de, q);
    /* we should have 1 < M < 2047 here, since we filtered out
    potential underflow/overflow cases at the beginning of this function */

    let du = (mk as u64).wrapping_shl(52);
    de.hi *= f64::from_bits(du);
    de.lo *= f64::from_bits(du);
    de
}
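
// Illustrative sketch, not from the original pxfm source: exp_dd_fast splits the
// double-double argument as exp(hi + lo) = exp(hi) * exp(lo) and, since |lo| is below
// one ulp of hi, replaces exp(lo) by its first-order expansion 1 + lo. In plain f64
// the two forms agree to full precision for a representative small low word; the test
// name, sample values and tolerance are illustrative only.
#[cfg(test)]
#[test]
fn exp_hi_lo_split_sketch() {
    let hi = 0.75_f64;
    let lo = 1.0e-10_f64;
    let split = hi.exp() * (1.0 + lo); // exp(lo) ~ 1 + lo, error O(lo^2)
    let reference = (hi + lo).exp();
    assert!((split - reference).abs() / reference < 1e-13);
}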

#[inline]
pub(crate) fn pow_exp_dd(r: DoubleDouble, s: f64) -> DoubleDouble {
    const RHO0: f64 = f64::from_bits(0xc0874910ee4e8a27);
    // #define RHO1 -0x1.577453f1799a6p+9
    /* We increase the initial value of RHO1 to avoid spurious underflow in
    the result value el. However, it is not possible to obtain a lower
    bound on |el| from the input value rh, thus this modified value of RHO1
    is obtained experimentally. */
    const RHO1: f64 = f64::from_bits(0xc08483b8cca421af);
    const RHO2: f64 = f64::from_bits(0x40862e42e709a95b);
    const RHO3: f64 = f64::from_bits(0x40862e4316ea5df9);

    // use !(rh <= RHO2) instead of rh > RHO2 to catch rh = NaN too
    #[allow(clippy::neg_cmp_op_on_partial_ord)]
    if !(r.hi <= RHO2) {
        return if r.hi > RHO3 {
            /* If rh > RHO3, we are sure there is overflow.
            For s=1 we return eh = el = DBL_MAX, which yields
            res_min = res_max = +Inf for rounding up or to nearest,
            and res_min = res_max = DBL_MAX for rounding down or toward zero,
            which will yield the correct rounding.
            For s=-1 we return eh = el = -DBL_MAX, which similarly gives
            res_min = res_max = -Inf or res_min = res_max = -DBL_MAX,
            which is the correct rounding. */
            DoubleDouble::new(
                f64::from_bits(0x7fefffffffffffff) * s,
                f64::from_bits(0x7fefffffffffffff) * s,
            )
        } else {
            /* If RHO2 < rh <= RHO3, we are in the intermediate region
            where there might be overflow or not, thus we set eh = el = NaN,
            which will set res_min = res_max = NaN, the comparison
            res_min == res_max will fail: we defer to the 2nd phase. */
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }

    if r.hi < RHO1 {
        return if r.hi < RHO0 {
            /* For s=1, we have eh=el=+0 except for rounding up,
               thus res_min=+0 or -0, res_max=+0 in the main code,
               the rounding test succeeds, and we return res_max which is the
               expected result in the underflow case.
               For s=1 and rounding up, we have eh=+0, el=2^-1074,
               thus res_min = res_max = 2^-1074, which is the expected result too.
               For s=-1, we have eh=el=-0 except for rounding down,
               thus res_min=-0 or +0, res_max=-0 in the main code,
               the rounding test succeeds, and we return res_max which is the
               expected result in the underflow case.
               For s=-1 and rounding down, we have eh=-0, el=-2^-1074,
               thus res_min = res_max = -2^-1074, which is the expected result too.
            */
            DoubleDouble::new(f64::from_bits(0x0000000000000001) * (0.5 * s), 0.0 * s)
        } else {
            /* RHO0 <= rh < RHO1 or s < 0: we defer to the 2nd phase */
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let k = (r.hi * INVLOG2).cpu_round();

    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);

    let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);

    let bk = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q = exp_poly_1(z.to_f64());
    de = DoubleDouble::quick_mult(de, q);
    /* we should have 1 < M < 2047 here, since we filtered out
    potential underflow/overflow cases at the beginning of this function */

    let mut du = (mk as u64).wrapping_shl(52);
    du = (f64::from_bits(du) * s).to_bits();
    de.hi *= f64::from_bits(du);
    de.lo *= f64::from_bits(du);
    de
}
/*
#[inline(always)]
pub(crate) fn expm1_poly_dd(z: DoubleDouble) -> DoubleDouble {
    /*
       Sollya:
       pretty = proc(u) {
         return ~(floor(u*1000)/1000);
       };

       d = [-2^-12.905,2^-12.905];
       f = expm1(x);
       w = 1;
       pf = fpminimax(f, [|1,2,3,4,5,6,7|], [|1, 1, 107...|], d, absolute, floating);
       err_p = -log2(dirtyinfnorm(pf*w-f, d));
       display = decimal;

       for i from 1 to degree(pf) do print(coeff(pf, i));

       print (pf);
       display = decimal;
       print ("absolute error:",pretty(err_p));
       f = 1;
       w = 1/expm1(x);
       err_p = -log2(dirtyinfnorm(pf*w-f, d));
       print ("relative error:",pretty(err_p));
    */
    const Q: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x0000000000000000, 0x3fe0000000000000),
        (0xbc75555554d7c48c, 0x3fc5555555555556),
        (0xbc555a40ffb472d9, 0x3fa5555555555556),
        (0x3c24866314c38093, 0x3f8111111111110e),
        (0x3be34665978dddb8, 0x3f56c16c16efac90),
        (0x3baeab43b813ef24, 0x3f2a01a1e12d253c),
    ];
    let z2 = z * z;
    let z4 = z2 * z2;

    let b0 = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q[1]),
        DoubleDouble::from_bit_pair(Q[0]),
    );
    let b1 = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q[3]),
        DoubleDouble::from_bit_pair(Q[2]),
    );
    let b2 = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q[5]),
        DoubleDouble::from_bit_pair(Q[4]),
    );

    let c0 = DoubleDouble::quick_mul_add(z2, b1, b0);
    let c1 = DoubleDouble::quick_mul_add(z2, DoubleDouble::from_bit_pair(Q[6]), b2);

    let p = DoubleDouble::quick_mul_add(z4, c1, c0);
    DoubleDouble::quick_mult(p, z)
}
*/
#[inline(always)]
pub(crate) fn expm1_poly_fast(z: DoubleDouble) -> DoubleDouble {
    // Polynomial generated by Sollya:
    // d = [-2^-12.905,2^-12.905];
    // f = expm1(x);
    // w = 1;
    // pf = fpminimax(f, [|1,2,3,4,5,6,7|], [|1, 1, D...|], d, absolute, floating);
    // See ./notes/compound_m1_expm1_fast.sollya
    let p = f_polyeval6(
        z.hi,
        f64::from_bits(0x3fe0000000000000),
        f64::from_bits(0x3fc5555555555555),
        f64::from_bits(0x3fa55555555553de),
        f64::from_bits(0x3f81111144995a9a),
        f64::from_bits(0x3f56c241f9a791c5),
        f64::from_bits(0xbfad9209c6d8b9e1),
    );
    let px = DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_mult(z.hi, p), z.hi);
    // expm1(hi + lo) = expm1(hi) + expm1(lo)(1 + expm1(hi)) = expm1(hi) + expm1(lo)expm1(hi) + expm1(lo)
    // expm1(lo) ~ lo
    let expm1_hi = DoubleDouble::f64_add(z.hi, px);
    let mut lowest_part = DoubleDouble::quick_mult_f64(expm1_hi, z.lo);
    lowest_part = DoubleDouble::full_add_f64(lowest_part, z.lo);
    DoubleDouble::quick_dd_add(expm1_hi, lowest_part)
}
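
// Illustrative sketch, not from the original pxfm source: the recombination in
// expm1_poly_fast relies on the exact identity
//   expm1(a + b) = expm1(a) + expm1(b) + expm1(a)*expm1(b)
// (from e^(a+b) = e^a * e^b), with expm1(lo) then replaced by lo for the tiny low word.
// The test name, sample points and tolerance are illustrative only.
#[cfg(test)]
#[test]
fn expm1_split_identity_sketch() {
    let a = 7.5e-5_f64;
    let b = 1.25e-3_f64;
    let (ea, eb) = (a.exp_m1(), b.exp_m1());
    let recombined = ea + eb + ea * eb;
    let reference = (a + b).exp_m1();
    assert!((recombined - reference).abs() < 1e-17);
}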

/// |z.hi| < 2^-7
#[inline(always)]
pub(crate) fn expm1_poly_dd_tiny(z: DoubleDouble) -> DoubleDouble {
    // Polynomial generated in Sollya
    // d = [-2^-7,2^-7];
    // f = expm1(x);
    // w = 1;
    // pf = fpminimax(f, [|1,2,3,4,5,6,7,8,9|], [|1, 1, 107...|], d, absolute, floating);
    // See ./notes/compound_expm1_tiny.sollya
    const Q: [(u64, u64); 9] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x0000000000000000, 0x3fe0000000000000),
        (0x3c6555564150ff16, 0x3fc5555555555555),
        (0x3c4586275c26f8a5, 0x3fa5555555555555),
        (0xbc19e6193ac658a6, 0x3f81111111111111),
        (0xbbf025e72dc21051, 0x3f56c16c16c1500a),
        (0x3bc2d641a7b7b9b8, 0x3f2a01a01a07dc46),
        (0xbb42cc8aaeeb3d00, 0x3efa01a29fef3e6f),
        (0x3b52b1589125ce82, 0x3ec71db6af553255),
    ];
    let z = DoubleDouble::from_exact_add(z.hi, z.lo);
    let mut d = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q[8]),
        DoubleDouble::from_bit_pair(Q[7]),
    );
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[6]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[5]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[4]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[3]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[2]));
    d = DoubleDouble::quick_mul_add_f64(z, d, f64::from_bits(0x3fe0000000000000));
    d = DoubleDouble::quick_mul_add_f64(z, d, f64::from_bits(0x3ff0000000000000));
    DoubleDouble::quick_mult(d, z)
}

#[inline]
pub(crate) fn pow_expm1_1(r: DoubleDouble, s: f64) -> DoubleDouble {
    const RHO0: f64 = f64::from_bits(0xc0874910ee4e8a27);
    // #define RHO1 -0x1.577453f1799a6p+9
    /* We increase the initial value of RHO1 to avoid spurious underflow in
    the result value el. However, it is not possible to obtain a lower
    bound on |el| from the input value rh, thus this modified value of RHO1
    is obtained experimentally. */
    const RHO1: f64 = f64::from_bits(0xc08483b8cca421af);
    const RHO2: f64 = f64::from_bits(0x40862e42e709a95b);
    const RHO3: f64 = f64::from_bits(0x40862e4316ea5df9);

    // use !(rh <= RHO2) instead of rh > RHO2 to catch rh = NaN too
    #[allow(clippy::neg_cmp_op_on_partial_ord)]
    if !(r.hi <= RHO2) {
        return if r.hi > RHO3 {
            /* If rh > RHO3, we are sure there is overflow.
            For s=1 we return eh = el = DBL_MAX, which yields
            res_min = res_max = +Inf for rounding up or to nearest,
            and res_min = res_max = DBL_MAX for rounding down or toward zero,
            which will yield the correct rounding.
            For s=-1 we return eh = el = -DBL_MAX, which similarly gives
            res_min = res_max = -Inf or res_min = res_max = -DBL_MAX,
            which is the correct rounding. */
            DoubleDouble::new(
                f64::from_bits(0x7fefffffffffffff) * s,
                f64::from_bits(0x7fefffffffffffff) * s,
            )
        } else {
            /* If RHO2 < rh <= RHO3, we are in the intermediate region
            where there might be overflow or not, thus we set eh = el = NaN,
            which will set res_min = res_max = NaN, the comparison
            res_min == res_max will fail: we defer to the 2nd phase. */
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }

    if r.hi < RHO1 {
        if r.hi < RHO0 {
            /* For s=1, we have eh=el=+0 except for rounding up,
               thus res_min=+0 or -0, res_max=+0 in the main code,
               the rounding test succeeds, and we return res_max which is the
               expected result in the underflow case.
               For s=1 and rounding up, we have eh=+0, el=2^-1074,
               thus res_min = res_max = 2^-1074, which is the expected result too.
               For s=-1, we have eh=el=-0 except for rounding down,
               thus res_min=-0 or +0, res_max=-0 in the main code,
               the rounding test succeeds, and we return res_max which is the
               expected result in the underflow case.
               For s=-1 and rounding down, we have eh=-0, el=-2^-1074,
               thus res_min = res_max = -2^-1074, which is the expected result too.
            */
            return DoubleDouble::full_add_f64(
                DoubleDouble::new(f64::from_bits(0x0000000000000001) * (0.5 * s), 0.0 * s),
                -1.0,
            );
        } else {
            /* RHO0 <= rh < RHO1 or s < 0: we return -1 */
            return DoubleDouble::new(0., -1.);
        };
    }

    let ax = r.hi.to_bits() & 0x7fffffffffffffffu64;

    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);

    if ax <= 0x3f80000000000000 {
        // |x| < 2^-7
        if ax < 0x3970000000000000 {
            // |x| < 2^-104
            return r;
        }
        let d = expm1_poly_dd_tiny(r);
        return d;
    }

    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let k = (r.hi * INVLOG2).cpu_round_ties_even();

    let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);

    let bk = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;

    let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
    let tbh = DoubleDouble::quick_mult(t1, t0);
    let mut de = tbh;
    // expm1(k*log(2)/2^12 + z) = 2^(k/2^12)*expm1(z) + (2^(k/2^12) - 1)
    let q = expm1_poly_fast(z);
    de = DoubleDouble::quick_mult(de, q);
    de = DoubleDouble::add(tbh, de);

    let ie = mk - 0x3ff;

    let off: f64 = f64::from_bits((2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64);

    let e: f64;
    if ie < 53 {
        let fhz = DoubleDouble::from_exact_add(off, de.hi);
        de.hi = fhz.hi;
        e = fhz.lo;
    } else if ie < 104 {
        let fhz = DoubleDouble::from_exact_add(de.hi, off);
        de.hi = fhz.hi;
        e = fhz.lo;
    } else {
        e = 0.;
    }
    de.lo += e;
    de.hi = ldexp(de.to_f64(), ie as i32);
    de.lo = 0.;
    de
}
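
// Illustrative sketch, not from the original pxfm source: pow_expm1_1 folds the final
// "- 1" of expm1 into the value before the last ldexp, as an offset of -2^-ie combined
// with an exact two-sum. The underlying algebra is
//   expm1(x) = 2^ie * (2^f * exp(z) - 2^-ie)  with  x = (ie + f)*log(2) + z,  0 <= f < 1,
// which a plain-f64 version reproduces directly. Names, sample point and tolerance are
// illustrative only.
#[cfg(test)]
#[test]
fn expm1_final_scaling_sketch() {
    let x = 2.543543543543_f64;
    let k = (x * (4096.0 / std::f64::consts::LN_2)).round();
    let z = f64::mul_add(-k, std::f64::consts::LN_2 / 4096.0, x);
    let bk = k as i64;
    let ie = (bk >> 12) as i32; // integer part of x/log(2)
    let f = ((bk & 0xfff) as f64) / 4096.0; // fractional part, in steps of 2^-12
    let off = (-(ie as f64)).exp2(); // 2^-ie, the pre-scaled "1"
    let scaled = f.exp2() * z.exp() - off;
    let result = scaled * (ie as f64).exp2();
    assert!((result - x.exp_m1()).abs() / x.exp_m1() < 1e-13);
}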

fn exp_dyadic_poly(x: DyadicFloat128) -> DyadicFloat128 {
    const Q_2: [DyadicFloat128; 8] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffd0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0088_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0x8000_0000_0000_0000_0000_000c_06f3_cd29_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xaaaa_aaaa_aaaa_aaaa_aaaa_aa6a_1e07_76ae_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -132,
            mantissa: 0xaaaa_aaaa_aaaa_aaa3_0000_0000_0000_0000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -134,
            mantissa: 0x8888_8888_8888_8897_0000_0000_0000_0000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -137,
            mantissa: 0xb60b_60b9_3214_6a54_0000_0000_0000_0000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -140,
            mantissa: 0xd00d_00cd_9841_6862_0000_0000_0000_0000_u128,
        },
    ];
    let mut p = Q_2[7];
    for i in (0..7).rev() {
        p = x * p + Q_2[i];
    }
    p
}

/* put in r an approximation of exp(x), for |x| < 744.45,
with relative error < 2^-121.70 */
#[inline]
pub(crate) fn exp_dyadic(x: DyadicFloat128) -> DyadicFloat128 {
    let ex = x.exponent + 127;
    if ex >= 10 {
        // underflow or overflow
        return DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: if x.sign == DyadicSign::Neg {
                -1076
            } else {
                1025
            },
            mantissa: x.mantissa,
        };
    }

    const LOG2_INV: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -115,
        mantissa: 0xb8aa_3b29_5c17_f0bc_0000_0000_0000_0000_u128,
    };

    const LOG2: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
    };

    let mut bk = x * LOG2_INV;
    let k = bk.trunc_to_i64(); /* k = trunc(K) [rounded towards zero, exact] */
    /* The rounding error of mul_dint_int64() is bounded by 6 ulps, thus since
    |K| <= 4399162*log(2) < 3049267, the error on K is bounded by 2^-103.41.
    This error is divided by 2^12 below, thus yields < 2^-115.41. */
    bk = LOG2.mul_int64(k);
    bk.exponent -= 12;
    bk.sign = bk.sign.negate();
    let y = x + bk;

    let bm = k >> 12;
    let i2 = (k >> 6) & 0x3f;
    let i1 = k & 0x3f;
    let mut r = exp_dyadic_poly(y);
    r = EXP_T1_2_DYADIC[i2 as usize] * r;
    r = EXP_T2_2_DYADIC[i1 as usize] * r;
    r.exponent += bm as i16; /* exact */
    r
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::f_expm1;
    #[test]
    fn test_log() {
        let k = DyadicFloat128::new_from_f64(2.5);
        assert_eq!(exp_dyadic(k).fast_as_f64(), 12.182493960703473);
    }

    #[test]
    fn test_exp() {
        let k = pow_expm1_1(DoubleDouble::new(0., 2.543543543543), 1.);
        println!("{}", k.to_f64());
        println!("{}", f_expm1(2.543543543543));
    }
}