Coverage Report

Created: 2025-10-12 08:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/double_double.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::bits::get_exponent_f64;
30
#[allow(unused_imports)]
31
use crate::common::*;
32
use std::ops::{Mul, Neg};
33
// https://hal.science/hal-01351529v3/document
34
35
#[derive(Copy, Clone, Default, Debug)]
36
pub(crate) struct DoubleDouble {
37
    pub(crate) lo: f64,
38
    pub(crate) hi: f64,
39
}
40
41
impl Neg for DoubleDouble {
42
    type Output = Self;
43
44
    #[inline]
45
0
    fn neg(self) -> Self::Output {
46
0
        Self {
47
0
            hi: -self.hi,
48
0
            lo: -self.lo,
49
0
        }
50
0
    }
51
}
52
53
impl DoubleDouble {
54
    #[inline]
55
0
    pub(crate) const fn from_bit_pair(pair: (u64, u64)) -> Self {
56
0
        Self {
57
0
            lo: f64::from_bits(pair.0),
58
0
            hi: f64::from_bits(pair.1),
59
0
        }
60
0
    }
61
62
    #[inline]
63
0
    pub(crate) const fn new(lo: f64, hi: f64) -> Self {
64
0
        DoubleDouble { lo, hi }
65
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::new
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::new
66
67
    // Non FMA helper
68
    #[allow(dead_code)]
69
    #[inline]
70
0
    pub(crate) const fn split(a: f64) -> DoubleDouble {
71
        // CN = 2^N.
72
        const CN: f64 = (1 << 27) as f64;
73
        const C: f64 = CN + 1.0;
74
0
        let t1 = C * a;
75
0
        let t2 = a - t1;
76
0
        let r_hi = t1 + t2;
77
0
        let r_lo = a - r_hi;
78
0
        DoubleDouble::new(r_lo, r_hi)
79
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::split
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::split
80
81
    // Non FMA helper
82
    #[allow(dead_code)]
83
    #[inline]
84
0
    fn from_exact_mult_impl_non_fma(asz: DoubleDouble, a: f64, b: f64) -> Self {
85
0
        let bs = DoubleDouble::split(b);
86
87
0
        let r_hi = a * b;
88
0
        let t1 = asz.hi * bs.hi - r_hi;
89
0
        let t2 = asz.hi * bs.lo + t1;
90
0
        let t3 = asz.lo * bs.hi + t2;
91
0
        let r_lo = asz.lo * bs.lo + t3;
92
0
        DoubleDouble::new(r_lo, r_hi)
93
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::from_exact_mult_impl_non_fma
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::from_exact_mult_impl_non_fma
94
95
    // valid only for |a| > b
96
    #[inline]
97
0
    pub(crate) const fn from_exact_add(a: f64, b: f64) -> DoubleDouble {
98
0
        let r_hi = a + b;
99
0
        let t = r_hi - a;
100
0
        let r_lo = b - t;
101
0
        DoubleDouble::new(r_lo, r_hi)
102
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::from_exact_add
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::from_exact_add
103
104
    // valid only for |a| > b
105
    #[inline]
106
0
    pub(crate) const fn from_exact_sub(a: f64, b: f64) -> DoubleDouble {
107
0
        let r_hi = a - b;
108
0
        let t = a - r_hi;
109
0
        let r_lo = t - b;
110
0
        DoubleDouble::new(r_lo, r_hi)
111
0
    }
112
113
    #[inline]
114
0
    pub(crate) const fn from_full_exact_add(a: f64, b: f64) -> DoubleDouble {
115
0
        let r_hi = a + b;
116
0
        let t1 = r_hi - a;
117
0
        let t2 = r_hi - t1;
118
0
        let t3 = b - t1;
119
0
        let t4 = a - t2;
120
0
        let r_lo = t3 + t4;
121
0
        DoubleDouble::new(r_lo, r_hi)
122
0
    }
123
124
    #[allow(unused)]
125
    #[inline]
126
0
    pub(crate) fn dd_f64_mul_add(a: f64, b: f64, c: f64) -> f64 {
127
0
        let ddx2 = DoubleDouble::from_exact_mult(a, b);
128
0
        let zv = DoubleDouble::full_add_f64(ddx2, c);
129
0
        zv.to_f64()
130
0
    }
131
132
    #[inline]
133
0
    pub(crate) const fn from_full_exact_sub(a: f64, b: f64) -> Self {
134
0
        let r_hi = a - b;
135
0
        let t1 = r_hi - a;
136
0
        let t2 = r_hi - t1;
137
0
        let t3 = -b - t1;
138
0
        let t4 = a - t2;
139
0
        let r_lo = t3 + t4;
140
0
        DoubleDouble::new(r_lo, r_hi)
141
0
    }
142
143
    #[inline]
144
0
    pub(crate) fn add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
145
0
        let s = a.hi + b.hi;
146
0
        let d = s - a.hi;
147
0
        let l = ((b.hi - d) + (a.hi + (d - s))) + (a.lo + b.lo);
148
0
        DoubleDouble::new(l, s)
149
0
    }
150
151
    #[inline]
152
0
    pub(crate) fn quick_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
153
0
        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
154
0
        let v = a.lo + b.lo;
155
0
        let w = sl + v;
156
0
        DoubleDouble::from_exact_add(sh, w)
157
0
    }
158
159
    #[inline]
160
0
    pub(crate) fn quick_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
161
0
        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_sub(a.hi, b.hi);
162
0
        let v = a.lo - b.lo;
163
0
        let w = sl + v;
164
0
        DoubleDouble::from_exact_add(sh, w)
165
0
    }
166
167
    #[inline]
168
0
    pub(crate) fn full_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
169
0
        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
170
0
        let DoubleDouble { hi: th, lo: tl } = DoubleDouble::from_full_exact_add(a.lo, b.lo);
171
0
        let c = sl + th;
172
0
        let v = DoubleDouble::from_exact_add(sh, c);
173
0
        let w = tl + v.lo;
174
0
        DoubleDouble::from_exact_add(v.hi, w)
175
0
    }
176
177
    #[inline]
178
0
    pub(crate) fn full_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
179
0
        DoubleDouble::full_dd_add(a, -b)
180
0
    }
181
182
    #[inline]
183
0
    pub(crate) fn sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
184
0
        let s = a.hi - b.hi;
185
0
        let d = s - a.hi;
186
0
        let l = ((-b.hi - d) + (a.hi + (d - s))) + (a.lo - b.lo);
187
0
        DoubleDouble::new(l, s)
188
0
    }
189
190
    /// DoubleDouble-style square root for a double-double number
191
    #[inline]
192
0
    pub(crate) fn sqrt(self) -> DoubleDouble {
193
0
        let a = self.hi + self.lo;
194
195
0
        if a == 0.0 {
196
0
            return DoubleDouble { hi: 0.0, lo: 0.0 };
197
0
        }
198
0
        if a < 0.0 || a.is_nan() {
199
0
            return DoubleDouble {
200
0
                hi: f64::NAN,
201
0
                lo: 0.0,
202
0
            };
203
0
        }
204
0
        if a.is_infinite() {
205
0
            return DoubleDouble {
206
0
                hi: f64::INFINITY,
207
0
                lo: 0.0,
208
0
            };
209
0
        }
210
211
0
        let x = a.sqrt();
212
213
0
        let x2 = DoubleDouble::from_exact_mult(x, x);
214
215
        // Residual = self - x²
216
0
        let mut r = self.hi - x2.hi;
217
0
        r += self.lo;
218
0
        r -= x2.lo;
219
220
0
        let dx = r / (2.0 * x);
221
0
        let hi = x + dx;
222
0
        let lo = (x - hi) + dx;
223
224
0
        DoubleDouble { hi, lo }
225
0
    }
226
227
    /// DoubleDouble-style square root for a double-double number
228
    #[inline]
229
0
    pub(crate) fn fast_sqrt(self) -> DoubleDouble {
230
0
        let a = self.hi + self.lo;
231
0
        let x = a.sqrt();
232
233
0
        let x2 = DoubleDouble::from_exact_mult(x, x);
234
235
        // Residual = self - x²
236
0
        let mut r = self.hi - x2.hi;
237
0
        r += self.lo;
238
0
        r -= x2.lo;
239
240
0
        let dx = r / (2.0 * x);
241
0
        let hi = x + dx;
242
0
        let lo = (x - hi) + dx;
243
244
0
        DoubleDouble { hi, lo }
245
0
    }
246
247
    /// `a*b+c`
248
    ///
249
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
250
    #[inline]
251
0
    pub(crate) fn mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
252
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
253
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
254
0
        DoubleDouble::new(r + q, p)
255
0
    }
256
257
    /// `a*b+c`
258
    ///
259
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
260
    #[inline]
261
0
    pub(crate) fn quick_mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
262
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
263
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
264
0
        DoubleDouble::new(r + q, p)
265
0
    }
266
267
    /// `a*b+c`
268
    ///
269
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
270
    #[inline]
271
0
    pub(crate) fn mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> DoubleDouble {
272
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
273
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
274
0
        DoubleDouble::new(r + q, p)
275
0
    }
276
277
    // /// Accurate reciprocal: 1 / self
278
    // #[inline]
279
    // pub(crate) fn recip_raphson(self) -> DoubleDouble {
280
    //     let y0 = DoubleDouble::recip(self);
281
    //     let z = DoubleDouble::mul_add_f64(-self, y0, 1.0);
282
    //     DoubleDouble::mul_add(y0, z, y0)
283
    // }
284
285
    /// Accurate reciprocal: 1 / self
286
    #[inline]
287
0
    pub(crate) fn recip(self) -> DoubleDouble {
288
        #[cfg(any(
289
            all(
290
                any(target_arch = "x86", target_arch = "x86_64"),
291
                target_feature = "fma"
292
            ),
293
            target_arch = "aarch64"
294
        ))]
295
        {
296
            let y = 1. / self.hi;
297
            let e1 = f_fmla(-self.hi, y, 1.0);
298
            let e2 = f_fmla(-self.lo, y, e1);
299
            let e = y * e2;
300
            DoubleDouble::new(e, y)
301
        }
302
        #[cfg(not(any(
303
            all(
304
                any(target_arch = "x86", target_arch = "x86_64"),
305
                target_feature = "fma"
306
            ),
307
            target_arch = "aarch64"
308
        )))]
309
        {
310
0
            let y = 1.0 / self.hi;
311
312
0
            let DoubleDouble { hi: p1, lo: err1 } = DoubleDouble::from_exact_mult(self.hi, y);
313
0
            let e1 = (1.0 - p1) - err1;
314
0
            let DoubleDouble { hi: p2, lo: err2 } = DoubleDouble::from_exact_mult(self.lo, y);
315
0
            let e2 = (e1 - p2) - err2;
316
0
            let e = y * e2;
317
318
0
            DoubleDouble::new(e, y)
319
        }
320
0
    }
321
322
    #[inline]
323
0
    pub(crate) fn from_recip(b: f64) -> Self {
324
        #[cfg(any(
325
            all(
326
                any(target_arch = "x86", target_arch = "x86_64"),
327
                target_feature = "fma"
328
            ),
329
            target_arch = "aarch64"
330
        ))]
331
        {
332
            let x_hi = 1.0 / b;
333
            let err = f_fmla(-x_hi, b, 1.0);
334
            let x_lo = err / b;
335
            Self::new(x_lo, x_hi)
336
        }
337
        #[cfg(not(any(
338
            all(
339
                any(target_arch = "x86", target_arch = "x86_64"),
340
                target_feature = "fma"
341
            ),
342
            target_arch = "aarch64"
343
        )))]
344
        {
345
0
            let x_hi = 1.0 / b;
346
0
            let prod = Self::from_exact_mult(x_hi, b);
347
0
            let err = (1.0 - prod.hi) - prod.lo;
348
0
            let x_lo = err / b;
349
0
            Self::new(x_lo, x_hi)
350
        }
351
0
    }
352
353
    #[inline]
354
0
    pub(crate) fn from_quick_recip(b: f64) -> Self {
355
        #[cfg(any(
356
            all(
357
                any(target_arch = "x86", target_arch = "x86_64"),
358
                target_feature = "fma"
359
            ),
360
            target_arch = "aarch64"
361
        ))]
362
        {
363
            let h = 1.0 / b;
364
            let hl = f_fmla(h, -b, 1.) * h;
365
            DoubleDouble::new(hl, h)
366
        }
367
        #[cfg(not(any(
368
            all(
369
                any(target_arch = "x86", target_arch = "x86_64"),
370
                target_feature = "fma"
371
            ),
372
            target_arch = "aarch64"
373
        )))]
374
        {
375
0
            let h = 1.0 / b;
376
0
            let pr = DoubleDouble::from_exact_mult(h, b);
377
0
            let err = (1.0 - pr.hi) - pr.lo;
378
0
            let hl = err * h;
379
0
            DoubleDouble::new(hl, h)
380
        }
381
0
    }
382
383
    #[inline]
384
0
    pub(crate) fn from_exact_div(a: f64, b: f64) -> Self {
385
        #[cfg(any(
386
            all(
387
                any(target_arch = "x86", target_arch = "x86_64"),
388
                target_feature = "fma"
389
            ),
390
            target_arch = "aarch64"
391
        ))]
392
        {
393
            let q_hi = a / b;
394
            let r = f_fmla(-q_hi, b, a);
395
            let q_lo = r / b;
396
            Self::new(q_lo, q_hi)
397
        }
398
399
        #[cfg(not(any(
400
            all(
401
                any(target_arch = "x86", target_arch = "x86_64"),
402
                target_feature = "fma"
403
            ),
404
            target_arch = "aarch64"
405
        )))]
406
        {
407
0
            let q_hi = a / b;
408
409
0
            let p = DoubleDouble::from_exact_mult(q_hi, b);
410
0
            let r = DoubleDouble::from_exact_sub(a, p.hi);
411
0
            let r = r.hi + (r.lo - p.lo);
412
0
            let q_lo = r / b;
413
414
0
            Self::new(q_lo, q_hi)
415
        }
416
0
    }
417
418
    // Resistant to overflow without FMA
419
    #[inline]
420
0
    pub(crate) fn from_exact_safe_div(a: f64, b: f64) -> Self {
421
0
        let q_hi = a / b;
422
0
        let r = f64::mul_add(-q_hi, b, a);
423
0
        let q_lo = r / b;
424
0
        Self::new(q_lo, q_hi)
425
0
    }
426
427
    #[inline]
428
0
    pub(crate) fn from_sqrt(x: f64) -> Self {
429
        #[cfg(any(
430
            all(
431
                any(target_arch = "x86", target_arch = "x86_64"),
432
                target_feature = "fma"
433
            ),
434
            target_arch = "aarch64"
435
        ))]
436
        {
437
            let h = x.sqrt();
438
            /* h = sqrt(x) * (1 + e1) with |e1| < 2^-52
439
            thus h^2 = x * (1 + e2) with |e2| < 2^-50.999 */
440
            let e = -f_fmla(h, h, -x); // exact
441
442
            /* e = x - h^2 */
443
            let l = e / (h + h);
444
            DoubleDouble::new(l, h)
445
        }
446
        #[cfg(not(any(
447
            all(
448
                any(target_arch = "x86", target_arch = "x86_64"),
449
                target_feature = "fma"
450
            ),
451
            target_arch = "aarch64"
452
        )))]
453
        {
454
0
            let h = x.sqrt();
455
0
            let prod_hh = DoubleDouble::from_exact_mult(h, h);
456
0
            let e = (x - prod_hh.hi) - prod_hh.lo; // exact
457
458
            /* e = x - h^2 */
459
0
            let l = e / (h + h);
460
0
            DoubleDouble::new(l, h)
461
        }
462
0
    }
463
464
    /// Safe to overflow underflow division using mandatory FMA.
465
    #[inline]
466
    #[allow(dead_code)]
467
0
    pub(crate) fn div_safe_dd_f64(a: DoubleDouble, b: f64) -> Self {
468
0
        let q1 = a.hi / b;
469
0
        let r = f64::mul_add(-q1, b, a.hi);
470
0
        let r = r + a.lo;
471
0
        let q2 = r / b;
472
473
0
        DoubleDouble::new(q2, q1)
474
0
    }
475
476
    #[inline]
477
0
    pub(crate) fn div_dd_f64(a: DoubleDouble, b: f64) -> Self {
478
        #[cfg(any(
479
            all(
480
                any(target_arch = "x86", target_arch = "x86_64"),
481
                target_feature = "fma"
482
            ),
483
            target_arch = "aarch64"
484
        ))]
485
        {
486
            let q1 = a.hi / b;
487
            let r = f_fmla(-q1, b, a.hi);
488
            let r = r + a.lo;
489
            let q2 = r / b;
490
491
            DoubleDouble::new(q2, q1)
492
        }
493
        #[cfg(not(any(
494
            all(
495
                any(target_arch = "x86", target_arch = "x86_64"),
496
                target_feature = "fma"
497
            ),
498
            target_arch = "aarch64"
499
        )))]
500
        {
501
0
            let th = a.hi / b;
502
0
            let prod = DoubleDouble::from_exact_mult(th, b);
503
0
            let beta_h = a.hi - prod.hi;
504
0
            let beta_l = beta_h - prod.lo;
505
0
            let beta = beta_l + a.lo;
506
0
            let tl = beta / b;
507
0
            DoubleDouble::new(tl, th)
508
        }
509
0
    }
510
511
    // /// Dekker division with one refinement step
512
    // #[inline]
513
    // pub(crate) fn div_dd_f64_newton_raphson(a: DoubleDouble, b: f64) -> Self {
514
    //     // Initial estimate q = a / b
515
    //     let q = DoubleDouble::div_dd_f64(a, b);
516
    //
517
    //     // One Newton-Raphson refinement step:
518
    //     // e = a - q * b
519
    //     let qb = DoubleDouble::quick_mult_f64(q, b);
520
    //     let e = DoubleDouble::sub(a, qb);
521
    //     let e_div_b = DoubleDouble::div_dd_f64(e, b);
522
    //
523
    //     DoubleDouble::add(q, e_div_b)
524
    // }
525
526
    // /// Dekker division with two Newton-Raphson refinement steps
527
    // #[inline]
528
    // pub(crate) fn div_dd_f64_newton_raphson_2(a: Dekker, b: f64) -> Self {
529
    //     // First estimate: q = a / b (one round of Dekker division)
530
    //     let q1 = Dekker::div_dd_f64(a, b);
531
    //
532
    //     // First refinement: q2 = q1 + (a - q1 * b) / b
533
    //     let qb1 = Dekker::quick_mult_f64(q1, b);
534
    //     let e1 = Dekker::sub(a, qb1);
535
    //     let dq1 = Dekker::div_dd_f64(e1, b);
536
    //     let q2 = Dekker::add(q1, dq1);
537
    //
538
    //     // Second refinement: q3 = q2 + (a - q2 * b) / b
539
    //     let qb2 = Dekker::quick_mult_f64(q2, b);
540
    //     let e2 = Dekker::sub(a, qb2);
541
    //     let dq2 = Dekker::div_dd_f64(e2, b);
542
    //
543
    //     Dekker::add(q2, dq2)
544
    // }
545
546
    // #[inline]
547
    // pub(crate) fn neg(self) -> Self {
548
    //     Self {
549
    //         lo: -self.lo, hi: -self.hi,
550
    //     }
551
    // }
552
553
    #[inline]
554
0
    pub(crate) fn from_f64_div_dd(a: f64, b: DoubleDouble) -> Self {
555
0
        let q1 = a / b.hi;
556
557
0
        let prod = DoubleDouble::from_exact_mult(q1, b.hi);
558
0
        let prod_lo = f_fmla(q1, b.lo, prod.lo);
559
0
        let rem = f_fmla(-1.0, prod.hi, a) - prod_lo;
560
561
0
        let q2 = rem / b.hi;
562
563
0
        DoubleDouble::new(q2, q1)
564
0
    }
565
566
    // #[inline]
567
    // pub(crate) fn mla_f64(a: Dekker, b: f64, c: f64) -> Self {
568
    //     let q = Dekker::mult_f64(a, b);
569
    //     Dekker::add_f64(q, c)
570
    // }
571
    //
572
    // #[inline]
573
    // pub(crate) fn mla_dd_f64(a: Dekker, b: Dekker, c: f64) -> Self {
574
    //     let q = Dekker::quick_mult(a, b);
575
    //     Dekker::add_f64(q, c)
576
    // }
577
578
    #[inline]
579
0
    pub(crate) fn div(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
580
0
        let q = 1.0 / b.hi;
581
0
        let r_hi = a.hi * q;
582
        #[cfg(any(
583
            all(
584
                any(target_arch = "x86", target_arch = "x86_64"),
585
                target_feature = "fma"
586
            ),
587
            target_arch = "aarch64"
588
        ))]
589
        {
590
            let e_hi = f_fmla(b.hi, -r_hi, a.hi);
591
            let e_lo = f_fmla(b.lo, -r_hi, a.lo);
592
            let r_lo = q * (e_hi + e_lo);
593
            DoubleDouble::new(r_lo, r_hi)
594
        }
595
596
        #[cfg(not(any(
597
            all(
598
                any(target_arch = "x86", target_arch = "x86_64"),
599
                target_feature = "fma"
600
            ),
601
            target_arch = "aarch64"
602
        )))]
603
        {
604
0
            let b_hi_r_hi = DoubleDouble::from_exact_mult(b.hi, -r_hi);
605
0
            let b_lo_r_hi = DoubleDouble::from_exact_mult(b.lo, -r_hi);
606
0
            let e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
607
0
            let e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
608
0
            let r_lo = q * (e_hi + e_lo);
609
0
            DoubleDouble::new(r_lo, r_hi)
610
        }
611
0
    }
612
613
    #[inline]
614
0
    pub(crate) fn from_exact_mult(a: f64, b: f64) -> Self {
615
        #[cfg(any(
616
            all(
617
                any(target_arch = "x86", target_arch = "x86_64"),
618
                target_feature = "fma"
619
            ),
620
            target_arch = "aarch64"
621
        ))]
622
        {
623
            let r_hi = a * b;
624
            let r_lo = f_fmla(a, b, -r_hi);
625
            DoubleDouble::new(r_lo, r_hi)
626
        }
627
        #[cfg(not(any(
628
            all(
629
                any(target_arch = "x86", target_arch = "x86_64"),
630
                target_feature = "fma"
631
            ),
632
            target_arch = "aarch64"
633
        )))]
634
        {
635
0
            let splat = DoubleDouble::split(a);
636
0
            DoubleDouble::from_exact_mult_impl_non_fma(splat, a, b)
637
        }
638
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::from_exact_mult
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::from_exact_mult
639
640
    // #[inline]
641
    // pub(crate) fn add_f64(&self, other: f64) -> DoubleDouble {
642
    //     let r = DoubleDouble::from_exact_add(self.hi, other);
643
    //     Dekker::from_exact_add(r.hi, r.lo + self.lo)
644
    // }
645
646
    // #[inline]
647
    // pub(crate) fn to_triple(self) -> TripleDouble {
648
    //     TripleDouble::new(0., self.lo, self.hi)
649
    // }
650
651
    /// Computes `a * b + c`
652
    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
653
    ///
654
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
655
    #[inline]
656
0
    pub(crate) fn mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
657
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
658
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
659
0
        DoubleDouble::new(r + q, p)
660
0
    }
661
662
    /// Computes `a * b + c`
663
    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
664
    ///
665
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
666
    ///
667
    /// *Correctness*
668
    /// |c.hi| > |a.hi * b.hi|
669
    #[inline]
670
0
    pub(crate) fn quick_mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
671
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
672
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
673
0
        DoubleDouble::new(r + q, p)
674
0
    }
675
676
    /// Computes `a * b + c`
677
    ///
678
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
679
    ///
680
    /// *Correctness*
681
    /// |c.hi| > |a.hi * b.hi|
682
    #[inline]
683
0
    pub(crate) fn quick_mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> Self {
684
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
685
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
686
0
        DoubleDouble::new(r + q, p)
687
0
    }
688
689
    // #[inline]
690
    // pub(crate) fn mul_f64_add_full(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
691
    //     /*
692
    //         double _t1, _t2, _t3, _t4, _t5, _t6, _t7, _t8;   \
693
    //                                                  \
694
    //         Mul12(&_t1,&_t2,(a),(bh));                       \
695
    //         Add12(_t3,_t4,(ch),_t1);                         \
696
    //         _t5 = (bl) * (a);                                \
697
    //         _t6 = (cl) + _t2;                                \
698
    //         _t7 = _t5 + _t6;                                 \
699
    //         _t8 = _t7 + _t4;                                 \
700
    //         Add12((*(resh)),(*(resl)),_t3,_t8);              \
701
    //     */
702
    //     let DoubleDouble { hi: t1, lo: t2 } = DoubleDouble::from_exact_mult(a.hi, b);
703
    //     let DoubleDouble { hi: t3, lo: t4 } = DoubleDouble::from_full_exact_add(c.hi, t1);
704
    //     let t5 = a.lo * b;
705
    //     let t6 = c.lo + t2;
706
    //     let t7 = t5 + t6;
707
    //     let t8 = t7 + t4;
708
    //     DoubleDouble::from_full_exact_add(t3, t8)
709
    // }
710
711
    /// Computes `a * b + c`
712
    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
713
    ///
714
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
715
    #[inline]
716
0
    pub(crate) fn f64_mul_f64_add(a: f64, b: f64, c: DoubleDouble) -> Self {
717
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
718
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
719
0
        DoubleDouble::new(r + q, p)
720
0
    }
721
722
    // /// Computes `a * b + c`
723
    // /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
724
    // ///
725
    // /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
726
    // #[inline]
727
    // pub(crate) fn single_mul_add(a: f64, b: f64, c: f64) -> Self {
728
    //     let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
729
    //     let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
730
    //     DoubleDouble::new(r + q, p)
731
    // }
732
733
    // /// Computes `a * b + c` safe to overflow without FMA
734
    // /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
735
    // ///
736
    // /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
737
    // #[inline]
738
    // pub(crate) fn mul_f64_safe_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
739
    //     let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_safe_f64(a, b);
740
    //     let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
741
    //     DoubleDouble::new(r + q, p)
742
    // }
743
744
    /// `a*b+c`
745
    ///
746
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
747
    #[inline]
748
0
    pub(crate) fn mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
749
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
750
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
751
0
        DoubleDouble::new(r + q, p)
752
0
    }
753
754
    /// `a*b+c`
755
    ///
756
    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
757
    ///
758
    /// *Correctness*
759
    /// |c.hi| > |a.hi * b.hi|
760
    #[inline]
761
0
    pub(crate) fn quick_mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
762
0
        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
763
0
        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
764
0
        DoubleDouble::new(r + q, p)
765
0
    }
766
767
    #[inline]
768
0
    pub(crate) fn quick_mult(a: DoubleDouble, b: DoubleDouble) -> Self {
769
        #[cfg(any(
770
            all(
771
                any(target_arch = "x86", target_arch = "x86_64"),
772
                target_feature = "fma"
773
            ),
774
            target_arch = "aarch64"
775
        ))]
776
        {
777
            let mut r = DoubleDouble::from_exact_mult(a.hi, b.hi);
778
            let t1 = f_fmla(a.hi, b.lo, r.lo);
779
            let t2 = f_fmla(a.lo, b.hi, t1);
780
            r.lo = t2;
781
            r
782
        }
783
        #[cfg(not(any(
784
            all(
785
                any(target_arch = "x86", target_arch = "x86_64"),
786
                target_feature = "fma"
787
            ),
788
            target_arch = "aarch64"
789
        )))]
790
        {
791
0
            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
792
0
            let tl1 = a.hi * b.lo;
793
0
            let tl2 = a.lo * b.hi;
794
0
            let cl2 = tl1 + tl2;
795
0
            let cl3 = cl1 + cl2;
796
0
            DoubleDouble::new(cl3, ch)
797
        }
798
0
    }
799
800
    #[inline]
801
0
    pub(crate) fn mult(a: DoubleDouble, b: DoubleDouble) -> Self {
802
        #[cfg(any(
803
            all(
804
                any(target_arch = "x86", target_arch = "x86_64"),
805
                target_feature = "fma"
806
            ),
807
            target_arch = "aarch64"
808
        ))]
809
        {
810
            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
811
            let tl0 = a.lo * b.lo;
812
            let tl1 = f_fmla(a.hi, b.lo, tl0);
813
            let cl2 = f_fmla(a.lo, b.hi, tl1);
814
            let cl3 = cl1 + cl2;
815
            DoubleDouble::from_exact_add(ch, cl3)
816
        }
817
        #[cfg(not(any(
818
            all(
819
                any(target_arch = "x86", target_arch = "x86_64"),
820
                target_feature = "fma"
821
            ),
822
            target_arch = "aarch64"
823
        )))]
824
        {
825
0
            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
826
0
            let tl1 = a.hi * b.lo;
827
0
            let tl2 = a.lo * b.hi;
828
0
            let cl2 = tl1 + tl2;
829
0
            let cl3 = cl1 + cl2;
830
0
            DoubleDouble::from_exact_add(ch, cl3)
831
        }
832
0
    }
833
834
    #[inline]
835
0
    pub(crate) fn mult_f64(a: DoubleDouble, b: f64) -> Self {
836
        #[cfg(any(
837
            all(
838
                any(target_arch = "x86", target_arch = "x86_64"),
839
                target_feature = "fma"
840
            ),
841
            target_arch = "aarch64"
842
        ))]
843
        {
844
            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
845
            let cl3 = f_fmla(a.lo, b, cl1);
846
            DoubleDouble::from_exact_add(ch, cl3)
847
        }
848
        #[cfg(not(any(
849
            all(
850
                any(target_arch = "x86", target_arch = "x86_64"),
851
                target_feature = "fma"
852
            ),
853
            target_arch = "aarch64"
854
        )))]
855
        {
856
0
            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
857
0
            let cl2 = a.lo * b;
858
0
            let t = DoubleDouble::from_exact_add(ch, cl2);
859
0
            let tl2 = t.lo + cl1;
860
0
            DoubleDouble::from_exact_add(t.hi, tl2)
861
        }
862
0
    }
863
864
    #[inline]
865
0
    pub(crate) fn quick_f64_mult(a: f64, b: DoubleDouble) -> DoubleDouble {
866
0
        DoubleDouble::quick_mult_f64(b, a)
867
0
    }
868
869
    #[inline]
870
0
    pub(crate) fn quick_mult_f64(a: DoubleDouble, b: f64) -> Self {
871
        #[cfg(any(
872
            all(
873
                any(target_arch = "x86", target_arch = "x86_64"),
874
                target_feature = "fma"
875
            ),
876
            target_arch = "aarch64"
877
        ))]
878
        {
879
            let h = b * a.hi;
880
            let l = f_fmla(b, a.lo, f_fmla(b, a.hi, -h));
881
            Self { lo: l, hi: h }
882
        }
883
        #[cfg(not(any(
884
            all(
885
                any(target_arch = "x86", target_arch = "x86_64"),
886
                target_feature = "fma"
887
            ),
888
            target_arch = "aarch64"
889
        )))]
890
        {
891
0
            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
892
0
            let cl2 = a.lo * b;
893
0
            let cl3 = cl1 + cl2;
894
0
            DoubleDouble::new(cl3, ch)
895
        }
896
0
    }
897
898
    // /// Double-double multiplication safe to overflow without FMA
899
    // #[inline]
900
    // pub(crate) fn quick_mult_safe_f64(a: DoubleDouble, b: f64) -> Self {
901
    //     let h = b * a.hi;
902
    //     let l = f64::mul_add(b, a.lo, f64::mul_add(b, a.hi, -h));
903
    //     Self { lo: l, hi: h }
904
    // }
905
906
    /// Valid only |a.hi| > |b|
907
    #[inline]
908
0
    pub(crate) fn add_f64(a: DoubleDouble, b: f64) -> Self {
909
0
        let t = DoubleDouble::from_exact_add(a.hi, b);
910
0
        let l = a.lo + t.lo;
911
0
        Self { lo: l, hi: t.hi }
912
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::add_f64
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::add_f64
913
914
    #[inline]
915
0
    pub(crate) fn full_add_f64(a: DoubleDouble, b: f64) -> Self {
916
0
        let t = DoubleDouble::from_full_exact_add(a.hi, b);
917
0
        let l = a.lo + t.lo;
918
0
        Self { lo: l, hi: t.hi }
919
0
    }
920
921
    /// Valid only |b| > |a.hi|
922
    #[inline]
923
0
    pub(crate) fn f64_add(b: f64, a: DoubleDouble) -> Self {
924
0
        let t = DoubleDouble::from_exact_add(b, a.hi);
925
0
        let l = a.lo + t.lo;
926
0
        Self { lo: l, hi: t.hi }
927
0
    }
928
929
    #[inline]
930
0
    pub(crate) const fn to_f64(self) -> f64 {
931
0
        self.lo + self.hi
932
0
    }
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::to_f64
Unexecuted instantiation: <pxfm::double_double::DoubleDouble>::to_f64
933
934
    // #[inline]
935
    // pub(crate) fn from_rsqrt(x: f64) -> DoubleDouble {
936
    //     let r = DoubleDouble::div_dd_f64(DoubleDouble::from_sqrt(x), x);
937
    //     let rx = DoubleDouble::quick_mult_safe_f64(r, x);
938
    //     let drx = DoubleDouble::mul_f64_safe_add(r, x, -rx);
939
    //     let h = DoubleDouble::mul_add(r, drx, DoubleDouble::mul_add_f64(r, rx, -1.0));
940
    //     let dr = DoubleDouble::quick_mult(DoubleDouble::quick_mult_f64(r, 0.5), h);
941
    //     DoubleDouble::add(r, dr)
942
    // }
943
944
    #[inline]
945
0
    pub(crate) fn from_rsqrt_fast(x: f64) -> DoubleDouble {
946
0
        let sqrt_x = DoubleDouble::from_sqrt(x);
947
0
        sqrt_x.recip()
948
0
    }
949
}
950
951
impl Mul<DoubleDouble> for DoubleDouble {
952
    type Output = Self;
953
954
    #[inline]
955
0
    fn mul(self, rhs: DoubleDouble) -> Self::Output {
956
0
        DoubleDouble::quick_mult(self, rhs)
957
0
    }
958
}
959
960
/// check if number is valid for Exact mult
961
#[allow(dead_code)]
962
#[inline]
963
0
pub(crate) fn two_product_compatible(x: f64) -> bool {
964
0
    let exp = get_exponent_f64(x);
965
0
    !(exp >= 970 || exp <= -970)
966
0
}
967
968
#[cfg(test)]
969
mod tests {
970
    use super::*;
971
    #[test]
972
    fn test_f64_mult() {
973
        let d1 = 1.1231;
974
        let d2 = DoubleDouble::new(1e-22, 3.2341);
975
        let p = DoubleDouble::quick_f64_mult(d1, d2);
976
        assert_eq!(p.hi, 3.6322177100000004);
977
        assert_eq!(p.lo, -1.971941841373783e-16);
978
    }
979
980
    #[test]
981
    fn test_mult_64() {
982
        let d1 = 1.1231;
983
        let d2 = DoubleDouble::new(1e-22, 3.2341);
984
        let p = DoubleDouble::mult_f64(d2, d1);
985
        assert_eq!(p.hi, 3.6322177100000004);
986
        assert_eq!(p.lo, -1.971941841373783e-16);
987
    }
988
989
    #[test]
990
    fn recip_test() {
991
        let d1 = 1.54352432142;
992
        let recip = DoubleDouble::new(0., d1).recip();
993
        assert_eq!(recip.hi, d1.recip());
994
        assert_ne!(recip.lo, 0.);
995
    }
996
997
    #[test]
998
    fn from_recip_test() {
999
        let d1 = 1.54352432142;
1000
        let recip = DoubleDouble::from_recip(d1);
1001
        assert_eq!(recip.hi, d1.recip());
1002
        assert_ne!(recip.lo, 0.);
1003
    }
1004
1005
    #[test]
1006
    fn from_quick_recip_test() {
1007
        let d1 = 1.54352432142;
1008
        let recip = DoubleDouble::from_quick_recip(d1);
1009
        assert_eq!(recip.hi, d1.recip());
1010
        assert_ne!(recip.lo, 0.);
1011
    }
1012
}