Coverage Report

Created: 2026-03-10 07:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.28/src/pow.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::bits::get_exponent_f64;
30
use crate::common::{f_fmla, is_integer, is_odd_integer};
31
use crate::double_double::DoubleDouble;
32
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
33
use crate::exponents::exp;
34
use crate::logs::log_dyadic;
35
use crate::pow_exec::{exp_dyadic, pow_exp_1, pow_log_1};
36
use crate::triple_double::TripleDouble;
37
use crate::{f_exp2, f_exp10, log};
38
39
#[cold]
40
0
fn pow_exp10_fallback(x: f64) -> f64 {
41
0
    f_exp10(x)
42
0
}
43
44
#[cold]
45
0
fn pow_exp2_fallback(x: f64) -> f64 {
46
0
    f_exp2(x)
47
0
}
48
49
#[cold]
50
#[inline(never)]
51
0
fn f_powi(x: f64, y: f64) -> f64 {
52
0
    let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
53
54
0
    let mut s = DoubleDouble::new(0., x);
55
56
    // exponentiation by squaring: O(log(y)) complexity
57
0
    let mut acc = if iter_count % 2 != 0 {
58
0
        s
59
    } else {
60
0
        DoubleDouble::new(0., 1.)
61
    };
62
63
0
    while {
64
0
        iter_count >>= 1;
65
0
        iter_count
66
0
    } != 0
67
    {
68
0
        s = DoubleDouble::mult(s, s);
69
0
        if iter_count % 2 != 0 {
70
0
            acc = DoubleDouble::mult(acc, s);
71
0
        }
72
    }
73
74
0
    let dz = if y.is_sign_negative() {
75
0
        acc.recip()
76
    } else {
77
0
        acc
78
    };
79
0
    let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
80
0
    let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
81
0
    if ub == lb {
82
0
        return dz.to_f64();
83
0
    }
84
0
    f_powi_hard(x, y)
85
0
}
86
87
#[cold]
88
#[inline(never)]
89
0
fn f_powi_hard(x: f64, y: f64) -> f64 {
90
0
    let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
91
92
0
    let mut s = TripleDouble::new(0., 0., x);
93
94
    // exponentiation by squaring: O(log(y)) complexity
95
0
    let mut acc = if iter_count % 2 != 0 {
96
0
        s
97
    } else {
98
0
        TripleDouble::new(0., 0., 1.)
99
    };
100
101
0
    while {
102
0
        iter_count >>= 1;
103
0
        iter_count
104
0
    } != 0
105
    {
106
0
        s = TripleDouble::quick_mult(s, s);
107
0
        if iter_count % 2 != 0 {
108
0
            acc = TripleDouble::quick_mult(acc, s);
109
0
        }
110
    }
111
112
0
    let dz = if y.is_sign_negative() {
113
0
        acc.recip()
114
    } else {
115
0
        acc
116
    };
117
0
    dz.to_f64()
118
0
}
119
120
/// Power function
121
///
122
/// max found ULP 0.5
123
0
pub fn f_pow(x: f64, y: f64) -> f64 {
124
0
    let mut y = y;
125
0
    let x_sign = x.is_sign_negative();
126
0
    let y_sign = y.is_sign_negative();
127
128
0
    let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
129
0
    let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
130
131
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
132
133
0
    let y_mant = y.to_bits() & MANTISSA_MASK;
134
0
    let x_u = x.to_bits();
135
0
    let x_a = x_abs;
136
0
    let y_a = y_abs;
137
138
0
    let mut x = x;
139
140
    // If x or y is signaling NaN
141
0
    if x.is_nan() || y.is_nan() {
142
0
        return f64::NAN;
143
0
    }
144
145
0
    let mut s = 1.0;
146
147
    // The double precision number that is closest to 1 is (1 - 2^-53), which has
148
    //   log2(1 - 2^-53) ~ -1.715...p-53.
149
    // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
150
    //   |y * log2(x)| = 0 or > 1075.
151
    // Hence, x^y will either overflow or underflow if x is not zero.
152
0
    if y_mant == 0
153
0
        || y_a > 0x43d7_4910_d52d_3052
154
0
        || x_u == 1f64.to_bits()
155
0
        || x_u >= f64::INFINITY.to_bits()
156
0
        || x_u < f64::MIN.to_bits()
157
    {
158
        // Exceptional exponents.
159
0
        if y == 0.0 {
160
0
            return 1.0;
161
0
        }
162
163
0
        match y_a {
164
            0x3fe0_0000_0000_0000 => {
165
0
                if x == 0.0 || x_u == f64::NEG_INFINITY.to_bits() {
166
                    // pow(-0, 1/2) = +0
167
                    // pow(-inf, 1/2) = +inf
168
0
                    return if y_sign { 1.0 / (x * x) } else { x * x };
169
0
                }
170
0
                return if y_sign {
171
0
                    if x.is_infinite() {
172
0
                        return if x.is_sign_positive() { 0. } else { f64::NAN };
173
0
                    }
174
                    #[cfg(any(
175
                        all(
176
                            any(target_arch = "x86", target_arch = "x86_64"),
177
                            target_feature = "fma"
178
                        ),
179
                        target_arch = "aarch64"
180
                    ))]
181
                    {
182
                        let r = x.sqrt() / x;
183
                        let rx = r * x;
184
                        let drx = f_fmla(r, x, -rx);
185
                        let h = f_fmla(r, rx, -1.0) + r * drx;
186
                        let dr = (r * 0.5) * h;
187
                        r - dr
188
                    }
189
                    #[cfg(not(any(
190
                        all(
191
                            any(target_arch = "x86", target_arch = "x86_64"),
192
                            target_feature = "fma"
193
                        ),
194
                        target_arch = "aarch64"
195
                    )))]
196
                    {
197
0
                        let r = x.sqrt() / x;
198
0
                        let d2x = DoubleDouble::from_exact_mult(r, x);
199
0
                        let DoubleDouble { hi: h, lo: pr } = DoubleDouble::quick_mult_f64(d2x, r);
200
0
                        let DoubleDouble { hi: p, lo: q } =
201
0
                            DoubleDouble::from_full_exact_add(-1.0, h);
202
0
                        let h = DoubleDouble::from_exact_add(p, pr + q);
203
0
                        let dr = DoubleDouble::quick_mult_f64(h, r * 0.5);
204
0
                        r - dr.hi - dr.lo
205
                    }
206
                } else {
207
0
                    x.sqrt()
208
                };
209
            }
210
            0x3ff0_0000_0000_0000 => {
211
0
                return if y_sign { 1.0 / x } else { x };
212
            }
213
            0x4000_0000_0000_0000 => {
214
0
                let x_e = get_exponent_f64(x);
215
0
                if x_e > 511 {
216
0
                    return if y_sign { 0. } else { f64::INFINITY };
217
0
                }
218
                // not enough precision to make 0.5 ULP for subnormals
219
0
                let a_xe = x_e & 0x7fff_ffff_ffff_ffff;
220
0
                if a_xe < 70 {
221
0
                    let x_sqr = DoubleDouble::from_exact_mult(x, x);
222
0
                    return if y_sign {
223
0
                        let recip = x_sqr.recip();
224
0
                        recip.to_f64()
225
                    } else {
226
0
                        x_sqr.to_f64()
227
                    };
228
0
                }
229
            }
230
0
            _ => {}
231
        }
232
233
        // |y| > |1075 / log2(1 - 2^-53)|.
234
0
        if y_a > 0x43d7_4910_d52d_3052 {
235
0
            if y_a >= 0x7ff0_0000_0000_0000 {
236
                // y is inf or nan
237
0
                if y_mant != 0 {
238
                    // y is NaN
239
                    // pow(1, NaN) = 1
240
                    // pow(x, NaN) = NaN
241
0
                    return if x_u == 1f64.to_bits() { 1.0 } else { y };
242
0
                }
243
244
                // Now y is +-Inf
245
0
                if f64::from_bits(x_abs).is_nan() {
246
                    // pow(NaN, +-Inf) = NaN
247
0
                    return x;
248
0
                }
249
250
0
                if x_a == 0x3ff0_0000_0000_0000 {
251
                    // pow(+-1, +-Inf) = 1.0
252
0
                    return 1.0;
253
0
                }
254
255
0
                if x == 0.0 && y_sign {
256
                    // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
257
0
                    return f64::INFINITY;
258
0
                }
259
                // pow (|x| < 1, -inf) = +inf
260
                // pow (|x| < 1, +inf) = 0.0
261
                // pow (|x| > 1, -inf) = 0.0
262
                // pow (|x| > 1, +inf) = +inf
263
0
                return if (x_a < 1f64.to_bits()) == y_sign {
264
0
                    f64::INFINITY
265
                } else {
266
0
                    0.0
267
                };
268
0
            }
269
            // x^y will overflow / underflow in double precision.  Set y to a
270
            // large enough exponent but not too large, so that the computations
271
            // won't overflow in double precision.
272
0
            y = if y_sign {
273
0
                f64::from_bits(0xc630000000000000)
274
            } else {
275
0
                f64::from_bits(0x4630000000000000)
276
            };
277
0
        }
278
279
        // y is finite and non-zero.
280
281
0
        if x_u == 1f64.to_bits() {
282
            // pow(1, y) = 1
283
0
            return 1.0;
284
0
        }
285
286
0
        if x == 0.0 {
287
0
            let out_is_neg = x_sign && is_odd_integer(y);
288
0
            if y_sign {
289
                // pow(0, negative number) = inf
290
0
                return if out_is_neg {
291
0
                    f64::NEG_INFINITY
292
                } else {
293
0
                    f64::INFINITY
294
                };
295
0
            }
296
            // pow(0, positive number) = 0
297
0
            return if out_is_neg { -0.0 } else { 0.0 };
298
0
        } else if x == 2.0 {
299
0
            return pow_exp2_fallback(y);
300
0
        } else if x == 10.0 {
301
0
            return pow_exp10_fallback(y);
302
0
        }
303
304
0
        if x_a == f64::INFINITY.to_bits() {
305
0
            let out_is_neg = x_sign && is_odd_integer(y);
306
0
            if y_sign {
307
0
                return if out_is_neg { -0.0 } else { 0.0 };
308
0
            }
309
0
            return if out_is_neg {
310
0
                f64::NEG_INFINITY
311
            } else {
312
0
                f64::INFINITY
313
            };
314
0
        }
315
316
0
        if x_a > f64::INFINITY.to_bits() {
317
            // x is NaN.
318
            // pow (aNaN, 0) is already taken care above.
319
0
            return x;
320
0
        }
321
322
        // x is finite and negative, and y is a finite integer.
323
324
0
        let is_y_integer = is_integer(y);
325
        // y is integer and in [-102;102] and |x|<2^10
326
327
        // this is correct only for positive exponent number without FMA,
328
        // otherwise reciprocal may overflow.
329
        #[cfg(any(
330
            all(
331
                any(target_arch = "x86", target_arch = "x86_64"),
332
                target_feature = "fma"
333
            ),
334
            target_arch = "aarch64"
335
        ))]
336
        if is_y_integer
337
            && y_a <= 0x4059800000000000u64
338
            && x_a <= 0x4090000000000000u64
339
            && x_a > 0x3cc0_0000_0000_0000
340
        {
341
            return f_powi(x, y);
342
        }
343
        #[cfg(not(any(
344
            all(
345
                any(target_arch = "x86", target_arch = "x86_64"),
346
                target_feature = "fma"
347
            ),
348
            target_arch = "aarch64"
349
        )))]
350
0
        if is_y_integer
351
0
            && y_a <= 0x4059800000000000u64
352
0
            && x_a <= 0x4090000000000000u64
353
0
            && x_a > 0x3cc0_0000_0000_0000
354
0
            && y.is_sign_positive()
355
        {
356
0
            return f_powi(x, y);
357
0
        }
358
359
0
        if x_sign {
360
0
            if is_y_integer {
361
0
                x = -x;
362
0
                if is_odd_integer(y) {
363
                    // sign = -1.0;
364
                    static CS: [f64; 2] = [1.0, -1.0];
365
366
                    // set sign to 1 for y even, to -1 for y odd
367
0
                    let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
368
0
                        0usize
369
                    } else {
370
0
                        (y as i64 & 0x1) as usize
371
                    };
372
0
                    s = CS[y_parity];
373
0
                }
374
            } else {
375
                // pow( negative, non-integer ) = NaN
376
0
                return f64::NAN;
377
            }
378
0
        }
379
380
        // y is finite and non-zero.
381
382
0
        if x_u == 1f64.to_bits() {
383
            // pow(1, y) = 1
384
0
            return 1.0;
385
0
        }
386
387
0
        if x == 0.0 {
388
0
            let out_is_neg = x_sign && is_odd_integer(y);
389
0
            if y_sign {
390
                // pow(0, negative number) = inf
391
0
                return if out_is_neg {
392
0
                    f64::NEG_INFINITY
393
                } else {
394
0
                    f64::INFINITY
395
                };
396
0
            }
397
            // pow(0, positive number) = 0
398
0
            return if out_is_neg { -0.0 } else { 0.0 };
399
0
        }
400
401
0
        if x_a == f64::INFINITY.to_bits() {
402
0
            let out_is_neg = x_sign && is_odd_integer(y);
403
0
            if y_sign {
404
0
                return if out_is_neg { -0.0 } else { 0.0 };
405
0
            }
406
0
            return if out_is_neg {
407
0
                f64::NEG_INFINITY
408
            } else {
409
0
                f64::INFINITY
410
            };
411
0
        }
412
413
0
        if x_a > f64::INFINITY.to_bits() {
414
            // x is NaN.
415
            // pow (aNaN, 0) is already taken care above.
416
0
            return x;
417
0
        }
418
0
    }
419
420
    // approximate log(x)
421
0
    let (mut l, cancel) = pow_log_1(x);
422
423
    /* We should avoid a spurious underflow/overflow in y*log(x).
424
    Underflow: for x<>1, the smallest absolute value of log(x) is obtained
425
    for x=1-2^-53, with |log(x)| ~ 2^-53. Thus to avoid a spurious underflow
426
    we require |y| >= 2^-969.
427
    Overflow: the largest absolute value of log(x) is obtained for x=2^-1074,
428
    with |log(x)| < 745. Thus to avoid a spurious overflow we require
429
    |y| < 2^1014. */
430
0
    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
431
0
    if ey < 0x36 || ey >= 0x7f5 {
432
0
        l.lo = f64::NAN;
433
0
        l.hi = f64::NAN;
434
0
    }
435
436
0
    let r = DoubleDouble::quick_mult_f64(l, y);
437
0
    let res = pow_exp_1(r, s);
438
    static ERR: [u64; 2] = [0x3bf2700000000000, 0x3c55700000000000];
439
0
    let res_min = res.hi + f_fmla(f64::from_bits(ERR[cancel as usize]), -res.hi, res.lo);
440
0
    let res_max = res.hi + f_fmla(f64::from_bits(ERR[cancel as usize]), res.hi, res.lo);
441
0
    if res_min == res_max {
442
0
        return res_max;
443
0
    }
444
0
    pow_rational128(x, y, s)
445
0
}
446
447
#[cold]
448
0
fn pow_rational128(x: f64, y: f64, s: f64) -> f64 {
449
0
    let f_y = DyadicFloat128::new_from_f64(y);
450
451
0
    let r = log_dyadic(x) * f_y;
452
0
    let mut result = exp_dyadic(r);
453
454
    // 2^R.ex <= R < 2^(R.ex+1)
455
456
    // /* case R < 2^-1075: underflow case */
457
    // if result.exponent < -1075 {
458
    //     return 0.5 * (s * f64::from_bits(0x0000000000000001));
459
    // }
460
461
0
    result.sign = if s == -1.0 {
462
0
        DyadicSign::Neg
463
    } else {
464
0
        DyadicSign::Pos
465
    };
466
467
0
    result.fast_as_f64()
468
0
}
469
470
/// Pow for given value for const context.
471
/// This is simplified version just to make a good approximation on const context.
472
0
pub const fn pow(d: f64, n: f64) -> f64 {
473
0
    let value = d.abs();
474
475
0
    let r = n * log(value);
476
0
    let c = exp(r);
477
0
    if n == 0. {
478
0
        return 1.;
479
0
    }
480
0
    if d < 0.0 {
481
0
        let y = n as i32;
482
0
        if y % 2 == 0 { c } else { -c }
483
    } else {
484
0
        c
485
    }
486
0
}
487
488
#[cfg(test)]
489
mod tests {
490
    use super::*;
491
492
    #[test]
493
    fn powf_test() {
494
        assert!(
495
            (pow(2f64, 3f64) - 8f64).abs() < 1e-9,
496
            "Invalid result {}",
497
            pow(2f64, 3f64)
498
        );
499
        assert!(
500
            (pow(0.5f64, 2f64) - 0.25f64).abs() < 1e-9,
501
            "Invalid result {}",
502
            pow(0.5f64, 2f64)
503
        );
504
    }
505
506
    #[test]
507
    fn f_pow_test() {
508
        assert_eq!(f_pow(
509
             0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000135264499699371,
510
            -0.5,
511
        ), 27189929701044785000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
512
        assert_eq!(f_pow(
513
            0.000000000000000000000000000000000000000000000000000021798599361155193,
514
            -2.,
515
        ),2104470396771397700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
516
        assert_eq!(
517
            f_pow(-25192281723900620000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
518
            -2.),
519
            0.
520
        );
521
        assert_eq!(
522
            f_pow(0.000000000000000000000000000000000000000000000000000021799650661798696,
523
                  -2.),
524
            2104267423084451500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.
525
        );
526
        assert_eq!(
527
            f_pow(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000014916691520383755,
528
            -2.),
529
            44942267764413600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.
530
        );
531
        assert_eq!(
532
            f_pow(
533
                0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000135264499699371,
534
                -0.5,
535
            ),
536
            27189929701044785000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.
537
        );
538
        assert_eq!(f_pow(1., f64::INFINITY), 1.);
539
        assert_eq!(f_pow(2., f64::INFINITY), f64::INFINITY);
540
        assert_eq!(f_pow(f64::INFINITY, -0.5), 0.);
541
        assert!(
542
            (f_pow(2f64, 3f64) - 8f64).abs() < 1e-9,
543
            "Invalid result {}",
544
            f_pow(2f64, 3f64)
545
        );
546
        assert!(
547
            (f_pow(0.5f64, 2f64) - 0.25f64).abs() < 1e-9,
548
            "Invalid result {}",
549
            f_pow(0.5f64, 2f64)
550
        );
551
        assert_eq!(f_pow(2.1f64, 2.7f64), 7.412967494768546);
552
        assert_eq!(f_pow(27., 1. / 3.), 3.);
553
    }
554
555
    #[test]
556
    fn powi_test() {
557
        assert_eq!(f_pow(f64::from_bits(0x3cc0_0000_0000_0000), 102.), 0.0);
558
        assert_eq!(f_pow(3., 3.), 27.);
559
        assert_eq!(f_pow(3., -3.), 1. / 27.);
560
        assert_eq!(f_pow(3., 102.), 4.638397686588102e48);
561
        assert_eq!(f_pow(0.000000000000011074474670636028, -22.), 10589880229528372000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
562
    }
563
}