Coverage Report

Created: 2026-01-17 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/pow.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::bits::get_exponent_f64;
30
use crate::common::{f_fmla, is_integer, is_odd_integer};
31
use crate::double_double::DoubleDouble;
32
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
33
use crate::exponents::exp;
34
use crate::logs::log_dyadic;
35
use crate::pow_exec::{exp_dyadic, pow_exp_1, pow_log_1};
36
use crate::triple_double::TripleDouble;
37
use crate::{f_exp2, f_exp10, log};
38
39
#[cold]
40
0
fn pow_exp10_fallback(x: f64) -> f64 {
41
0
    f_exp10(x)
42
0
}
43
44
#[cold]
45
0
fn pow_exp2_fallback(x: f64) -> f64 {
46
0
    f_exp2(x)
47
0
}
48
49
#[cold]
50
#[inline(never)]
51
0
fn f_powi(x: f64, y: f64) -> f64 {
52
0
    let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
53
54
0
    let mut s = DoubleDouble::new(0., x);
55
56
    // exponentiation by squaring: O(log(y)) complexity
57
0
    let mut acc = if iter_count % 2 != 0 {
58
0
        s
59
    } else {
60
0
        DoubleDouble::new(0., 1.)
61
    };
62
63
0
    while {
64
0
        iter_count >>= 1;
65
0
        iter_count
66
0
    } != 0
67
    {
68
0
        s = DoubleDouble::mult(s, s);
69
0
        if iter_count % 2 != 0 {
70
0
            acc = DoubleDouble::mult(acc, s);
71
0
        }
72
    }
73
74
0
    let dz = if y.is_sign_negative() {
75
0
        acc.recip()
76
    } else {
77
0
        acc
78
    };
79
0
    let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
80
0
    let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
81
0
    if ub == lb {
82
0
        return dz.to_f64();
83
0
    }
84
0
    f_powi_hard(x, y)
85
0
}
86
87
#[cold]
88
#[inline(never)]
89
0
fn f_powi_hard(x: f64, y: f64) -> f64 {
90
0
    let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
91
92
0
    let mut s = TripleDouble::new(0., 0., x);
93
94
    // exponentiation by squaring: O(log(y)) complexity
95
0
    let mut acc = if iter_count % 2 != 0 {
96
0
        s
97
    } else {
98
0
        TripleDouble::new(0., 0., 1.)
99
    };
100
101
0
    while {
102
0
        iter_count >>= 1;
103
0
        iter_count
104
0
    } != 0
105
    {
106
0
        s = TripleDouble::quick_mult(s, s);
107
0
        if iter_count % 2 != 0 {
108
0
            acc = TripleDouble::quick_mult(acc, s);
109
0
        }
110
    }
111
112
0
    let dz = if y.is_sign_negative() {
113
0
        acc.recip()
114
    } else {
115
0
        acc
116
    };
117
0
    dz.to_f64()
118
0
}
119
120
/// Power function
121
///
122
/// max found ULP 0.5
123
0
pub fn f_pow(x: f64, y: f64) -> f64 {
124
0
    let mut y = y;
125
0
    let x_sign = x.is_sign_negative();
126
0
    let y_sign = y.is_sign_negative();
127
128
0
    let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
129
0
    let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
130
131
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
132
133
0
    let y_mant = y.to_bits() & MANTISSA_MASK;
134
0
    let x_u = x.to_bits();
135
0
    let x_a = x_abs;
136
0
    let y_a = y_abs;
137
138
0
    let mut x = x;
139
140
    // If x or y is signaling NaN
141
0
    if x.is_nan() || y.is_nan() {
142
0
        return f64::NAN;
143
0
    }
144
145
0
    let mut s = 1.0;
146
147
    // The double precision number that is closest to 1 is (1 - 2^-53), which has
148
    //   log2(1 - 2^-53) ~ -1.715...p-53.
149
    // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
150
    //   |y * log2(x)| = 0 or > 1075.
151
    // Hence, x^y will either overflow or underflow if x is not zero.
152
0
    if y_mant == 0
153
0
        || y_a > 0x43d7_4910_d52d_3052
154
0
        || x_u == 1f64.to_bits()
155
0
        || x_u >= f64::INFINITY.to_bits()
156
0
        || x_u < f64::MIN.to_bits()
157
    {
158
        // Exceptional exponents.
159
0
        if y == 0.0 {
160
0
            return 1.0;
161
0
        }
162
163
0
        match y_a {
164
            0x3fe0_0000_0000_0000 => {
165
0
                if x == 0.0 || x_u == f64::NEG_INFINITY.to_bits() {
166
                    // pow(-0, 1/2) = +0
167
                    // pow(-inf, 1/2) = +inf
168
0
                    return if y_sign { 1.0 / (x * x) } else { x * x };
169
0
                }
170
0
                return if y_sign {
171
0
                    if x.is_infinite() {
172
0
                        return if x.is_sign_positive() { 0. } else { f64::NAN };
173
0
                    }
174
                    #[cfg(any(
175
                        all(
176
                            any(target_arch = "x86", target_arch = "x86_64"),
177
                            target_feature = "fma"
178
                        ),
179
                        target_arch = "aarch64"
180
                    ))]
181
                    {
182
                        let r = x.sqrt() / x;
183
                        let rx = r * x;
184
                        let drx = f_fmla(r, x, -rx);
185
                        let h = f_fmla(r, rx, -1.0) + r * drx;
186
                        let dr = (r * 0.5) * h;
187
                        r - dr
188
                    }
189
                    #[cfg(not(any(
190
                        all(
191
                            any(target_arch = "x86", target_arch = "x86_64"),
192
                            target_feature = "fma"
193
                        ),
194
                        target_arch = "aarch64"
195
                    )))]
196
                    {
197
0
                        let r = x.sqrt() / x;
198
0
                        let d2x = DoubleDouble::from_exact_mult(r, x);
199
0
                        let DoubleDouble { hi: h, lo: pr } = DoubleDouble::quick_mult_f64(d2x, r);
200
0
                        let DoubleDouble { hi: p, lo: q } =
201
0
                            DoubleDouble::from_full_exact_add(-1.0, h);
202
0
                        let h = DoubleDouble::from_exact_add(p, pr + q);
203
0
                        let dr = DoubleDouble::quick_mult_f64(h, r * 0.5);
204
0
                        r - dr.hi - dr.lo
205
                    }
206
                } else {
207
0
                    x.sqrt()
208
                };
209
            }
210
            0x3ff0_0000_0000_0000 => {
211
0
                return if y_sign { 1.0 / x } else { x };
212
            }
213
            0x4000_0000_0000_0000 => {
214
0
                let x_e = get_exponent_f64(x);
215
0
                if x_e > 511 {
216
0
                    return if y_sign { 0. } else { f64::INFINITY };
217
0
                }
218
                // not enough precision to make 0.5 ULP for subnormals
219
0
                if x_e.abs() < 70 {
220
0
                    let x_sqr = DoubleDouble::from_exact_mult(x, x);
221
0
                    return if y_sign {
222
0
                        let recip = x_sqr.recip();
223
0
                        recip.to_f64()
224
                    } else {
225
0
                        x_sqr.to_f64()
226
                    };
227
0
                }
228
            }
229
0
            _ => {}
230
        }
231
232
        // |y| > |1075 / log2(1 - 2^-53)|.
233
0
        if y_a > 0x43d7_4910_d52d_3052 {
234
0
            if y_a >= 0x7ff0_0000_0000_0000 {
235
                // y is inf or nan
236
0
                if y_mant != 0 {
237
                    // y is NaN
238
                    // pow(1, NaN) = 1
239
                    // pow(x, NaN) = NaN
240
0
                    return if x_u == 1f64.to_bits() { 1.0 } else { y };
241
0
                }
242
243
                // Now y is +-Inf
244
0
                if f64::from_bits(x_abs).is_nan() {
245
                    // pow(NaN, +-Inf) = NaN
246
0
                    return x;
247
0
                }
248
249
0
                if x_a == 0x3ff0_0000_0000_0000 {
250
                    // pow(+-1, +-Inf) = 1.0
251
0
                    return 1.0;
252
0
                }
253
254
0
                if x == 0.0 && y_sign {
255
                    // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
256
0
                    return f64::INFINITY;
257
0
                }
258
                // pow (|x| < 1, -inf) = +inf
259
                // pow (|x| < 1, +inf) = 0.0
260
                // pow (|x| > 1, -inf) = 0.0
261
                // pow (|x| > 1, +inf) = +inf
262
0
                return if (x_a < 1f64.to_bits()) == y_sign {
263
0
                    f64::INFINITY
264
                } else {
265
0
                    0.0
266
                };
267
0
            }
268
            // x^y will overflow / underflow in double precision.  Set y to a
269
            // large enough exponent but not too large, so that the computations
270
            // won't overflow in double precision.
271
0
            y = if y_sign {
272
0
                f64::from_bits(0xc630000000000000)
273
            } else {
274
0
                f64::from_bits(0x4630000000000000)
275
            };
276
0
        }
277
278
        // y is finite and non-zero.
279
280
0
        if x_u == 1f64.to_bits() {
281
            // pow(1, y) = 1
282
0
            return 1.0;
283
0
        }
284
285
0
        if x == 0.0 {
286
0
            let out_is_neg = x_sign && is_odd_integer(y);
287
0
            if y_sign {
288
                // pow(0, negative number) = inf
289
0
                return if out_is_neg {
290
0
                    f64::NEG_INFINITY
291
                } else {
292
0
                    f64::INFINITY
293
                };
294
0
            }
295
            // pow(0, positive number) = 0
296
0
            return if out_is_neg { -0.0 } else { 0.0 };
297
0
        } else if x == 2.0 {
298
0
            return pow_exp2_fallback(y);
299
0
        } else if x == 10.0 {
300
0
            return pow_exp10_fallback(y);
301
0
        }
302
303
0
        if x_a == f64::INFINITY.to_bits() {
304
0
            let out_is_neg = x_sign && is_odd_integer(y);
305
0
            if y_sign {
306
0
                return if out_is_neg { -0.0 } else { 0.0 };
307
0
            }
308
0
            return if out_is_neg {
309
0
                f64::NEG_INFINITY
310
            } else {
311
0
                f64::INFINITY
312
            };
313
0
        }
314
315
0
        if x_a > f64::INFINITY.to_bits() {
316
            // x is NaN.
317
            // pow (aNaN, 0) is already taken care above.
318
0
            return x;
319
0
        }
320
321
        // x is finite and negative, and y is a finite integer.
322
323
0
        let is_y_integer = is_integer(y);
324
        // y is integer and in [-102;102] and |x|<2^10
325
326
        // this is correct only for positive exponent number without FMA,
327
        // otherwise reciprocal may overflow.
328
        #[cfg(any(
329
            all(
330
                any(target_arch = "x86", target_arch = "x86_64"),
331
                target_feature = "fma"
332
            ),
333
            target_arch = "aarch64"
334
        ))]
335
        if is_y_integer
336
            && y_a <= 0x4059800000000000u64
337
            && x_a <= 0x4090000000000000u64
338
            && x_a > 0x3cc0_0000_0000_0000
339
        {
340
            return f_powi(x, y);
341
        }
342
        #[cfg(not(any(
343
            all(
344
                any(target_arch = "x86", target_arch = "x86_64"),
345
                target_feature = "fma"
346
            ),
347
            target_arch = "aarch64"
348
        )))]
349
0
        if is_y_integer
350
0
            && y_a <= 0x4059800000000000u64
351
0
            && x_a <= 0x4090000000000000u64
352
0
            && x_a > 0x3cc0_0000_0000_0000
353
0
            && y.is_sign_positive()
354
        {
355
0
            return f_powi(x, y);
356
0
        }
357
358
0
        if x_sign {
359
0
            if is_y_integer {
360
0
                x = -x;
361
0
                if is_odd_integer(y) {
362
                    // sign = -1.0;
363
                    static CS: [f64; 2] = [1.0, -1.0];
364
365
                    // set sign to 1 for y even, to -1 for y odd
366
0
                    let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
367
0
                        0usize
368
                    } else {
369
0
                        (y as i64 & 0x1) as usize
370
                    };
371
0
                    s = CS[y_parity];
372
0
                }
373
            } else {
374
                // pow( negative, non-integer ) = NaN
375
0
                return f64::NAN;
376
            }
377
0
        }
378
379
        // y is finite and non-zero.
380
381
0
        if x_u == 1f64.to_bits() {
382
            // pow(1, y) = 1
383
0
            return 1.0;
384
0
        }
385
386
0
        if x == 0.0 {
387
0
            let out_is_neg = x_sign && is_odd_integer(y);
388
0
            if y_sign {
389
                // pow(0, negative number) = inf
390
0
                return if out_is_neg {
391
0
                    f64::NEG_INFINITY
392
                } else {
393
0
                    f64::INFINITY
394
                };
395
0
            }
396
            // pow(0, positive number) = 0
397
0
            return if out_is_neg { -0.0 } else { 0.0 };
398
0
        }
399
400
0
        if x_a == f64::INFINITY.to_bits() {
401
0
            let out_is_neg = x_sign && is_odd_integer(y);
402
0
            if y_sign {
403
0
                return if out_is_neg { -0.0 } else { 0.0 };
404
0
            }
405
0
            return if out_is_neg {
406
0
                f64::NEG_INFINITY
407
            } else {
408
0
                f64::INFINITY
409
            };
410
0
        }
411
412
0
        if x_a > f64::INFINITY.to_bits() {
413
            // x is NaN.
414
            // pow (aNaN, 0) is already taken care above.
415
0
            return x;
416
0
        }
417
0
    }
418
419
    // approximate log(x)
420
0
    let (mut l, cancel) = pow_log_1(x);
421
422
    /* We should avoid a spurious underflow/overflow in y*log(x).
423
    Underflow: for x<>1, the smallest absolute value of log(x) is obtained
424
    for x=1-2^-53, with |log(x)| ~ 2^-53. Thus to avoid a spurious underflow
425
    we require |y| >= 2^-969.
426
    Overflow: the largest absolute value of log(x) is obtained for x=2^-1074,
427
    with |log(x)| < 745. Thus to avoid a spurious overflow we require
428
    |y| < 2^1014. */
429
0
    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
430
0
    if ey < 0x36 || ey >= 0x7f5 {
431
0
        l.lo = f64::NAN;
432
0
        l.hi = f64::NAN;
433
0
    }
434
435
0
    let r = DoubleDouble::quick_mult_f64(l, y);
436
0
    let res = pow_exp_1(r, s);
437
    static ERR: [u64; 2] = [0x3bf2700000000000, 0x3c55700000000000];
438
0
    let res_min = res.hi + f_fmla(f64::from_bits(ERR[cancel as usize]), -res.hi, res.lo);
439
0
    let res_max = res.hi + f_fmla(f64::from_bits(ERR[cancel as usize]), res.hi, res.lo);
440
0
    if res_min == res_max {
441
0
        return res_max;
442
0
    }
443
0
    pow_rational128(x, y, s)
444
0
}
445
446
#[cold]
447
0
fn pow_rational128(x: f64, y: f64, s: f64) -> f64 {
448
0
    let f_y = DyadicFloat128::new_from_f64(y);
449
450
0
    let r = log_dyadic(x) * f_y;
451
0
    let mut result = exp_dyadic(r);
452
453
    // 2^R.ex <= R < 2^(R.ex+1)
454
455
    // /* case R < 2^-1075: underflow case */
456
    // if result.exponent < -1075 {
457
    //     return 0.5 * (s * f64::from_bits(0x0000000000000001));
458
    // }
459
460
0
    result.sign = if s == -1.0 {
461
0
        DyadicSign::Neg
462
    } else {
463
0
        DyadicSign::Pos
464
    };
465
466
0
    result.fast_as_f64()
467
0
}
468
469
/// Pow for given value for const context.
470
/// This is simplified version just to make a good approximation on const context.
471
0
pub const fn pow(d: f64, n: f64) -> f64 {
472
0
    let value = d.abs();
473
474
0
    let r = n * log(value);
475
0
    let c = exp(r);
476
0
    if n == 0. {
477
0
        return 1.;
478
0
    }
479
0
    if d < 0.0 {
480
0
        let y = n as i32;
481
0
        if y % 2 == 0 { c } else { -c }
482
    } else {
483
0
        c
484
    }
485
0
}
486
487
#[cfg(test)]
488
mod tests {
489
    use super::*;
490
491
    #[test]
492
    fn powf_test() {
493
        assert!(
494
            (pow(2f64, 3f64) - 8f64).abs() < 1e-9,
495
            "Invalid result {}",
496
            pow(2f64, 3f64)
497
        );
498
        assert!(
499
            (pow(0.5f64, 2f64) - 0.25f64).abs() < 1e-9,
500
            "Invalid result {}",
501
            pow(0.5f64, 2f64)
502
        );
503
    }
504
505
    #[test]
506
    fn f_pow_test() {
507
        assert_eq!(f_pow(
508
             0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000135264499699371,
509
            -0.5,
510
        ), 27189929701044785000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
511
        assert_eq!(f_pow(
512
            0.000000000000000000000000000000000000000000000000000021798599361155193,
513
            -2.,
514
        ),2104470396771397700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
515
        assert_eq!(
516
            f_pow(-25192281723900620000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
517
            -2.),
518
            0.
519
        );
520
        assert_eq!(
521
            f_pow(0.000000000000000000000000000000000000000000000000000021799650661798696,
522
                  -2.),
523
            2104267423084451500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.
524
        );
525
        assert_eq!(
526
            f_pow(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000014916691520383755,
527
            -2.),
528
            44942267764413600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.
529
        );
530
        assert_eq!(
531
            f_pow(
532
                0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000135264499699371,
533
                -0.5,
534
            ),
535
            27189929701044785000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.
536
        );
537
        assert_eq!(f_pow(1., f64::INFINITY), 1.);
538
        assert_eq!(f_pow(2., f64::INFINITY), f64::INFINITY);
539
        assert_eq!(f_pow(f64::INFINITY, -0.5), 0.);
540
        assert!(
541
            (f_pow(2f64, 3f64) - 8f64).abs() < 1e-9,
542
            "Invalid result {}",
543
            f_pow(2f64, 3f64)
544
        );
545
        assert!(
546
            (f_pow(0.5f64, 2f64) - 0.25f64).abs() < 1e-9,
547
            "Invalid result {}",
548
            f_pow(0.5f64, 2f64)
549
        );
550
        assert_eq!(f_pow(2.1f64, 2.7f64), 7.412967494768546);
551
        assert_eq!(f_pow(27., 1. / 3.), 3.);
552
    }
553
554
    #[test]
555
    fn powi_test() {
556
        assert_eq!(f_pow(f64::from_bits(0x3cc0_0000_0000_0000), 102.), 0.0);
557
        assert_eq!(f_pow(3., 3.), 27.);
558
        assert_eq!(f_pow(3., -3.), 1. / 27.);
559
        assert_eq!(f_pow(3., 102.), 4.638397686588102e48);
560
        assert_eq!(f_pow(0.000000000000011074474670636028, -22.), 10589880229528372000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
561
    }
562
}