Coverage Report

Created: 2025-10-10 07:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/powf.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::bits::biased_exponent_f64;
30
use crate::common::*;
31
use crate::double_double::DoubleDouble;
32
use crate::exponents::expf;
33
use crate::logf;
34
use crate::logs::LOG2_R;
35
use crate::polyeval::{f_polyeval3, f_polyeval6, f_polyeval10};
36
use crate::pow_tables::EXP2_MID1;
37
use crate::powf_tables::{LOG2_R_TD, LOG2_R2_DD, POWF_R2};
38
use crate::rounding::CpuRound;
39
40
/// Power function for given value for const context.
41
/// This is simplified version just to make a good approximation on const context.
42
0
pub const fn powf(d: f32, n: f32) -> f32 {
43
0
    let value = d.abs();
44
0
    let c = expf(n * logf(value));
45
0
    if n == 1. {
46
0
        return d;
47
0
    }
48
0
    if d < 0.0 {
49
0
        let y = n as i32;
50
0
        if y % 2 == 0 { c } else { -c }
51
    } else {
52
0
        c
53
    }
54
0
}
55
56
#[inline]
57
0
const fn larger_exponent(a: f64, b: f64) -> bool {
58
0
    biased_exponent_f64(a) >= biased_exponent_f64(b)
59
0
}
60
61
// Calculate 2^(y * log2(x)) in double-double precision.
62
// At this point we can reuse the following values:
63
//   idx_x: index for extra precision of log2 for the middle part of log2(x).
64
//   dx: the reduced argument for log2(x)
65
//   y6: 2^6 * y.
66
//   lo6_hi: the high part of 2^6 * (y - (hi + mid))
67
//   exp2_hi_mid: high part of 2^(hi + mid)
68
#[cold]
69
#[inline(never)]
70
0
fn powf_dd(idx_x: i32, dx: f64, y6: f64, lo6_hi: f64, exp2_hi_mid: DoubleDouble) -> f64 {
71
    // Perform a second range reduction step:
72
    //   idx2 = round(2^14 * (dx  + 2^-8)) = round ( dx * 2^14 + 2^6)
73
    //   dx2 = (1 + dx) * r2 - 1
74
    // Output range:
75
    //   -0x1.3ffcp-15 <= dx2 <= 0x1.3e3dp-15
76
0
    let idx2 = f_fmla(
77
0
        dx,
78
0
        f64::from_bits(0x40d0000000000000),
79
0
        f64::from_bits(0x4050000000000000),
80
0
    )
81
0
    .cpu_round() as usize;
82
0
    let dx2 = f_fmla(1.0 + dx, f64::from_bits(POWF_R2[idx2]), -1.0); // Exact
83
84
    const COEFFS: [(u64, u64); 6] = [
85
        (0x3c7777d0ffda25e0, 0x3ff71547652b82fe),
86
        (0xbc6777d101cf0a84, 0xbfe71547652b82fe),
87
        (0x3c7ce04b5140d867, 0x3fdec709dc3a03fd),
88
        (0x3c7137b47e635be5, 0xbfd71547652b82fb),
89
        (0xbc5b5a30b3bdb318, 0x3fd2776c516a92a2),
90
        (0x3c62d2fbd081e657, 0xbfcec70af1929ca6),
91
    ];
92
0
    let dx_dd = DoubleDouble::new(0.0, dx2);
93
0
    let p = f_polyeval6(
94
0
        dx_dd,
95
0
        DoubleDouble::from_bit_pair(COEFFS[0]),
96
0
        DoubleDouble::from_bit_pair(COEFFS[1]),
97
0
        DoubleDouble::from_bit_pair(COEFFS[2]),
98
0
        DoubleDouble::from_bit_pair(COEFFS[3]),
99
0
        DoubleDouble::from_bit_pair(COEFFS[4]),
100
0
        DoubleDouble::from_bit_pair(COEFFS[5]),
101
    );
102
    // log2(1 + dx2) ~ dx2 * P(dx2)
103
0
    let log2_x_lo = DoubleDouble::quick_mult_f64(p, dx2);
104
    // Lower parts of (e_x - log2(r1)) of the first range reduction constant
105
0
    let log2_r_td = LOG2_R_TD[idx_x as usize];
106
0
    let log2_x_mid = DoubleDouble::new(f64::from_bits(log2_r_td.0), f64::from_bits(log2_r_td.1));
107
    // -log2(r2) + lower part of (e_x - log2(r1))
108
0
    let log2_x_m = DoubleDouble::add(DoubleDouble::from_bit_pair(LOG2_R2_DD[idx2]), log2_x_mid);
109
    // log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1))
110
    // Since we don't know which one has larger exponent to apply Fast2Sum
111
    // algorithm, we need to check them before calling double-double addition.
112
0
    let log2_x = if larger_exponent(log2_x_m.hi, log2_x_lo.hi) {
113
0
        DoubleDouble::add(log2_x_m, log2_x_lo)
114
    } else {
115
0
        DoubleDouble::add(log2_x_lo, log2_x_m)
116
    };
117
0
    let lo6_hi_dd = DoubleDouble::new(0.0, lo6_hi);
118
    // 2^6 * y * (log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1)))
119
0
    let prod = DoubleDouble::quick_mult_f64(log2_x, y6);
120
    // 2^6 * (y * log2(x) - (hi + mid)) = 2^6 * lo
121
0
    let lo6 = if larger_exponent(prod.hi, lo6_hi) {
122
0
        DoubleDouble::add(prod, lo6_hi_dd)
123
    } else {
124
0
        DoubleDouble::add(lo6_hi_dd, prod)
125
    };
126
127
    const EXP2_COEFFS: [(u64, u64); 10] = [
128
        (0x0000000000000000, 0x3ff0000000000000),
129
        (0x3c1abc9e3b398024, 0x3f862e42fefa39ef),
130
        (0xbba5e43a5429bddb, 0x3f0ebfbdff82c58f),
131
        (0xbb2d33162491268f, 0x3e8c6b08d704a0c0),
132
        (0x3a94fb32d240a14e, 0x3e03b2ab6fba4e77),
133
        (0x39ee84e916be83e0, 0x3d75d87fe78a6731),
134
        (0xb989a447bfddc5e6, 0x3ce430912f86bfb8),
135
        (0xb8e31a55719de47f, 0x3c4ffcbfc588ded9),
136
        (0xb850ba57164eb36b, 0x3bb62c034beb8339),
137
        (0xb7b8483eabd9642d, 0x3b1b5251ff97bee1),
138
    ];
139
140
0
    let pp = f_polyeval10(
141
0
        lo6,
142
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[0]),
143
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[1]),
144
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[2]),
145
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[3]),
146
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[4]),
147
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[5]),
148
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[6]),
149
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[7]),
150
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[8]),
151
0
        DoubleDouble::from_bit_pair(EXP2_COEFFS[9]),
152
    );
153
0
    let rr = DoubleDouble::quick_mult(exp2_hi_mid, pp);
154
155
    // Make sure the sum is normalized:
156
0
    let r = DoubleDouble::from_exact_add(rr.hi, rr.lo);
157
158
    const FRACTION_MASK: u64 = (1u64 << 52) - 1;
159
160
0
    let mut r_bits = r.hi.to_bits();
161
0
    if ((r_bits & 0xfff_ffff) == 0) && (r.lo != 0.0) {
162
0
        let hi_sign = r.hi.to_bits() >> 63;
163
0
        let lo_sign = r.lo.to_bits() >> 63;
164
0
        if hi_sign == lo_sign {
165
0
            r_bits = r_bits.wrapping_add(1);
166
0
        } else if (r_bits & FRACTION_MASK) > 0 {
167
0
            r_bits = r_bits.wrapping_sub(1);
168
0
        }
169
0
    }
170
171
0
    f64::from_bits(r_bits)
172
0
}
173
174
/// Power function
175
///
176
/// Max found ULP 0.5
177
0
pub fn f_powf(x: f32, y: f32) -> f32 {
178
0
    let mut x_u = x.to_bits();
179
0
    let x_abs = x_u & 0x7fff_ffff;
180
0
    let mut y = y;
181
0
    let y_u = y.to_bits();
182
0
    let y_abs = y_u & 0x7fff_ffff;
183
0
    let mut x = x;
184
185
0
    if ((y_abs & 0x0007_ffff) == 0) || (y_abs > 0x4f170000) {
186
        // y is signaling NaN
187
0
        if x.is_nan() || y.is_nan() {
188
0
            if y.abs() == 0. {
189
0
                return 1.;
190
0
            }
191
0
            if x == 1. {
192
0
                return 1.;
193
0
            }
194
0
            return f32::NAN;
195
0
        }
196
197
        // Exceptional exponents.
198
0
        if y == 0.0 {
199
0
            return 1.0;
200
0
        }
201
202
0
        match y_abs {
203
            0x7f80_0000 => {
204
0
                if x_abs > 0x7f80_0000 {
205
                    // pow(NaN, +-Inf) = NaN
206
0
                    return x;
207
0
                }
208
0
                if x_abs == 0x3f80_0000 {
209
                    // pow(+-1, +-Inf) = 1.0f
210
0
                    return 1.0;
211
0
                }
212
0
                if x == 0.0 && y_u == 0xff80_0000 {
213
                    // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
214
0
                    return f32::INFINITY;
215
0
                }
216
                // pow (|x| < 1, -inf) = +inf
217
                // pow (|x| < 1, +inf) = 0.0f
218
                // pow (|x| > 1, -inf) = 0.0f
219
                // pow (|x| > 1, +inf) = +inf
220
0
                return if (x_abs < 0x3f80_0000) == (y_u == 0xff80_0000) {
221
0
                    f32::INFINITY
222
                } else {
223
0
                    0.
224
                };
225
            }
226
            _ => {
227
0
                match y_u {
228
                    0x3f00_0000 => {
229
                        // pow(x, 1/2) = sqrt(x)
230
0
                        if x == 0.0 || x_u == 0xff80_0000 {
231
                            // pow(-0, 1/2) = +0
232
                            // pow(-inf, 1/2) = +inf
233
                            // Make sure it is correct for FTZ/DAZ.
234
0
                            return x * x;
235
0
                        }
236
0
                        let r = x.sqrt();
237
0
                        return if r.to_bits() != 0x8000_0000 { r } else { 0.0 };
238
                    }
239
                    0x3f80_0000 => {
240
0
                        return x;
241
                    } // y = 1.0f
242
0
                    0x4000_0000 => return x * x, // y = 2.0f
243
                    _ => {
244
0
                        let is_int = is_integerf(y);
245
0
                        if is_int && (y_u > 0x4000_0000) && (y_u <= 0x41c0_0000) {
246
                            // Check for exact cases when 2 < y < 25 and y is an integer.
247
0
                            let mut msb: i32 = if x_abs == 0 {
248
0
                                32 - 2
249
                            } else {
250
0
                                x_abs.leading_zeros() as i32
251
                            };
252
0
                            msb = if msb > 8 { msb } else { 8 };
253
0
                            let mut lsb: i32 = if x_abs == 0 {
254
0
                                0
255
                            } else {
256
0
                                x_abs.trailing_zeros() as i32
257
                            };
258
0
                            lsb = if lsb > 23 { 23 } else { lsb };
259
0
                            let extra_bits: i32 = 32 - 2 - lsb - msb;
260
0
                            let iter = y as i32;
261
262
0
                            if extra_bits * iter <= 23 + 2 {
263
                                // The result is either exact or exactly half-way.
264
                                // But it is exactly representable in double precision.
265
0
                                let x_d = x as f64;
266
0
                                let mut result = x_d;
267
0
                                for _ in 1..iter {
268
0
                                    result *= x_d;
269
0
                                }
270
0
                                return result as f32;
271
0
                            }
272
0
                        }
273
274
0
                        if y_abs > 0x4f17_0000 {
275
                            // if y is NaN
276
0
                            if y_abs > 0x7f80_0000 {
277
0
                                if x_u == 0x3f80_0000 {
278
                                    // x = 1.0f
279
                                    // pow(1, NaN) = 1
280
0
                                    return 1.0;
281
0
                                }
282
                                // pow(x, NaN) = NaN
283
0
                                return y;
284
0
                            }
285
                            // x^y will be overflow / underflow in single precision.  Set y to a
286
                            // large enough exponent but not too large, so that the computations
287
                            // won't be overflow in double precision.
288
0
                            y = f32::from_bits((y_u & 0x8000_0000).wrapping_add(0x4f800000u32));
289
0
                        }
290
                    }
291
                }
292
            }
293
        }
294
0
    }
295
296
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
297
0
    let mut ex = -(E_BIAS as i32);
298
0
    let mut sign: u64 = 0;
299
300
0
    if ((x_u & 0x801f_ffffu32) == 0) || x_u >= 0x7f80_0000u32 || x_u < 0x0080_0000u32 {
301
0
        if x.is_nan() {
302
0
            return f32::NAN;
303
0
        }
304
305
0
        if x_u == 0x3f80_0000 {
306
0
            return 1.;
307
0
        }
308
309
0
        let x_is_neg = x.to_bits() > 0x8000_0000;
310
311
0
        if x == 0.0 {
312
0
            let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u));
313
0
            if y_u > 0x8000_0000u32 {
314
                // pow(0, negative number) = inf
315
0
                return if x_is_neg {
316
0
                    f32::NEG_INFINITY
317
                } else {
318
0
                    f32::INFINITY
319
                };
320
0
            }
321
            // pow(0, positive number) = 0
322
0
            return if out_is_neg { -0.0 } else { 0.0 };
323
0
        }
324
325
0
        if x_abs == 0x7f80_0000u32 {
326
            // x = +-Inf
327
0
            let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u));
328
0
            if y_u >= 0x7fff_ffff {
329
0
                return if out_is_neg { -0.0 } else { 0.0 };
330
0
            }
331
0
            return if out_is_neg {
332
0
                f32::NEG_INFINITY
333
            } else {
334
0
                f32::INFINITY
335
            };
336
0
        }
337
338
0
        if x_abs > 0x7f80_0000 {
339
            // x is NaN.
340
            // pow (aNaN, 0) is already taken care above.
341
0
            return x;
342
0
        }
343
344
        // Normalize denormal inputs.
345
0
        if x_abs < 0x0080_0000u32 {
346
0
            ex = ex.wrapping_sub(64);
347
0
            x *= f32::from_bits(0x5f800000);
348
0
        }
349
350
        // x is finite and negative, and y is a finite integer.
351
0
        if x.is_sign_negative() {
352
0
            if is_integerf(y) {
353
0
                x = -x;
354
0
                if is_odd_integerf(y) {
355
0
                    sign = 0x8000_0000_0000_0000u64;
356
0
                }
357
            } else {
358
                // pow( negative, non-integer ) = NaN
359
0
                return f32::NAN;
360
            }
361
0
        }
362
0
    }
363
364
    // x^y = 2^( y * log2(x) )
365
    //     = 2^( y * ( e_x + log2(m_x) ) )
366
    // First we compute log2(x) = e_x + log2(m_x)
367
0
    x_u = x.to_bits();
368
369
    // Extract exponent field of x.
370
0
    ex = ex.wrapping_add((x_u >> 23) as i32);
371
0
    let e_x = ex as f64;
372
    // Use the highest 7 fractional bits of m_x as the index for look up tables.
373
0
    let x_mant = x_u & ((1u32 << 23) - 1);
374
0
    let idx_x = (x_mant >> (23 - 7)) as i32;
375
    // Add the hidden bit to the mantissa.
376
    // 1 <= m_x < 2
377
0
    let m_x = f32::from_bits(x_mant | 0x3f800000);
378
379
    // Reduced argument for log2(m_x):
380
    //   dx = r * m_x - 1.
381
    // The computation is exact, and -2^-8 <= dx < 2^-7.
382
    // Then m_x = (1 + dx) / r, and
383
    //   log2(m_x) = log2( (1 + dx) / r )
384
    //             = log2(1 + dx) - log2(r).
385
386
    let dx;
387
    #[cfg(any(
388
        all(
389
            any(target_arch = "x86", target_arch = "x86_64"),
390
            target_feature = "fma"
391
        ),
392
        target_arch = "aarch64"
393
    ))]
394
    {
395
        use crate::logs::LOG_REDUCTION_F32;
396
        dx = f_fmlaf(
397
            m_x,
398
            f32::from_bits(LOG_REDUCTION_F32.0[idx_x as usize]),
399
            -1.0,
400
        ) as f64; // Exact.
401
    }
402
    #[cfg(not(any(
403
        all(
404
            any(target_arch = "x86", target_arch = "x86_64"),
405
            target_feature = "fma"
406
        ),
407
        target_arch = "aarch64"
408
    )))]
409
    {
410
        use crate::logs::LOG_RANGE_REDUCTION;
411
0
        dx = f_fmla(
412
0
            m_x as f64,
413
0
            f64::from_bits(LOG_RANGE_REDUCTION[idx_x as usize]),
414
0
            -1.0,
415
0
        ); // Exact
416
    }
417
418
    // Degree-5 polynomial approximation:
419
    //   dx * P(dx) ~ log2(1 + dx)
420
    // Generated by Sollya with:
421
    // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
422
    // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
423
    //   0x1.653...p-52
424
    const COEFFS: [u64; 6] = [
425
        0x3ff71547652b82fe,
426
        0xbfe71547652b7a07,
427
        0x3fdec709dc458db1,
428
        0xbfd715479c2266c9,
429
        0x3fd2776ae1ddf8f0,
430
        0xbfce7b2178870157,
431
    ];
432
433
0
    let dx2 = dx * dx; // Exact
434
0
    let c0 = f_fmla(dx, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
435
0
    let c1 = f_fmla(dx, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
436
0
    let c2 = f_fmla(dx, f64::from_bits(COEFFS[5]), f64::from_bits(COEFFS[4]));
437
438
0
    let p = f_polyeval3(dx2, c0, c1, c2);
439
440
    // s = e_x - log2(r) + dx * P(dx)
441
    // Approximation errors:
442
    //   |log2(x) - s| < ulp(e_x) + (bounds on dx) * (error bounds of P(dx))
443
    //                 = ulp(e_x) + 2^-7 * 2^-51
444
    //                 < 2^8 * 2^-52 + 2^-7 * 2^-43
445
    //                 ~ 2^-44 + 2^-50
446
0
    let s = f_fmla(dx, p, f64::from_bits(LOG2_R[idx_x as usize]) + e_x);
447
448
    // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
449
    //   y * log(2) = hi + mid + lo, where
450
    //   hi is an integer
451
    //   mid * 2^6 is an integer
452
    //   |lo| <= 2^-7
453
    // Then:
454
    //   x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo,
455
    // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements,
456
    // and 2^lo ~ 1 + lo * P(lo).
457
    // Thus, we have:
458
    //   hi + mid = 2^-6 * round( 2^6 * y * log2(x) )
459
    // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6)
460
    // bits, hence, if we use double precision to perform
461
    //   round( 2^6 * y * log2(x))
462
    // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38
463
464
    // In the following computations:
465
    //   y6  = 2^6 * y
466
    //   hm  = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s)
467
    //   lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm.
468
0
    let y6 = (y * f32::from_bits(0x42800000)) as f64; // Exact.
469
0
    let hm = (s * y6).cpu_round();
470
471
    // let log2_rr = LOG2_R2_DD[idx_x as usize];
472
473
    // // lo6 = 2^6 * lo.
474
    // let lo6_hi = f_fmla(y6, e_x + f64::from_bits(log2_rr.1), -hm); // Exact
475
    // // Error bounds:
476
    // //   | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo)
477
    // //                                       < 2^-51 + 2^-75
478
    // let lo6 = f_fmla(y6, f_fmla(dx, p, f64::from_bits(log2_rr.0)), lo6_hi);
479
480
    // lo6 = 2^6 * lo.
481
0
    let lo6_hi = f_fmla(y6, e_x + f64::from_bits(LOG2_R_TD[idx_x as usize].2), -hm); // Exact
482
    // Error bounds:
483
    //   | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo)
484
    //                                       < 2^-51 + 2^-75
485
0
    let lo6 = f_fmla(
486
0
        y6,
487
0
        f_fmla(dx, p, f64::from_bits(LOG2_R_TD[idx_x as usize].1)),
488
0
        lo6_hi,
489
    );
490
491
    // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2
492
    // Clamp the exponent part into smaller range that fits double precision.
493
    // For those exponents that are out of range, the final conversion will round
494
    // them correctly to inf/max float or 0/min float accordingly.
495
0
    let mut hm_i = unsafe { hm.to_int_unchecked::<i64>() };
496
0
    hm_i = if hm_i > (1i64 << 15) {
497
0
        1 << 15
498
0
    } else if hm_i < (-(1i64 << 15)) {
499
0
        -(1 << 15)
500
    } else {
501
0
        hm_i
502
    };
503
504
0
    let idx_y = hm_i & 0x3f;
505
506
    // 2^hi
507
0
    let exp_hi_i = (hm_i >> 6).wrapping_shl(52);
508
    // 2^mid
509
0
    let exp_mid_i = EXP2_MID1[idx_y as usize].1;
510
    // (-1)^sign * 2^hi * 2^mid
511
    // Error <= 2^hi * 2^-53
512
0
    let exp2_hi_mid_i = (exp_hi_i.wrapping_add(exp_mid_i as i64) as u64).wrapping_add(sign);
513
0
    let exp2_hi_mid = f64::from_bits(exp2_hi_mid_i);
514
515
    // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
516
    // Generated by Sollya with:
517
    // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]);
518
    // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]);
519
    // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60
520
    const EXP2_COEFFS: [u64; 6] = [
521
        0x3ff0000000000000,
522
        0x3f862e42fefa39ef,
523
        0x3f0ebfbdff82a23a,
524
        0x3e8c6b08d7076268,
525
        0x3e03b2ad33f8b48b,
526
        0x3d75d870c4d84445,
527
    ];
528
529
0
    let lo6_sqr = lo6 * lo6;
530
0
    let d0 = f_fmla(
531
0
        lo6,
532
0
        f64::from_bits(EXP2_COEFFS[1]),
533
0
        f64::from_bits(EXP2_COEFFS[0]),
534
    );
535
0
    let d1 = f_fmla(
536
0
        lo6,
537
0
        f64::from_bits(EXP2_COEFFS[3]),
538
0
        f64::from_bits(EXP2_COEFFS[2]),
539
    );
540
0
    let d2 = f_fmla(
541
0
        lo6,
542
0
        f64::from_bits(EXP2_COEFFS[5]),
543
0
        f64::from_bits(EXP2_COEFFS[4]),
544
    );
545
0
    let pp = f_polyeval3(lo6_sqr, d0, d1, d2);
546
547
0
    let r = pp * exp2_hi_mid;
548
0
    let r_u = r.to_bits();
549
550
    #[cfg(any(
551
        all(
552
            any(target_arch = "x86", target_arch = "x86_64"),
553
            target_feature = "fma"
554
        ),
555
        target_arch = "aarch64"
556
    ))]
557
    const ERR: u64 = 64;
558
    #[cfg(not(any(
559
        all(
560
            any(target_arch = "x86", target_arch = "x86_64"),
561
            target_feature = "fma"
562
        ),
563
        target_arch = "aarch64"
564
    )))]
565
    const ERR: u64 = 128;
566
567
0
    let r_upper = f64::from_bits(r_u + ERR) as f32;
568
0
    let r_lower = f64::from_bits(r_u - ERR) as f32;
569
0
    if r_upper == r_lower {
570
0
        return r_upper;
571
0
    }
572
573
    // Scale lower part of 2^(hi + mid)
574
0
    let exp2_hi_mid_dd = DoubleDouble {
575
0
        lo: if idx_y != 0 {
576
0
            f64::from_bits((exp_hi_i as u64).wrapping_add(EXP2_MID1[idx_y as usize].0))
577
        } else {
578
0
            0.
579
        },
580
0
        hi: exp2_hi_mid,
581
    };
582
583
0
    let r_dd = powf_dd(idx_x, dx, y6, lo6_hi, exp2_hi_mid_dd);
584
0
    r_dd as f32
585
0
}
586
587
/// Dirty fast pow
588
#[inline]
589
0
pub fn dirty_powf(d: f32, n: f32) -> f32 {
590
    use crate::exponents::dirty_exp2f;
591
    use crate::logs::dirty_log2f;
592
0
    let value = d.abs();
593
0
    let lg = dirty_log2f(value);
594
0
    let c = dirty_exp2f(n * lg);
595
0
    if d < 0.0 {
596
0
        let y = n as i32;
597
0
        if y % 2 == 0 { c } else { -c }
598
    } else {
599
0
        c
600
    }
601
0
}
Unexecuted instantiation: pxfm::powf::dirty_powf
Unexecuted instantiation: pxfm::powf::dirty_powf
602
603
#[cfg(test)]
604
mod tests {
605
    use super::*;
606
607
    #[test]
608
    fn powf_test() {
609
        assert!(
610
            (powf(2f32, 3f32) - 8f32).abs() < 1e-6,
611
            "Invalid result {}",
612
            powf(2f32, 3f32)
613
        );
614
        assert!(
615
            (powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6,
616
            "Invalid result {}",
617
            powf(0.5f32, 2f32)
618
        );
619
    }
620
621
    #[test]
622
    fn f_powf_test() {
623
        assert!(
624
            (f_powf(2f32, 3f32) - 8f32).abs() < 1e-6,
625
            "Invalid result {}",
626
            f_powf(2f32, 3f32)
627
        );
628
        assert!(
629
            (f_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6,
630
            "Invalid result {}",
631
            f_powf(0.5f32, 2f32)
632
        );
633
        assert_eq!(f_powf(0.5f32, 1.5432f32), 0.34312353);
634
        assert_eq!(
635
            f_powf(f32::INFINITY, 0.00000000000000000000000000000000038518824),
636
            f32::INFINITY
637
        );
638
        assert_eq!(f_powf(f32::NAN, 0.0), 1.);
639
        assert_eq!(f_powf(1., f32::NAN), 1.);
640
    }
641
642
    #[test]
643
    fn dirty_powf_test() {
644
        assert!(
645
            (dirty_powf(2f32, 3f32) - 8f32).abs() < 1e-6,
646
            "Invalid result {}",
647
            dirty_powf(2f32, 3f32)
648
        );
649
        assert!(
650
            (dirty_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6,
651
            "Invalid result {}",
652
            dirty_powf(0.5f32, 2f32)
653
        );
654
    }
655
}