Coverage Report

Created: 2025-12-11 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp2m1f.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::exponents::exp2f::EXP2F_TABLE;
30
use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
31
32
#[inline(always)]
33
0
fn exp2m1f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
34
0
    let x_u = x.to_bits();
35
0
    let x_abs = x_u & 0x7fff_ffffu32;
36
0
    if x_abs >= 0x4300_0000u32 || x_abs <= 0x3d00_0000u32 {
37
        // |x| <= 2^-5
38
0
        if x_abs <= 0x3d00_0000u32 {
39
            // Minimax polynomial generated by Sollya with:
40
            // > display = hexadecimal;
41
            // > fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]);
42
            const C: [u64; 6] = [
43
                0x3fe62e42fefa39f3,
44
                0x3fcebfbdff82c57b,
45
                0x3fac6b08d6f2d7aa,
46
                0x3f83b2ab6fc92f5d,
47
                0x3f55d897cfe27125,
48
                0x3f243090e61e6af1,
49
            ];
50
0
            let xd = x as f64;
51
0
            let xsq = xd * xd;
52
0
            let c0 = backend.fma(xd, f64::from_bits(C[1]), f64::from_bits(C[0]));
53
0
            let c1 = backend.fma(xd, f64::from_bits(C[3]), f64::from_bits(C[2]));
54
0
            let c2 = backend.fma(xd, f64::from_bits(C[5]), f64::from_bits(C[4]));
55
0
            let p = backend.polyeval3(xsq, c0, c1, c2);
56
0
            return (p * xd) as f32;
57
0
        }
58
59
        // x >= 128, or x is nan
60
0
        if x.is_sign_positive() {
61
            // x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan
62
0
            return x + f32::INFINITY;
63
0
        }
64
0
    }
65
66
0
    if x <= -25.0 {
67
        // 2^(-inf) - 1 = -1
68
0
        if x.is_infinite() {
69
0
            return -1.0;
70
0
        }
71
        // 2^nan - 1 = nan
72
0
        if x.is_nan() {
73
0
            return x;
74
0
        }
75
0
        return -1.0;
76
0
    }
77
78
    // For -25 < x < 128, to compute 2^x, we perform the following range
79
    // reduction: find hi, mid, lo such that:
80
    //   x = hi + mid + lo, in which:
81
    //     hi is an integer,
82
    //     0 <= mid * 2^5 < 32 is an integer,
83
    //     -2^(-6) <= lo <= 2^(-6).
84
    // In particular,
85
    //   hi + mid = round(x * 2^5) * 2^(-5).
86
    // Then,
87
    //   2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
88
    // 2^mid is stored in the lookup table of 32 elements.
89
    // 2^lo is computed using a degree-4 minimax polynomial generated by Sollya.
90
    // We perform 2^hi * 2^mid by simply add hi to the exponent field of 2^mid.
91
92
    // kf = (hi + mid) * 2^5 = round(x * 2^5)
93
94
0
    let xd = x as f64;
95
96
0
    let kf = backend.roundf(x * 64.0);
97
0
    let k = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
98
    // dx = lo = x - (hi + mid) = x - kf * 2^(-6)
99
0
    let dx = backend.fma(f64::from_bits(0xbf90000000000000), kf as f64, xd);
100
101
    const TABLE_BITS: u32 = 6;
102
    const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1;
103
104
    // hi = floor(kf * 2^(-5))
105
    // exp_hi = shift hi to the exponent field of double precision.
106
0
    let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52);
107
108
    // mh = 2^hi * 2^mid
109
    // mh_bits = bit field of mh
110
0
    let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi);
111
0
    let mh = f64::from_bits(mh_bits as u64);
112
113
    // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
114
    // > P = fpminimax((2^y - 1)/y, 4, [|D...|], [-1/64. 1/64]);
115
    // see ./notes/exp2f.sollya
116
    const C: [u64; 5] = [
117
        0x3fe62e42fefa39ef,
118
        0x3fcebfbdff8131c4,
119
        0x3fac6b08d7061695,
120
        0x3f83b2b1bee74b2a,
121
        0x3f55d88091198529,
122
    ];
123
0
    let dx_sq = dx * dx;
124
0
    let c1 = backend.fma(dx, f64::from_bits(C[0]), 1.0);
125
0
    let c2 = backend.fma(dx, f64::from_bits(C[2]), f64::from_bits(C[1]));
126
0
    let c3 = backend.fma(dx, f64::from_bits(C[4]), f64::from_bits(C[3]));
127
0
    let p = backend.polyeval3(dx_sq, c1, c2, c3);
128
    // 2^x = 2^(hi + mid + lo)
129
    //     = 2^(hi + mid) * 2^lo
130
    //     ~ mh * (1 + lo * P(lo))
131
    //     = mh + (mh*lo) * P(lo)
132
0
    backend.fma(p, mh, -1.) as f32
133
0
}
Unexecuted instantiation: pxfm::exponents::exp2m1f::exp2m1f_gen::<pxfm::exponents::expf::FmaBackend>
Unexecuted instantiation: pxfm::exponents::exp2m1f::exp2m1f_gen::<pxfm::exponents::expf::GenericExpfBackend>
134
135
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
136
#[target_feature(enable = "avx", enable = "fma")]
137
0
unsafe fn exp2m1f_fma_impl(x: f32) -> f32 {
138
    use crate::exponents::expf::FmaBackend;
139
0
    exp2m1f_gen(x, FmaBackend {})
140
0
}
141
142
/// Computes 2^x - 1
143
///
144
/// Max found ULP 0.5
145
#[inline]
146
0
pub fn f_exp2m1f(x: f32) -> f32 {
147
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
148
    {
149
        exp2m1f_gen(x, GenericExpfBackend {})
150
    }
151
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
152
    {
153
        use std::sync::OnceLock;
154
        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
155
0
        let q = EXECUTOR.get_or_init(|| {
156
0
            if std::arch::is_x86_feature_detected!("avx")
157
0
                && std::arch::is_x86_feature_detected!("fma")
158
            {
159
0
                exp2m1f_fma_impl
160
            } else {
161
0
                fn def_exp2f(x: f32) -> f32 {
162
0
                    exp2m1f_gen(x, GenericExpfBackend {})
163
0
                }
164
0
                def_exp2f
165
            }
166
0
        });
167
0
        unsafe { q(x) }
168
    }
169
0
}
170
171
#[cfg(test)]
172
mod tests {
173
    use super::*;
174
175
    #[test]
176
    fn test_exp2m1f() {
177
        assert_eq!(f_exp2m1f(0.432423), 0.34949815);
178
        assert_eq!(f_exp2m1f(-4.), -0.9375);
179
        assert_eq!(f_exp2m1f(5.43122), 42.14795);
180
        assert_eq!(f_exp2m1f(4.), 15.0);
181
        assert_eq!(f_exp2m1f(3.), 7.);
182
        assert_eq!(f_exp2m1f(0.1), 0.07177346);
183
        assert_eq!(f_exp2m1f(0.0543432432), 0.038386293);
184
        assert!(f_exp2m1f(f32::NAN).is_nan());
185
        assert_eq!(f_exp2m1f(f32::INFINITY), f32::INFINITY);
186
        assert_eq!(f_exp2m1f(f32::NEG_INFINITY), -1.0);
187
    }
188
}