Coverage Report

Created: 2025-12-14 07:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/expm1f.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
30
31
#[inline(always)]
32
0
fn expm1f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
33
0
    let x_u: u32 = x.to_bits();
34
0
    let x_abs = x_u & 0x7fff_ffffu32;
35
36
    // When |x| > 25*log(2), or nan
37
0
    if x_abs >= 0x418a_a123u32 {
38
        // x < log(2^-25)
39
0
        if x.is_sign_negative() {
40
            // exp(-Inf) = 0
41
0
            if x.is_infinite() {
42
0
                return -1.0;
43
0
            }
44
            // exp(nan) = nan
45
0
            if x.is_nan() {
46
0
                return x;
47
0
            }
48
0
            return -1.0;
49
        } else {
50
            // x >= 89 or nan
51
0
            if x_u >= 0x42b2_0000 {
52
0
                return x + f32::INFINITY;
53
0
            }
54
        }
55
0
    }
56
57
    // |x| < 2^-4
58
0
    if x_abs < 0x3d80_0000u32 {
59
        // |x| < 2^-25
60
0
        if x_abs < 0x3300_0000u32 {
61
            // x = -0.0f
62
0
            if x_u == 0x8000_0000u32 {
63
0
                return x;
64
0
            }
65
            // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
66
            // is:
67
            //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
68
            //                               = |x|
69
            //                               < 2^-25
70
            //                               < epsilon(1)/2.
71
            // To simplify the rounding decision and make it more efficient, we use
72
            //   fma(x, x, x) ~ x + x^2 instead.
73
            // Note: to use the formula x + x^2 to decide the correct rounding, we
74
            // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
75
            // 2^-76. For targets without FMA instructions, we simply use double for
76
            // intermediate results as it is more efficient than using an emulated
77
            // version of FMA.
78
            #[cfg(any(
79
                all(
80
                    any(target_arch = "x86", target_arch = "x86_64"),
81
                    target_feature = "fma"
82
                ),
83
                target_arch = "aarch64"
84
            ))]
85
            {
86
                return backend.fmaf(x, x, x);
87
            }
88
            #[cfg(not(any(
89
                all(
90
                    any(target_arch = "x86", target_arch = "x86_64"),
91
                    target_feature = "fma"
92
                ),
93
                target_arch = "aarch64"
94
            )))]
95
            {
96
0
                let xd = x as f64;
97
0
                return backend.fma(xd, xd, xd) as f32;
98
            }
99
0
        }
100
101
        const C: [u64; 7] = [
102
            0x3fe0000000000000,
103
            0x3fc55555555557dd,
104
            0x3fa55555555552fa,
105
            0x3f8111110fcd58b7,
106
            0x3f56c16c1717660b,
107
            0x3f2a0241f0006d62,
108
            0x3efa01e3f8d3c060,
109
        ];
110
111
        // 2^-25 <= |x| < 2^-4
112
0
        let xd = x as f64;
113
0
        let xsq = xd * xd;
114
        // Degree-8 minimax polynomial generated by Sollya with:
115
        // > display = hexadecimal;
116
        // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
117
118
0
        return backend.fma(
119
0
            backend.polyeval7(
120
0
                xd,
121
0
                f64::from_bits(C[0]),
122
0
                f64::from_bits(C[1]),
123
0
                f64::from_bits(C[2]),
124
0
                f64::from_bits(C[3]),
125
0
                f64::from_bits(C[4]),
126
0
                f64::from_bits(C[5]),
127
0
                f64::from_bits(C[6]),
128
0
            ),
129
0
            xsq,
130
0
            xd,
131
0
        ) as f32;
132
0
    }
133
134
    // For -104 < x < 89, to compute expm1(x), we perform the following range
135
    // reduction: find hi, mid, lo such that:
136
    //   x = hi + mid + lo, in which
137
    //     hi is an integer,
138
    //     mid * 2^7 is an integer
139
    //     -2^(-8) <= lo < 2^-8.
140
    // In particular,
141
    //   hi + mid = round(x * 2^7) * 2^(-7).
142
    // Then,
143
    //   expm1(x) = expm1(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) - 1.
144
    // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
145
    // respectively.  exp(lo) is computed using a degree-4 minimax polynomial
146
    // generated by Sollya.
147
148
    // x_hi = (hi + mid) * 2^7 = round(x * 2^7).
149
0
    let kf = backend.roundf(x * 128.);
150
    // Subtract (hi + mid) from x to get lo.
151
0
    let xd = backend.fmaf(kf, -0.0078125 /* - 1/128 */, x) as f64;
152
0
    let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
153
0
    x_hi += 104 << 7;
154
    // hi = x_hi >> 7
155
0
    let exp_hi = f64::from_bits(crate::exponents::expf::EXP_M1[(x_hi >> 7) as usize]);
156
    // mid * 2^7 = x_hi & 0x0000'007fU;
157
0
    let exp_mid = f64::from_bits(crate::exponents::expf::EXP_M2[(x_hi & 0x7f) as usize]);
158
    // Degree-4 minimax polynomial generated by Sollya with the following
159
    // commands:
160
    // d = [-2^-8, 2^-8];
161
    // f_exp = expm1(x)/x;
162
    // Q = fpminimax(f_exp, 3, [|D...|], [-2^-8, 2^-8]);
163
0
    let p = backend.polyeval5(
164
0
        xd,
165
        1.,
166
0
        f64::from_bits(0x3feffffffffff777),
167
0
        f64::from_bits(0x3fe000000000071c),
168
0
        f64::from_bits(0x3fc555566668e5e7),
169
0
        f64::from_bits(0x3fa55555555ef243),
170
    );
171
0
    backend.fma(p * exp_hi, exp_mid, -1.) as f32
172
0
}
Unexecuted instantiation: pxfm::exponents::expm1f::expm1f_gen::<pxfm::exponents::expf::FmaBackend>
Unexecuted instantiation: pxfm::exponents::expm1f::expm1f_gen::<pxfm::exponents::expf::GenericExpfBackend>
173
174
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
175
#[target_feature(enable = "avx", enable = "fma")]
176
0
unsafe fn expm1f_fma_impl(x: f32) -> f32 {
177
    use crate::exponents::expf::FmaBackend;
178
0
    expm1f_gen(x, FmaBackend {})
179
0
}
180
181
/// Computes e^x - 1
182
///
183
/// Max ULP 0.5
184
#[inline]
185
0
pub fn f_expm1f(x: f32) -> f32 {
186
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
187
    {
188
        expm1f_gen(x, GenericExpfBackend {})
189
    }
190
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
191
    {
192
        use std::sync::OnceLock;
193
        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
194
0
        let q = EXECUTOR.get_or_init(|| {
195
0
            if std::arch::is_x86_feature_detected!("avx")
196
0
                && std::arch::is_x86_feature_detected!("fma")
197
            {
198
0
                expm1f_fma_impl
199
            } else {
200
0
                fn def_expm1f(x: f32) -> f32 {
201
0
                    expm1f_gen(x, GenericExpfBackend {})
202
0
                }
203
0
                def_expm1f
204
            }
205
0
        });
206
0
        unsafe { q(x) }
207
    }
208
0
}
209
210
#[cfg(test)]
211
mod tests {
212
    use crate::f_expm1f;
213
214
    #[test]
215
    fn test_expm1f() {
216
        assert_eq!(f_expm1f(-3.0923562e-5), -3.0923085e-5);
217
        assert_eq!(f_expm1f(2.213121), 8.144211);
218
        assert_eq!(f_expm1f(-3.213121), -0.9597691);
219
        assert_eq!(f_expm1f(-2.35099e-38), -2.35099e-38);
220
        assert_eq!(
221
            f_expm1f(0.00000000000000000000000000000000000004355616),
222
            0.00000000000000000000000000000000000004355616
223
        );
224
        assert_eq!(f_expm1f(25.12315), 81441420000.0);
225
        assert_eq!(f_expm1f(12.986543), 436498.6);
226
        assert_eq!(f_expm1f(-12.986543), -0.99999774);
227
        assert_eq!(f_expm1f(-25.12315), -1.0);
228
        assert_eq!(f_expm1f(f32::INFINITY), f32::INFINITY);
229
        assert_eq!(f_expm1f(f32::NEG_INFINITY), -1.);
230
        assert!(f_expm1f(f32::NAN).is_nan());
231
    }
232
}