Coverage Report

Created: 2025-12-11 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp10f.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
30
31
pub(crate) struct ExpBReduc {
32
    pub(crate) hi: f64,
33
    pub(crate) lo: f64,
34
}
35
36
const MID_BITS: u32 = 5;
37
const MID_MASK: usize = (1 << MID_BITS) - 1;
38
const LOG2_B: f64 = f64::from_bits(0x400a934f0979a371) * (1 << MID_BITS) as f64;
39
const M_LOGB_2_HI: f64 = f64::from_bits(0xbfd34413509f8000) / (1 << MID_BITS) as f64;
40
const M_LOGB_2_LO: f64 = f64::from_bits(0x3d380433b83b532a) / (1 << MID_BITS) as f64;
41
const EXP_2_MID: [u64; 32] = [
42
    0x3ff0000000000000,
43
    0x3ff059b0d3158574,
44
    0x3ff0b5586cf9890f,
45
    0x3ff11301d0125b51,
46
    0x3ff172b83c7d517b,
47
    0x3ff1d4873168b9aa,
48
    0x3ff2387a6e756238,
49
    0x3ff29e9df51fdee1,
50
    0x3ff306fe0a31b715,
51
    0x3ff371a7373aa9cb,
52
    0x3ff3dea64c123422,
53
    0x3ff44e086061892d,
54
    0x3ff4bfdad5362a27,
55
    0x3ff5342b569d4f82,
56
    0x3ff5ab07dd485429,
57
    0x3ff6247eb03a5585,
58
    0x3ff6a09e667f3bcd,
59
    0x3ff71f75e8ec5f74,
60
    0x3ff7a11473eb0187,
61
    0x3ff82589994cce13,
62
    0x3ff8ace5422aa0db,
63
    0x3ff93737b0cdc5e5,
64
    0x3ff9c49182a3f090,
65
    0x3ffa5503b23e255d,
66
    0x3ffae89f995ad3ad,
67
    0x3ffb7f76f2fb5e47,
68
    0x3ffc199bdd85529c,
69
    0x3ffcb720dcef9069,
70
    0x3ffd5818dcfba487,
71
    0x3ffdfc97337b9b5f,
72
    0x3ffea4afa2a490da,
73
    0x3fff50765b6e4540,
74
];
75
76
// Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
77
// > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
78
// Then:
79
//   10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
80
pub(crate) const EXP10F_COEFFS: [u64; 5] = [
81
    0x40026bb1bbb55515,
82
    0x40053524c73bd3ea,
83
    0x4000470591dff149,
84
    0x3ff2bd7c0a9fbc4d,
85
    0x3fe1429e74a98f43,
86
];
87
88
/// Range reduction for 10^x: returns `ExpBReduc { hi, lo }` with `hi = 2^(hi + mid)` and `lo = dx`, so that 10^x = hi * 10^lo.
89
#[inline(always)]
90
0
pub(crate) fn exp_b_range_reduc<B: ExpfBackend>(x: f32, backend: &B) -> ExpBReduc {
91
0
    let xd = x as f64;
92
93
    // kd = round(2^MID_BITS * log2(b) * x), with b = 10 (LOG2_B already includes the 2^MID_BITS scale)
94
0
    let kd = backend.round(LOG2_B * xd);
95
0
    let k = unsafe { kd.to_int_unchecked::<i32>() }; // it's already not indeterminate.
96
97
    // hi = floor(kd / 2^MID_BITS)
98
0
    let exp_hi = (k.wrapping_shr(MID_BITS) as u64).wrapping_shl(52); // 52 = fraction bits in f64
99
100
    // mh = 2^hi * 2^mid
101
0
    let mid_index = (k as usize) & MID_MASK;
102
0
    let mh_bits = EXP_2_MID[mid_index].wrapping_add(exp_hi);
103
0
    let mh = f64::from_bits(mh_bits);
104
105
    // dx = x - kd * log10(2) / 2^MID_BITS = x - (hi + mid) * log10(2)
106
0
    let z0 = backend.fma(kd, M_LOGB_2_HI, xd);
107
0
    let dx = backend.fma(kd, M_LOGB_2_LO, z0);
108
109
0
    ExpBReduc { lo: dx, hi: mh }
110
0
}
Unexecuted instantiation: pxfm::exponents::exp10f::exp_b_range_reduc::<pxfm::exponents::expf::FmaBackend>
Unexecuted instantiation: pxfm::exponents::exp10f::exp_b_range_reduc::<pxfm::exponents::expf::GenericExpfBackend>
111
112
#[inline(always)]
113
0
fn exp10f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
114
0
    let x_u = x.to_bits();
115
0
    let x_abs = x_u & 0x7fffffff;
116
117
    // When |x| >= log10(2^128), or x is nan
118
0
    if x_abs >= 0x421a209bu32 {
119
        // When x < log10(2^-150) or nan
120
0
        if x_u > 0xc2349e35u32 {
121
            // exp(-Inf) = 0
122
0
            if x.is_infinite() {
123
0
                return 0.0;
124
0
            }
125
            // exp(nan) = nan
126
0
            if x.is_nan() {
127
0
                return x;
128
0
            }
129
0
            return 0.0;
130
0
        }
131
        // x >= log10(2^128) or nan
132
0
        if x > 0. && (x_u >= 0x421a209bu32) {
133
            // x is +inf or finite with x >= log10(2^128): the result is +inf (NaN fails `x > 0.` and never reaches this line)
134
0
            return x + f32::INFINITY;
135
0
        }
136
0
    }
137
138
0
    if x_abs <= 0x3d000000u32 {
139
        // |x| <= 1/32
140
0
        if x_abs <= 0x3b9a209bu32 {
141
0
            if x_u == 0xb25e5bd9u32 {
142
                // x = -1.2943e-08
143
0
                return 1.;
144
0
            }
145
            // |x| <= 2^-26
146
            // 10^x ~ 1 + log(10) * x
147
0
            if x_abs <= 0x32800000u32 {
148
0
                return backend.fmaf(x, f32::from_bits(0x40135da2), 1.0);
149
0
            }
150
0
        }
151
152
0
        let xd = x as f64;
153
154
        // Special polynomial for small x.
155
        // Generated by Sollya:
156
        // d = [-1/32, 1/32];
157
        // f_exp10f = (10^y - 1)/y;
158
        // Q = fpminimax(f_exp10f, 6, [|D...|], d, relative, floating);
159
160
        // See ./notes/exp10f_small.sollya
161
0
        let p = backend.polyeval7(
162
0
            xd,
163
0
            f64::from_bits(0x40026bb1bbb55516),
164
0
            f64::from_bits(0x40053524c73cfbf6),
165
0
            f64::from_bits(0x4000470591de0b07),
166
0
            f64::from_bits(0x3ff2bd760599f3a5),
167
0
            f64::from_bits(0x3fe142a001511a6f),
168
0
            f64::from_bits(0x3fca7feffa781d53),
169
0
            f64::from_bits(0x3fb16e53492c0f0e),
170
        );
171
0
        return backend.fma(p, xd, 1.) as f32;
172
0
    }
173
174
    // Range reduction: 10^x = 2^(mid + hi) * 10^lo
175
    //   rr = (2^(mid + hi), lo)
176
0
    let rr = exp_b_range_reduc(x, &backend);
177
178
    // The low part is approximated by a degree-5 minimax polynomial.
179
    // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5
180
0
    let lo2 = rr.lo * rr.lo;
181
    // c0 = 1 + COEFFS[0] * lo
182
0
    let c0 = backend.fma(rr.lo, f64::from_bits(EXP10F_COEFFS[0]), 1.0);
183
    // c1 = COEFFS[1] + COEFFS[2] * lo
184
0
    let c1 = backend.fma(
185
0
        rr.lo,
186
0
        f64::from_bits(EXP10F_COEFFS[2]),
187
0
        f64::from_bits(EXP10F_COEFFS[1]),
188
    );
189
    // c2 = COEFFS[3] + COEFFS[4] * lo
190
0
    let c2 = backend.fma(
191
0
        rr.lo,
192
0
        f64::from_bits(EXP10F_COEFFS[4]),
193
0
        f64::from_bits(EXP10F_COEFFS[3]),
194
    );
195
    // p = c1 + c2 * lo^2
196
    //   = COEFFS[1] + COEFFS[2] * lo + COEFFS[3] * lo^2 + COEFFS[4] * lo^3
197
0
    let p = backend.fma(lo2, c2, c1);
198
    // 10^lo ~ c0 + p * lo^2
199
    // 10^x = 2^(mid + hi) * 10^lo
200
    //      ~ mh * (c0 + p * lo^2)
201
    //      = (mh * c0) + p * (mh * lo^2)
202
0
    backend.fma(p, lo2 * rr.hi, c0 * rr.hi) as f32
203
0
}
Unexecuted instantiation: pxfm::exponents::exp10f::exp10f_gen::<pxfm::exponents::expf::FmaBackend>
Unexecuted instantiation: pxfm::exponents::exp10f::exp10f_gen::<pxfm::exponents::expf::GenericExpfBackend>
204
205
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
206
#[target_feature(enable = "avx", enable = "fma")]
207
0
unsafe fn exp10f_fma_impl(x: f32) -> f32 {
208
    use crate::exponents::expf::FmaBackend;
209
0
    exp10f_gen(x, FmaBackend {})
210
0
}
211
212
/// Computes 10^x (base-10 exponential) for `f32`
213
///
214
/// Max found ULP 0.49999508
215
#[inline]
216
0
pub fn f_exp10f(x: f32) -> f32 {
217
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
218
    {
219
        exp10f_gen(x, GenericExpfBackend {})
220
    }
221
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
222
    {
223
        use std::sync::OnceLock;
224
        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
225
0
        let q = EXECUTOR.get_or_init(|| {
226
0
            if std::arch::is_x86_feature_detected!("avx")
227
0
                && std::arch::is_x86_feature_detected!("fma")
228
            {
229
0
                exp10f_fma_impl
230
            } else {
231
0
                fn def_exp10f(x: f32) -> f32 {
232
0
                    exp10f_gen(x, GenericExpfBackend {})
233
0
                }
234
0
                def_exp10f
235
            }
236
0
        });
Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f::{closure#0}
Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f::{closure#0}
237
0
        unsafe { q(x) }
238
    }
239
0
}
Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f
Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f
240
241
#[cfg(test)]
242
mod tests {
243
    use super::*;
244
245
    #[test]
246
    fn test_exp10f() {
247
        assert_eq!(f_exp10f(-1. / 64.), 0.9646616);
248
        assert_eq!(f_exp10f(1. / 64.), 1.0366329);
249
        assert_eq!(f_exp10f(1.), 10.0);
250
        assert_eq!(f_exp10f(2.), 100.0);
251
        assert_eq!(f_exp10f(3.), 1000.0);
252
        assert_eq!(f_exp10f(f32::INFINITY), f32::INFINITY);
253
        assert_eq!(f_exp10f(f32::NEG_INFINITY), 0.);
254
        assert!(f_exp10f(f32::NAN).is_nan());
255
    }
256
}