Coverage Report

Created: 2025-12-20 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp2.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::dd_fmla;
30
use crate::double_double::DoubleDouble;
31
use crate::exponents::auxiliary::fast_ldexp;
32
use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
33
use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
34
use crate::rounding::CpuRoundTiesEven;
35
36
#[inline]
37
0
fn exp2_poly_dd(z: f64) -> DoubleDouble {
38
    const C: [(u64, u64); 6] = [
39
        (0x3bbabc9e3b39873e, 0x3f262e42fefa39ef),
40
        (0xbae5e43a53e44950, 0x3e4ebfbdff82c58f),
41
        (0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0),
42
        (0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77),
43
        (0xb83dc47e47beb9dd, 0x3b95d87fe7a66459),
44
        (0xb744fcd51fcb7640, 0x3aa430912f9fb79d),
45
    ];
46
47
0
    let mut r = DoubleDouble::quick_mul_f64_add(
48
0
        DoubleDouble::from_bit_pair(C[5]),
49
0
        z,
50
0
        DoubleDouble::from_bit_pair(C[4]),
51
    );
52
0
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[3]));
53
0
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[2]));
54
0
    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[1]));
55
0
    DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[0]))
56
0
}
57
58
#[cold]
59
0
fn exp2_accurate(x: f64) -> f64 {
60
0
    let mut ix = x.to_bits();
61
0
    let sx = 4096.0 * x;
62
0
    let fx = sx.cpu_round_ties_even();
63
0
    let z = sx - fx;
64
0
    let k: i64 = unsafe {
65
0
        fx.to_int_unchecked::<i64>() // this is already finite here
66
    };
67
0
    let i1 = k & 0x3f;
68
0
    let i0 = (k >> 6) & 0x3f;
69
0
    let ie = k >> 12;
70
71
0
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
72
0
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
73
0
    let dt = DoubleDouble::quick_mult(t0, t1);
74
75
0
    let mut f = exp2_poly_dd(z);
76
0
    f = DoubleDouble::quick_mult_f64(f, z);
77
0
    if ix <= 0xc08ff00000000000u64 {
78
        // x >= -1022
79
        // for -0x1.71547652b82fep-54 <= x <= 0x1.71547652b82fdp-53,
80
        // exp2(x) round to x to nearest
81
0
        if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) {
82
0
            return dd_fmla(x, 0.5, 1.0);
83
0
        } else if (k & 0xfff) == 0 {
84
            // 4096*x rounds to 4096*integer
85
0
            let zf = DoubleDouble::from_exact_add(dt.hi, f.hi);
86
0
            let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo);
87
0
            f.hi = zf.hi;
88
0
            f.lo = zfl.hi;
89
0
            ix = zfl.hi.to_bits();
90
0
            if ix & 0x000fffffffffffff == 0 {
91
                // fl is a power of 2
92
0
                if ((ix >> 52) & 0x7ff) != 0 {
93
0
                    // |fl| is Inf
94
0
                    let v = zfl.lo.to_bits();
95
0
                    let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
96
0
                        .wrapping_shl(1)
97
0
                        .wrapping_add(1) as i64;
98
0
                    ix = ix.wrapping_add(d as u64);
99
0
                    f.lo = f64::from_bits(ix);
100
0
                }
101
0
            }
102
0
        } else {
103
0
            f = DoubleDouble::quick_mult(f, dt);
104
0
            f = DoubleDouble::add(dt, f);
105
0
        }
106
0
        let hf = DoubleDouble::from_exact_add(f.hi, f.lo);
107
108
0
        fast_ldexp(hf.hi, ie as i32)
109
    } else {
110
0
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
111
0
        f = DoubleDouble::quick_mult(f, dt);
112
0
        f = DoubleDouble::add(dt, f);
113
0
        let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
114
0
        f.hi = zve.hi;
115
0
        f.lo += zve.lo;
116
117
0
        to_denormal(f.to_f64())
118
    }
119
0
}
120
121
#[inline(always)]
122
0
fn exp2_gen<B: ExpfBackend>(x: f64, backend: B) -> f64 {
123
0
    let mut ix = x.to_bits();
124
0
    let ax = ix.wrapping_shl(1);
125
0
    if ax == 0 {
126
0
        return 1.0;
127
0
    }
128
0
    if ax >= 0x8120000000000000u64 {
129
        // |x| >= 1024
130
0
        if ax > 0xffe0000000000000u64 {
131
0
            return x + x; // nan
132
0
        }
133
0
        if ax == 0xffe0000000000000u64 {
134
0
            return if (ix >> 63) != 0 { 0.0 } else { x };
135
0
        }
136
        // +/-inf
137
0
        if (ix >> 63) != 0 {
138
            // x <= -1024
139
0
            if ix >= 0xc090cc0000000000u64 {
140
                // x <= -1075
141
                const Z: f64 = f64::from_bits(0x0010000000000000);
142
0
                return Z * Z;
143
0
            }
144
        } else {
145
            // x >= 1024
146
0
            return f64::from_bits(0x7fe0000000000000) * x;
147
        }
148
0
    }
149
150
    // for |x| <= 0x1.71547652b82fep-54, 2^x rounds to 1 to nearest
151
    // this avoids a spurious underflow in z*z below
152
0
    if ax <= 0x792e2a8eca5705fcu64 {
153
0
        return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x);
154
0
    }
155
156
0
    let m = ix.wrapping_shl(12);
157
0
    let ex = (ax >> 53).wrapping_sub(0x3ff);
158
0
    let frac = ex >> 63 | m << (ex & 63);
159
0
    let sx = 4096.0 * x;
160
0
    let fx = backend.round_ties_even(sx);
161
0
    let z = sx - fx;
162
0
    let z2 = z * z;
163
0
    let k = unsafe {
164
0
        fx.to_int_unchecked::<i64>() // this already finite here
165
    };
166
0
    let i1 = k & 0x3f;
167
0
    let i0 = (k >> 6) & 0x3f;
168
0
    let ie = k >> 12;
169
0
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
170
0
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
171
0
    let ti0 = backend.quick_mult(t0, t1);
172
    const C: [u64; 4] = [
173
        0x3f262e42fefa39ef,
174
        0x3e4ebfbdff82c58f,
175
        0x3d6c6b08d73b3e01,
176
        0x3c83b2ab6fdda001,
177
    ];
178
0
    let tz = ti0.hi * z;
179
0
    let mut fh = ti0.hi;
180
181
0
    let p0 = backend.fma(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
182
0
    let p1 = backend.fma(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
183
0
    let p2 = backend.fma(z2, p1, p0);
184
185
0
    let mut fl = backend.fma(tz, p2, ti0.lo);
186
187
    const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
188
189
0
    if ix <= 0xc08ff00000000000u64 {
190
        // x >= -1022
191
0
        if frac != 0 {
192
0
            let ub = fh + (fl + EPS);
193
0
            fh += fl - EPS;
194
0
            if ub != fh {
195
0
                return exp2_accurate(x);
196
0
            }
197
0
        }
198
0
        fh = fast_ldexp(fh, ie as i32);
199
    } else {
200
        // subnormal case
201
0
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
202
0
        let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
203
0
        fl += rs.lo;
204
0
        fh = rs.hi;
205
0
        if frac != 0 {
206
0
            let ub = fh + (fl + EPS);
207
0
            fh += fl - EPS;
208
0
            if ub != fh {
209
0
                return exp2_accurate(x);
210
0
            }
211
0
        }
212
        // when 2^x is exact, no underflow should be raised
213
0
        fh = to_denormal(fh);
214
    }
215
0
    fh
216
0
}
Unexecuted instantiation: pxfm::exponents::exp2::exp2_gen::<pxfm::exponents::expf::FmaBackend>
Unexecuted instantiation: pxfm::exponents::exp2::exp2_gen::<pxfm::exponents::expf::GenericExpfBackend>
217
218
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
219
#[target_feature(enable = "avx", enable = "fma")]
220
0
unsafe fn exp2_fma_impl(x: f64) -> f64 {
221
    use crate::exponents::expf::FmaBackend;
222
0
    exp2_gen(x, FmaBackend {})
223
0
}
224
225
/// Computes exp2
226
///
227
/// Max found ULP 0.5
228
0
pub fn f_exp2(x: f64) -> f64 {
229
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
230
    {
231
        exp2_gen(x, GenericExpfBackend {})
232
    }
233
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
234
    {
235
        use std::sync::OnceLock;
236
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
237
0
        let q = EXECUTOR.get_or_init(|| {
238
0
            if std::arch::is_x86_feature_detected!("avx")
239
0
                && std::arch::is_x86_feature_detected!("fma")
240
            {
241
0
                exp2_fma_impl
242
            } else {
243
0
                fn def_exp2(x: f64) -> f64 {
244
0
                    exp2_gen(x, GenericExpfBackend {})
245
0
                }
246
0
                def_exp2
247
            }
248
0
        });
249
0
        unsafe { q(x) }
250
    }
251
0
}
252
253
#[cfg(test)]
254
mod tests {
255
    use super::*;
256
257
    #[test]
258
    fn test_exp2d() {
259
        assert_eq!(f_exp2(2.0), 4.0);
260
        assert_eq!(f_exp2(3.0), 8.0);
261
        assert_eq!(f_exp2(4.0), 16.0);
262
        assert_eq!(f_exp2(0.35f64), 1.2745606273192622);
263
        assert_eq!(f_exp2(-0.6f64), 0.6597539553864471);
264
        assert_eq!(f_exp2(f64::INFINITY), f64::INFINITY);
265
        assert_eq!(f_exp2(f64::NEG_INFINITY), 0.);
266
        assert!(f_exp2(f64::NAN).is_nan());
267
    }
268
}