Coverage Report

Created: 2025-10-10 07:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/exponents/exp2f.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::{f_fmla, f_fmlaf, pow2if};
30
use crate::polyeval::f_polyeval6;
31
use crate::rounding::CpuRound;
32
use std::hint::black_box;
33
34
const TBLSIZE: usize = 64;
35
36
#[repr(align(64))]
37
struct Exp2Table([(u32, u32); TBLSIZE]);
38
39
#[rustfmt::skip]
40
static EXP2FT: Exp2Table = Exp2Table([(0x3F3504F3, 0xB2D4175E),(0x3F36FD92, 0x3268D5EF),(0x3F38FBAF, 0xB30E8719),(0x3F3AFF5B, 0x3319E7DA),(0x3F3D08A4, 0x333CD82F),(0x3F3F179A, 0x330E1902),(0x3F412C4D, 0x32CCF4D7),(0x3F4346CD, 0x328F330E),(0x3F45672A, 0xB201B5B7),(0x3F478D75, 0x32CCCE34),(0x3F49B9BE, 0x335E937C),(0x3F4BEC15, 0x2FF41909),(0x3F4E248C, 0xB21760EA),(0x3F506334, 0x3283628B),(0x3F52A81E, 0x3340F500),(0x3F54F35B, 0x331202BD),(0x3F5744FD, 0x32B66A3E),(0x3F599D16, 0x32D0D9B1),(0x3F5BFBB8, 0x332ED93F),(0x3F5E60F5, 0x3350A709),(0x3F60CCDF, 0x32025744),(0x3F633F89, 0xB33A7C4D),(0x3F65B907, 0x321DA4E9),(0x3F68396A, 0xB2FF36A7),(0x3F6AC0C7, 0x3217E40E),(0x3F6D4F30, 0xB2400CBB),(0x3F6FE4BA, 0x331A2ACC),(0x3F728177, 0xB2B7D3E5),(0x3F75257D, 0xB1FED2BE),(0x3F77D0DF, 0xB32B73BA),(0x3F7A83B3, 0x32579081),(0x3F7D3E0C, 0xB19726B5),(0x3F800000, 0x00000000),(0x3F8164D2, 0x320C09FB),(0x3F82CD87, 0x3391E031),(0x3F843A29, 0x33287EEF),(0x3F85AAC3, 0xB38F6665),(0x3F871F62, 0x339004AB),(0x3F88980F, 0x33AC4561),(0x3F8A14D5, 0xB39CDAEA),(0x3F8B95C2, 0x32949D5C),(0x3F8D1ADF, 0xB36F79FA),(0x3F8EA43A, 0x33971DC2),(0x3F9031DC, 0xB32BD022),(0x3F91C3D3, 0xB3928952),(0x3F935A2B, 0xB2EBFECF),(0x3F94F4F0, 0x3357B8BB),(0x3F96942D, 0xB307353B),(0x3F9837F0, 0xB345DFE9),(0x3F99E046, 0x3382A804),(0x3F9B8D3A, 0x3326993E),(0x3F9D3EDA, 0x3350A029),(0x3F9EF532, 0xB3605F62),(0x3FA0B051, 0xB210909B),(0x3FA27043, 0xB0DDC369),(0x3FA43516, 0x33385844),(0x3FA5FED7, 0x33400757),(0x3FA7CD94, 0x3325446E),(0x3FA9A15B, 0x33237A50),(0x3FAB7A3A, 0x33201CA4),(0x3FAD583F, 0x32394687),(0x3FAF3B79, 0x332E1225),(0x3FB123F6, 0x33838969),(0x3FB311C4, 0xB219F2BA)]);
41
42
/**
43
Generated by SageMath:
44
```python
45
print("[")
46
for k in range(64):
47
    k = RealField(150)(2)**(RealField(150)(k) / RealField(150)(64))
48
    print(double_to_hex(k) + ",")
49
print("];")
50
```
51
**/
52
pub(crate) static EXP2F_TABLE: [u64; 64] = [
53
    0x3ff0000000000000,
54
    0x3ff02c9a3e778061,
55
    0x3ff059b0d3158574,
56
    0x3ff0874518759bc8,
57
    0x3ff0b5586cf9890f,
58
    0x3ff0e3ec32d3d1a2,
59
    0x3ff11301d0125b51,
60
    0x3ff1429aaea92de0,
61
    0x3ff172b83c7d517b,
62
    0x3ff1a35beb6fcb75,
63
    0x3ff1d4873168b9aa,
64
    0x3ff2063b88628cd6,
65
    0x3ff2387a6e756238,
66
    0x3ff26b4565e27cdd,
67
    0x3ff29e9df51fdee1,
68
    0x3ff2d285a6e4030b,
69
    0x3ff306fe0a31b715,
70
    0x3ff33c08b26416ff,
71
    0x3ff371a7373aa9cb,
72
    0x3ff3a7db34e59ff7,
73
    0x3ff3dea64c123422,
74
    0x3ff4160a21f72e2a,
75
    0x3ff44e086061892d,
76
    0x3ff486a2b5c13cd0,
77
    0x3ff4bfdad5362a27,
78
    0x3ff4f9b2769d2ca7,
79
    0x3ff5342b569d4f82,
80
    0x3ff56f4736b527da,
81
    0x3ff5ab07dd485429,
82
    0x3ff5e76f15ad2148,
83
    0x3ff6247eb03a5585,
84
    0x3ff6623882552225,
85
    0x3ff6a09e667f3bcd,
86
    0x3ff6dfb23c651a2f,
87
    0x3ff71f75e8ec5f74,
88
    0x3ff75feb564267c9,
89
    0x3ff7a11473eb0187,
90
    0x3ff7e2f336cf4e62,
91
    0x3ff82589994cce13,
92
    0x3ff868d99b4492ed,
93
    0x3ff8ace5422aa0db,
94
    0x3ff8f1ae99157736,
95
    0x3ff93737b0cdc5e5,
96
    0x3ff97d829fde4e50,
97
    0x3ff9c49182a3f090,
98
    0x3ffa0c667b5de565,
99
    0x3ffa5503b23e255d,
100
    0x3ffa9e6b5579fdbf,
101
    0x3ffae89f995ad3ad,
102
    0x3ffb33a2b84f15fb,
103
    0x3ffb7f76f2fb5e47,
104
    0x3ffbcc1e904bc1d2,
105
    0x3ffc199bdd85529c,
106
    0x3ffc67f12e57d14b,
107
    0x3ffcb720dcef9069,
108
    0x3ffd072d4a07897c,
109
    0x3ffd5818dcfba487,
110
    0x3ffda9e603db3285,
111
    0x3ffdfc97337b9b5f,
112
    0x3ffe502ee78b3ff6,
113
    0x3ffea4afa2a490da,
114
    0x3ffefa1bee615a27,
115
    0x3fff50765b6e4540,
116
    0x3fffa7c1819e90d8,
117
];
118
119
/* ULP 0.508 method
120
  let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
121
122
  let ui = f32::to_bits(d + redux);
123
  let mut i0 = ui;
124
  i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
125
  let k = i0 / TBLSIZE as u32;
126
  i0 &= TBLSIZE as u32 - 1;
127
  let mut uf = f32::from_bits(ui);
128
  uf -= redux;
129
130
  let item = EXP2FT.0[i0 as usize];
131
  let z0: f32 = f32::from_bits(item.0);
132
  let z1: f32 = f32::from_bits(item.1);
133
134
  let f: f32 = d - uf - z1;
135
136
  let mut u = 0.055504108664458832;
137
  u = f_fmlaf(u, f, 0.24022650695908768);
138
  u = f_fmlaf(u, f, 0.69314718055994973);
139
  u *= f;
140
141
  let i2 = pow2if(k as i32);
142
  f_fmlaf(u, z0, z0) * i2
143
*/
144
145
/// Computing exp2f
146
///
147
/// ULP 0.4999994
148
#[inline]
149
0
pub fn f_exp2f(x: f32) -> f32 {
150
0
    let mut t = x.to_bits();
151
152
0
    if (t & 0xffff) == 0 {
153
        // x maybe integer
154
0
        let k: i32 = (((t >> 23) & 0xff) as i32).wrapping_sub(127); // 2^k <= |x| < 2^(k+1)
155
0
        if k >= 0 && k < 9 && (t << (9i32.wrapping_add(k))) == 0 {
156
            // x integer, with 1 <= |x| < 2^9
157
0
            let msk = (t as i32) >> 31;
158
0
            let mut m: i32 = (((t & 0x7fffff) | (1 << 23)) >> (23 - k)) as i32;
159
0
            m = (m ^ msk).wrapping_sub(msk).wrapping_add(127);
160
0
            if m > 0 && m < 255 {
161
0
                t = (m as u32).wrapping_shl(23);
162
0
                return f32::from_bits(t);
163
0
            } else if m <= 0 && m > -23 {
164
0
                t = 1i32.wrapping_shl(22i32.wrapping_add(m) as u32) as u32;
165
0
                return f32::from_bits(t);
166
0
            }
167
0
        }
168
0
    }
169
0
    let ux = t.wrapping_shl(1);
170
171
0
    if ux >= 0x86000000u32 || ux < 0x65000000u32 {
172
        // |x| >= 128 or x=nan or |x| < 0x1p-26
173
0
        if ux < 0x65000000u32 {
174
0
            return 1.0 + x;
175
0
        } // |x| < 0x1p-26
176
        // if x < -149 or 128 <= x is special
177
0
        if !(t >= 0xc3000000u32 && t < 0xc3150000u32) {
178
0
            if ux >= 0xffu32 << 24 {
179
                // x is inf or nan
180
0
                if ux > (0xffu32 << 24) {
181
0
                    return x + x;
182
0
                } // x = nan
183
                static IR: [f32; 2] = [f32::INFINITY, 0.];
184
0
                return IR[(t >> 31) as usize]; // x = +-inf
185
0
            }
186
0
            if t >= 0xc3150000u32 {
187
                // x < -149
188
0
                let z = x;
189
0
                let mut y = f_fmla(
190
0
                    z as f64 + 149.,
191
0
                    f64::from_bits(0x3690000000000000),
192
0
                    f64::from_bits(0x36a0000000000000),
193
                );
194
0
                y = y.max(f64::from_bits(0x3680000000000000));
195
0
                return y as f32;
196
0
            }
197
            // now x >= 128
198
0
            let r = black_box(f64::from_bits(0x47e0000000000000))
199
0
                * black_box(f64::from_bits(0x47e0000000000000));
200
0
            return r as f32;
201
0
        }
202
0
    }
203
204
0
    if ux <= 0x7a000000u32 {
205
        // |x| < 1/32
206
207
        // Generated by Sollya exp2 on range [-1/32;1/32]:
208
        // d = [-1/32, 1/32];
209
        // f_exp2f = (2^y - 1)/y;
210
        // Q = fpminimax(f_exp2f, 5, [|D...|], d, relative, floating);
211
212
        // See ./notes/exp2f_small.sollya
213
        const C: [u64; 6] = [
214
            0x3fe62e42fefa39f3,
215
            0x3fcebfbdff82c57b,
216
            0x3fac6b08d6f2d7aa,
217
            0x3f83b2ab6fc92f5d,
218
            0x3f55d897cfe27125,
219
            0x3f243090e61e6af1,
220
        ];
221
0
        let xd = x as f64;
222
0
        let p = f_polyeval6(
223
0
            xd,
224
0
            f64::from_bits(C[0]),
225
0
            f64::from_bits(C[1]),
226
0
            f64::from_bits(C[2]),
227
0
            f64::from_bits(C[3]),
228
0
            f64::from_bits(C[4]),
229
0
            f64::from_bits(C[5]),
230
        );
231
0
        return f_fmla(p, xd, 1.) as f32;
232
0
    }
233
234
0
    let x_d = x as f64;
235
0
    let kf = (x_d * 64.).cpu_round();
236
0
    let k = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
237
    // dx = lo = x - (hi + mid) = x - kf * 2^(-6)
238
0
    let dx = f_fmla(f64::from_bits(0xbf90000000000000), kf, x_d);
239
240
    const TABLE_BITS: u32 = 6;
241
    const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1;
242
243
    // hi = floor(kf * 2^(-5))
244
    // exp_hi = shift hi to the exponent field of double precision.
245
0
    let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52);
246
247
    // mh = 2^hi * 2^mid
248
    // mh_bits = bit field of mh
249
0
    let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi);
250
0
    let mh = f64::from_bits(mh_bits as u64);
251
252
    // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
253
    // > P = fpminimax((2^y - 1)/y, 4, [|D...|], [-1/64. 1/64]);
254
    // see ./notes/exp2f.sollya
255
    const C: [u64; 5] = [
256
        0x3fe62e42fefa39ef,
257
        0x3fcebfbdff8131c4,
258
        0x3fac6b08d7061695,
259
        0x3f83b2b1bee74b2a,
260
        0x3f55d88091198529,
261
    ];
262
0
    let dx_sq = dx * dx;
263
0
    let c1 = f_fmla(dx, f64::from_bits(C[0]), 1.0);
264
0
    let c2 = f_fmla(dx, f64::from_bits(C[2]), f64::from_bits(C[1]));
265
0
    let c3 = f_fmla(dx, f64::from_bits(C[4]), f64::from_bits(C[3]));
266
0
    let p = f_fmla(dx_sq, c3, c2);
267
    // 2^x = 2^(hi + mid + lo)
268
    //     = 2^(hi + mid) * 2^lo
269
    //     ~ mh * (1 + lo * P(lo))
270
    //     = mh + (mh*lo) * P(lo)
271
0
    f_fmla(p, dx_sq * mh, c1 * mh) as f32
272
0
}
273
274
#[inline]
275
0
pub(crate) fn dirty_exp2f(d: f32) -> f32 {
276
0
    let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
277
278
0
    let ui = f32::to_bits(d + redux);
279
0
    let mut i0 = ui;
280
0
    i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
281
0
    let k = i0 / TBLSIZE as u32;
282
0
    i0 &= TBLSIZE as u32 - 1;
283
0
    let mut uf = f32::from_bits(ui);
284
0
    uf -= redux;
285
286
0
    let item = EXP2FT.0[i0 as usize];
287
0
    let z0: f32 = f32::from_bits(item.0);
288
289
0
    let f: f32 = d - uf;
290
291
0
    let mut u = 0.24022650695908768;
292
0
    u = f_fmlaf(u, f, 0.69314718055994973);
293
0
    u *= f;
294
295
0
    let i2 = pow2if(k as i32);
296
0
    f_fmlaf(u, z0, z0) * i2
297
0
}
Unexecuted instantiation: pxfm::exponents::exp2f::dirty_exp2f
Unexecuted instantiation: pxfm::exponents::exp2f::dirty_exp2f
298
299
#[cfg(test)]
300
mod tests {
301
    use super::*;
302
303
    #[test]
304
    fn test_exp2f() {
305
        assert_eq!(f_exp2f(1. / 64.), 1.0108893);
306
        assert_eq!(f_exp2f(2.0), 4.0);
307
        assert_eq!(f_exp2f(3.0), 8.0);
308
        assert_eq!(f_exp2f(4.0), 16.0);
309
        assert_eq!(f_exp2f(10.0), 1024.0);
310
        assert_eq!(f_exp2f(-10.0), 0.0009765625);
311
        assert!(f_exp2f(f32::NAN).is_nan());
312
        assert_eq!(f_exp2f(-0.35), 0.7845841);
313
        assert_eq!(f_exp2f(0.35), 1.2745606);
314
        assert!(f_exp2f(f32::INFINITY).is_infinite());
315
        assert_eq!(f_exp2f(f32::NEG_INFINITY), 0.0);
316
    }
317
318
    #[test]
319
    fn test_dirty_exp2f() {
320
        assert!((dirty_exp2f(0.35f32) - 0.35f32.exp2()).abs() < 1e-5);
321
        assert!((dirty_exp2f(-0.6f32) - (-0.6f32).exp2()).abs() < 1e-5);
322
    }
323
}