Coverage Report

Created: 2026-03-20 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.28/src/logs/log2f.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::{f_fmla, f_fmlaf, set_exponent_f32};
30
use crate::logs::logf::{GenLogfBackend, LogfBackend};
31
use crate::polyeval::f_polyeval3;
32
33
pub(crate) static LOG2_R: [u64; 128] = [
34
    0x0000000000000000,
35
    0x3f872c7ba20f7327,
36
    0x3f9743ee861f3556,
37
    0x3fa184b8e4c56af8,
38
    0x3fa77394c9d958d5,
39
    0x3fad6ebd1f1febfe,
40
    0x3fb1bb32a600549d,
41
    0x3fb4c560fe68af88,
42
    0x3fb7d60496cfbb4c,
43
    0x3fb960caf9abb7ca,
44
    0x3fbc7b528b70f1c5,
45
    0x3fbf9c95dc1d1165,
46
    0x3fc097e38ce60649,
47
    0x3fc22dadc2ab3497,
48
    0x3fc3c6fb650cde51,
49
    0x3fc494f863b8df35,
50
    0x3fc633a8bf437ce1,
51
    0x3fc7046031c79f85,
52
    0x3fc8a8980abfbd32,
53
    0x3fc97c1cb13c7ec1,
54
    0x3fcb2602497d5346,
55
    0x3fcbfc67a7fff4cc,
56
    0x3fcdac22d3e441d3,
57
    0x3fce857d3d361368,
58
    0x3fd01d9bbcfa61d4,
59
    0x3fd08bce0d95fa38,
60
    0x3fd169c05363f158,
61
    0x3fd1d982c9d52708,
62
    0x3fd249cd2b13cd6c,
63
    0x3fd32bfee370ee68,
64
    0x3fd39de8e1559f6f,
65
    0x3fd4106017c3eca3,
66
    0x3fd4f6fbb2cec598,
67
    0x3fd56b22e6b578e5,
68
    0x3fd5dfdcf1eeae0e,
69
    0x3fd6552b49986277,
70
    0x3fd6cb0f6865c8ea,
71
    0x3fd7b89f02cf2aad,
72
    0x3fd8304d90c11fd3,
73
    0x3fd8a8980abfbd32,
74
    0x3fd921800924dd3b,
75
    0x3fd99b072a96c6b2,
76
    0x3fda8ff971810a5e,
77
    0x3fdb0b67f4f46810,
78
    0x3fdb877c57b1b070,
79
    0x3fdc043859e2fdb3,
80
    0x3fdc819dc2d45fe4,
81
    0x3fdcffae611ad12b,
82
    0x3fdd7e6c0abc3579,
83
    0x3fddfdd89d586e2b,
84
    0x3fde7df5fe538ab3,
85
    0x3fdefec61b011f85,
86
    0x3fdf804ae8d0cd02,
87
    0x3fe0014332be0033,
88
    0x3fe042bd4b9a7c99,
89
    0x3fe08494c66b8ef0,
90
    0x3fe0c6caaf0c5597,
91
    0x3fe1096015dee4da,
92
    0x3fe14c560fe68af9,
93
    0x3fe18fadb6e2d3c2,
94
    0x3fe1d368296b5255,
95
    0x3fe217868b0c37e8,
96
    0x3fe25c0a0463beb0,
97
    0x3fe2a0f3c340705c,
98
    0x3fe2e644fac04fd8,
99
    0x3fe2e644fac04fd8,
100
    0x3fe32bfee370ee68,
101
    0x3fe37222bb70747c,
102
    0x3fe3b8b1c68fa6ed,
103
    0x3fe3ffad4e74f1d6,
104
    0x3fe44716a2c08262,
105
    0x3fe44716a2c08262,
106
    0x3fe48eef19317991,
107
    0x3fe4d7380dcc422d,
108
    0x3fe51ff2e30214bc,
109
    0x3fe5692101d9b4a6,
110
    0x3fe5b2c3da19723b,
111
    0x3fe5b2c3da19723b,
112
    0x3fe5fcdce2727ddb,
113
    0x3fe6476d98ad990a,
114
    0x3fe6927781d932a8,
115
    0x3fe6927781d932a8,
116
    0x3fe6ddfc2a78fc63,
117
    0x3fe729fd26b707c8,
118
    0x3fe7767c12967a45,
119
    0x3fe7767c12967a45,
120
    0x3fe7c37a9227e7fb,
121
    0x3fe810fa51bf65fd,
122
    0x3fe810fa51bf65fd,
123
    0x3fe85efd062c656d,
124
    0x3fe8ad846cf369a4,
125
    0x3fe8ad846cf369a4,
126
    0x3fe8fc924c89ac84,
127
    0x3fe94c287492c4db,
128
    0x3fe94c287492c4db,
129
    0x3fe99c48be2063c8,
130
    0x3fe9ecf50bf43f13,
131
    0x3fe9ecf50bf43f13,
132
    0x3fea3e2f4ac43f60,
133
    0x3fea8ff971810a5e,
134
    0x3fea8ff971810a5e,
135
    0x3feae255819f022d,
136
    0x3feb35458761d479,
137
    0x3feb35458761d479,
138
    0x3feb88cb9a2ab521,
139
    0x3feb88cb9a2ab521,
140
    0x3febdce9dcc96187,
141
    0x3fec31a27dd00b4a,
142
    0x3fec31a27dd00b4a,
143
    0x3fec86f7b7ea4a89,
144
    0x3fec86f7b7ea4a89,
145
    0x3fecdcebd2373995,
146
    0x3fed338120a6dd9d,
147
    0x3fed338120a6dd9d,
148
    0x3fed8aba045b01c8,
149
    0x3fed8aba045b01c8,
150
    0x3fede298ec0bac0d,
151
    0x3fede298ec0bac0d,
152
    0x3fee3b20546f554a,
153
    0x3fee3b20546f554a,
154
    0x3fee9452c8a71028,
155
    0x3fee9452c8a71028,
156
    0x3feeee32e2aeccbf,
157
    0x3feeee32e2aeccbf,
158
    0x3fef48c34bd1e96f,
159
    0x3fef48c34bd1e96f,
160
    0x3fefa406bd2443df,
161
    0x3ff0000000000000,
162
];
163
164
#[inline(always)]
165
0
fn log2f_gen<B: LogfBackend>(x: f32, backend: B) -> f32 {
166
0
    let mut x_u = x.to_bits();
167
168
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
169
170
0
    let mut m = -(E_BIAS as i32);
171
0
    if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
172
0
        if x == 0.0 {
173
0
            return f32::NEG_INFINITY;
174
0
        }
175
0
        if x_u == 0x80000000u32 {
176
0
            return f32::NEG_INFINITY;
177
0
        }
178
0
        if x.is_sign_negative() && !x.is_nan() {
179
0
            return f32::NAN + x;
180
0
        }
181
        // x is +inf or nan
182
0
        if x.is_nan() || x.is_infinite() {
183
0
            return x + x;
184
0
        }
185
        // Normalize denormal inputs.
186
0
        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
187
0
        m -= 23;
188
0
    }
189
190
0
    m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
191
0
    let mant = x_u & 0x007F_FFFF;
192
0
    let index = mant.wrapping_shr(16);
193
194
0
    x_u = set_exponent_f32(x_u, 0x7F);
195
196
0
    let u = f32::from_bits(x_u);
197
198
0
    let v = if B::HAS_FMA {
199
        use crate::logs::logf::LOG_REDUCTION_F32;
200
0
        backend.fmaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64 // Exact.
201
    } else {
202
        use crate::logs::LOG_RANGE_REDUCTION;
203
0
        backend.fma(
204
0
            u as f64,
205
0
            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
206
            -1.0,
207
        ) // Exact
208
    };
209
    // Degree-5 polynomial approximation of log2 generated by Sollya with:
210
    // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
211
212
0
    let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
213
214
    const COEFFS: [u64; 5] = [
215
        0x3ff71547652b8133,
216
        0xbfe71547652d1e33,
217
        0x3fdec70a098473de,
218
        0xbfd7154c5ccdf121,
219
        0x3fd2514fd90a130a,
220
    ];
221
0
    let v2 = v * v; // Exact
222
0
    let c0 = backend.fma(v, f64::from_bits(COEFFS[0]), extra_factor);
223
0
    let c1 = backend.fma(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
224
0
    let c2 = backend.fma(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
225
226
0
    let r = backend.polyeval3(v2, c0, c1, c2);
227
0
    r as f32
228
0
}
Unexecuted instantiation: pxfm::logs::log2f::log2f_gen::<pxfm::logs::logf::FmaLogfBackend>
Unexecuted instantiation: pxfm::logs::log2f::log2f_gen::<pxfm::logs::logf::GenLogfBackend>
229
230
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
231
#[target_feature(enable = "avx", enable = "fma")]
232
0
unsafe fn log2f_fma_impl(x: f32) -> f32 {
233
    use crate::logs::logf::FmaLogfBackend;
234
0
    log2f_gen(x, FmaLogfBackend {})
235
0
}
236
237
/// Logarithm of base 2
238
///
239
/// Max found ULP 0.4999996
240
0
pub fn f_log2f(x: f32) -> f32 {
241
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
242
    {
243
        log2f_gen(x, GenLogfBackend {})
244
    }
245
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
246
    {
247
        use std::sync::OnceLock;
248
        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
249
0
        let q = EXECUTOR.get_or_init(|| {
250
0
            if std::arch::is_x86_feature_detected!("avx")
251
0
                && std::arch::is_x86_feature_detected!("fma")
252
            {
253
0
                log2f_fma_impl
254
            } else {
255
0
                fn def_log2f(x: f32) -> f32 {
256
0
                    log2f_gen(x, GenLogfBackend {})
257
0
                }
258
0
                def_log2f
259
            }
260
0
        });
261
0
        unsafe { q(x) }
262
    }
263
0
}
264
265
/// Natural logarithm using FMA
266
///
267
/// Max found ULP 0.4999996
268
#[inline(always)]
269
#[allow(dead_code)]
270
0
pub(crate) fn f_log2fx(x: f32) -> f64 {
271
0
    let mut x_u = x.to_bits();
272
273
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
274
275
0
    let mut m = -(E_BIAS as i32);
276
0
    if x_u == 0x3f80_0000u32 {
277
0
        return 0.0;
278
0
    }
279
280
0
    if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
281
0
        if x == 0.0 {
282
0
            return f64::NEG_INFINITY;
283
0
        }
284
0
        if x_u == 0x80000000u32 {
285
0
            return f64::NEG_INFINITY;
286
0
        }
287
0
        if x.is_sign_negative() && !x.is_nan() {
288
0
            return f64::NAN + x as f64;
289
0
        }
290
        // x is +inf or nan
291
0
        if x.is_nan() || x.is_infinite() {
292
0
            return (x + x) as f64;
293
0
        }
294
        // Normalize denormal inputs.
295
0
        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
296
0
        m -= 23;
297
0
    }
298
299
0
    m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
300
0
    let mant = x_u & 0x007F_FFFF;
301
0
    let index = mant.wrapping_shr(16);
302
303
0
    x_u = set_exponent_f32(x_u, 0x7F);
304
305
    let v;
306
0
    let u = f32::from_bits(x_u);
307
308
    #[cfg(any(
309
        all(
310
            any(target_arch = "x86", target_arch = "x86_64"),
311
            target_feature = "fma"
312
        ),
313
        target_arch = "aarch64"
314
    ))]
315
    {
316
        use crate::logs::logf::LOG_REDUCTION_F32;
317
        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
318
    }
319
    #[cfg(not(any(
320
        all(
321
            any(target_arch = "x86", target_arch = "x86_64"),
322
            target_feature = "fma"
323
        ),
324
        target_arch = "aarch64"
325
    )))]
326
    {
327
        use crate::logs::log2::LOG_RANGE_REDUCTION;
328
0
        v = f_fmla(
329
0
            u as f64,
330
0
            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
331
0
            -1.0,
332
0
        ); // Exact
333
    }
334
    // Degree-5 polynomial approximation of log2 generated by Sollya with:
335
    // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
336
337
0
    let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
338
339
    const COEFFS: [u64; 5] = [
340
        0x3ff71547652b8133,
341
        0xbfe71547652d1e33,
342
        0x3fdec70a098473de,
343
        0xbfd7154c5ccdf121,
344
        0x3fd2514fd90a130a,
345
    ];
346
0
    let v2 = v * v; // Exact
347
0
    let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
348
0
    let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
349
0
    let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
350
351
0
    f_polyeval3(v2, c0, c1, c2)
352
0
}
353
354
/// Dirty log2 using FMA
355
#[inline(always)]
356
#[allow(dead_code)]
357
0
pub(crate) fn dirty_log2f(d: f32) -> f32 {
358
0
    let mut ix = d.to_bits();
359
    /* reduce x into [sqrt(2)/2, sqrt(2)] */
360
0
    ix = ix.wrapping_add(0x3f800000 - 0x3f3504f3);
361
0
    let n = (ix >> 23) as i32 - 0x7f;
362
0
    ix = (ix & 0x007fffff).wrapping_add(0x3f3504f3);
363
0
    let a = f32::from_bits(ix);
364
365
0
    let x = (a - 1.) / (a + 1.);
366
367
0
    let x2 = x * x;
368
0
    let mut u = 0.4121985850084821691e+0;
369
0
    u = f_fmlaf(u, x2, 0.5770780163490337802e+0);
370
0
    u = f_fmlaf(u, x2, 0.9617966939259845749e+0);
371
0
    f_fmlaf(x2 * x, u, f_fmlaf(x, 0.2885390081777926802e+1, n as f32))
372
0
}
373
374
#[cfg(test)]
375
mod tests {
376
    use super::*;
377
378
    #[test]
379
    fn test_log2f() {
380
        assert!((f_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
381
        assert!((f_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
382
        assert_eq!(f_log2f(0.), f32::NEG_INFINITY);
383
        assert_eq!(f_log2f(1.0), 0.0);
384
        assert!(f_log2f(-1.).is_nan());
385
        assert!(f_log2f(f32::NAN).is_nan());
386
        assert!(f_log2f(f32::NEG_INFINITY).is_nan());
387
        assert_eq!(f_log2f(f32::INFINITY), f32::INFINITY);
388
    }
389
390
    #[test]
391
    fn test_dirty_log2f() {
392
        assert!((dirty_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
393
        assert!((dirty_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
394
    }
395
}