Coverage Report

Created: 2025-10-14 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/logs/logf.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::bits::min_normal_f32;
30
use crate::common::*;
31
use crate::polyeval::f_polyeval3;
32
use std::hint::black_box;
33
34
#[repr(C, align(8))]
35
pub(crate) struct LogReductionF32Aligned(pub(crate) [u32; 128]);
36
37
pub(crate) static LOG_REDUCTION_F32: LogReductionF32Aligned = LogReductionF32Aligned([
38
    0x3f800000, 0x3f7e0000, 0x3f7c0000, 0x3f7a0000, 0x3f780000, 0x3f760000, 0x3f740000, 0x3f720000,
39
    0x3f700000, 0x3f6f0000, 0x3f6d0000, 0x3f6b0000, 0x3f6a0000, 0x3f680000, 0x3f660000, 0x3f650000,
40
    0x3f630000, 0x3f620000, 0x3f600000, 0x3f5f0000, 0x3f5d0000, 0x3f5c0000, 0x3f5a0000, 0x3f590000,
41
    0x3f570000, 0x3f560000, 0x3f540000, 0x3f530000, 0x3f520000, 0x3f500000, 0x3f4f0000, 0x3f4e0000,
42
    0x3f4c0000, 0x3f4b0000, 0x3f4a0000, 0x3f490000, 0x3f480000, 0x3f460000, 0x3f450000, 0x3f440000,
43
    0x3f430000, 0x3f420000, 0x3f400000, 0x3f3f0000, 0x3f3e0000, 0x3f3d0000, 0x3f3c0000, 0x3f3b0000,
44
    0x3f3a0000, 0x3f390000, 0x3f380000, 0x3f370000, 0x3f360000, 0x3f350000, 0x3f340000, 0x3f330000,
45
    0x3f320000, 0x3f310000, 0x3f300000, 0x3f2f0000, 0x3f2e0000, 0x3f2d0000, 0x3f2c0000, 0x3f2b0000,
46
    0x3f2a0000, 0x3f2a0000, 0x3f290000, 0x3f280000, 0x3f270000, 0x3f260000, 0x3f250000, 0x3f250000,
47
    0x3f240000, 0x3f230000, 0x3f220000, 0x3f210000, 0x3f200000, 0x3f200000, 0x3f1f0000, 0x3f1e0000,
48
    0x3f1d0000, 0x3f1d0000, 0x3f1c0000, 0x3f1b0000, 0x3f1a0000, 0x3f1a0000, 0x3f190000, 0x3f180000,
49
    0x3f180000, 0x3f170000, 0x3f160000, 0x3f160000, 0x3f150000, 0x3f140000, 0x3f140000, 0x3f130000,
50
    0x3f120000, 0x3f120000, 0x3f110000, 0x3f100000, 0x3f100000, 0x3f0f0000, 0x3f0e0000, 0x3f0e0000,
51
    0x3f0d0000, 0x3f0d0000, 0x3f0c0000, 0x3f0b0000, 0x3f0b0000, 0x3f0a0000, 0x3f0a0000, 0x3f090000,
52
    0x3f080000, 0x3f080000, 0x3f070000, 0x3f070000, 0x3f060000, 0x3f060000, 0x3f050000, 0x3f050000,
53
    0x3f040000, 0x3f040000, 0x3f030000, 0x3f030000, 0x3f020000, 0x3f020000, 0x3f010000, 0x3f000000,
54
]);
55
56
static LOG_R: [u64; 128] = [
57
    0x0000000000000000,
58
    0x3f8010157588de71,
59
    0x3f90205658935847,
60
    0x3f98492528c8cabf,
61
    0x3fa0415d89e74444,
62
    0x3fa466aed42de3ea,
63
    0x3fa894aa149fb343,
64
    0x3faccb73cdddb2cc,
65
    0x3fb08598b59e3a07,
66
    0x3fb1973bd1465567,
67
    0x3fb3bdf5a7d1ee64,
68
    0x3fb5e95a4d9791cb,
69
    0x3fb700d30aeac0e1,
70
    0x3fb9335e5d594989,
71
    0x3fbb6ac88dad5b1c,
72
    0x3fbc885801bc4b23,
73
    0x3fbec739830a1120,
74
    0x3fbfe89139dbd566,
75
    0x3fc1178e8227e47c,
76
    0x3fc1aa2b7e23f72a,
77
    0x3fc2d1610c86813a,
78
    0x3fc365fcb0159016,
79
    0x3fc4913d8333b561,
80
    0x3fc527e5e4a1b58d,
81
    0x3fc6574ebe8c133a,
82
    0x3fc6f0128b756abc,
83
    0x3fc823c16551a3c2,
84
    0x3fc8beafeb38fe8c,
85
    0x3fc95a5adcf7017f,
86
    0x3fca93ed3c8ad9e3,
87
    0x3fcb31d8575bce3d,
88
    0x3fcbd087383bd8ad,
89
    0x3fcd1037f2655e7b,
90
    0x3fcdb13db0d48940,
91
    0x3fce530effe71012,
92
    0x3fcef5ade4dcffe6,
93
    0x3fcf991c6cb3b379,
94
    0x3fd07138604d5862,
95
    0x3fd0c42d676162e3,
96
    0x3fd1178e8227e47c,
97
    0x3fd16b5ccbacfb73,
98
    0x3fd1bf99635a6b95,
99
    0x3fd269621134db92,
100
    0x3fd2bef07cdc9354,
101
    0x3fd314f1e1d35ce4,
102
    0x3fd36b6776be1117,
103
    0x3fd3c25277333184,
104
    0x3fd419b423d5e8c7,
105
    0x3fd4718dc271c41b,
106
    0x3fd4c9e09e172c3c,
107
    0x3fd522ae0738a3d8,
108
    0x3fd57bf753c8d1fb,
109
    0x3fd5d5bddf595f30,
110
    0x3fd630030b3aac49,
111
    0x3fd68ac83e9c6a14,
112
    0x3fd6e60ee6af1972,
113
    0x3fd741d876c67bb1,
114
    0x3fd79e26687cfb3e,
115
    0x3fd7fafa3bd8151c,
116
    0x3fd85855776dcbfb,
117
    0x3fd8b639a88b2df5,
118
    0x3fd914a8635bf68a,
119
    0x3fd973a3431356ae,
120
    0x3fd9d32bea15ed3b,
121
    0x3fda33440224fa79,
122
    0x3fda33440224fa79,
123
    0x3fda93ed3c8ad9e3,
124
    0x3fdaf5295248cdd0,
125
    0x3fdb56fa04462909,
126
    0x3fdbb9611b80e2fb,
127
    0x3fdc1c60693fa39e,
128
    0x3fdc1c60693fa39e,
129
    0x3fdc7ff9c74554c9,
130
    0x3fdce42f18064743,
131
    0x3fdd490246defa6b,
132
    0x3fddae75484c9616,
133
    0x3fde148a1a2726ce,
134
    0x3fde148a1a2726ce,
135
    0x3fde7b42c3ddad73,
136
    0x3fdee2a156b413e5,
137
    0x3fdf4aa7ee03192d,
138
    0x3fdf4aa7ee03192d,
139
    0x3fdfb358af7a4884,
140
    0x3fe00e5ae5b207ab,
141
    0x3fe04360be7603ad,
142
    0x3fe04360be7603ad,
143
    0x3fe078bf0533c568,
144
    0x3fe0ae76e2d054fa,
145
    0x3fe0ae76e2d054fa,
146
    0x3fe0e4898611cce1,
147
    0x3fe11af823c75aa8,
148
    0x3fe11af823c75aa8,
149
    0x3fe151c3f6f29612,
150
    0x3fe188ee40f23ca6,
151
    0x3fe188ee40f23ca6,
152
    0x3fe1c07849ae6007,
153
    0x3fe1f8635fc61659,
154
    0x3fe1f8635fc61659,
155
    0x3fe230b0d8bebc98,
156
    0x3fe269621134db92,
157
    0x3fe269621134db92,
158
    0x3fe2a2786d0ec107,
159
    0x3fe2dbf557b0df43,
160
    0x3fe2dbf557b0df43,
161
    0x3fe315da4434068b,
162
    0x3fe315da4434068b,
163
    0x3fe35028ad9d8c86,
164
    0x3fe38ae2171976e7,
165
    0x3fe38ae2171976e7,
166
    0x3fe3c6080c36bfb5,
167
    0x3fe3c6080c36bfb5,
168
    0x3fe4019c2125ca93,
169
    0x3fe43d9ff2f923c5,
170
    0x3fe43d9ff2f923c5,
171
    0x3fe47a1527e8a2d3,
172
    0x3fe47a1527e8a2d3,
173
    0x3fe4b6fd6f970c1f,
174
    0x3fe4b6fd6f970c1f,
175
    0x3fe4f45a835a4e19,
176
    0x3fe4f45a835a4e19,
177
    0x3fe5322e26867857,
178
    0x3fe5322e26867857,
179
    0x3fe5707a26bb8c66,
180
    0x3fe5707a26bb8c66,
181
    0x3fe5af405c3649e0,
182
    0x3fe5af405c3649e0,
183
    0x3fe5ee82aa241920,
184
    0x0000000000000000,
185
];
186
187
/// Natural logarithm
188
///
189
/// Max found ULP 0.4999988
190
0
pub fn f_logf(x: f32) -> f32 {
191
0
    let mut x_u = x.to_bits();
192
193
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
194
195
0
    let mut m = -(E_BIAS as i32);
196
0
    if x_u < 0x4c5d65a5u32 {
197
0
        if x_u == 0x3f7f4d6fu32 {
198
0
            return black_box(f64::from_bits(0xbf6659ec80000000) as f32) + min_normal_f32(true);
199
0
        } else if x_u == 0x41178febu32 {
200
0
            return black_box(f64::from_bits(0x4001fcbce0000000) as f32) + min_normal_f32(true);
201
0
        } else if x_u == 0x3f800000u32 {
202
0
            return 0.;
203
0
        } else if x_u == 0x1e88452du32 {
204
0
            return black_box(f64::from_bits(0xc046d7b180000000) as f32) + min_normal_f32(true);
205
0
        }
206
0
        if x_u < f32::MIN_POSITIVE.to_bits() {
207
0
            if x == 0.0 {
208
0
                return f32::NEG_INFINITY;
209
0
            }
210
            // Normalize denormal inputs.
211
0
            x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
212
0
            m -= 23;
213
0
        }
214
    } else {
215
0
        if x_u == 0x4c5d65a5u32 {
216
0
            return black_box(f32::from_bits(0x418f034b)) + min_normal_f32(true);
217
0
        } else if x_u == 0x65d890d3u32 {
218
0
            return black_box(f32::from_bits(0x4254d1f9)) + min_normal_f32(true);
219
0
        } else if x_u == 0x6f31a8ecu32 {
220
0
            return black_box(f32::from_bits(0x42845a89)) + min_normal_f32(true);
221
0
        } else if x_u == 0x7a17f30au32 {
222
0
            return black_box(f32::from_bits(0x42a28a1b)) + min_normal_f32(true);
223
0
        } else if x_u == 0x500ffb03u32 {
224
0
            return black_box(f32::from_bits(0x41b7ee9a)) + min_normal_f32(true);
225
0
        } else if x_u == 0x5cd69e88u32 {
226
0
            return black_box(f32::from_bits(0x4222e0a3)) + min_normal_f32(true);
227
0
        } else if x_u == 0x5ee8984eu32 {
228
0
            return black_box(f32::from_bits(0x422e4a21)) + min_normal_f32(true);
229
0
        }
230
231
0
        if x_u > f32::MAX.to_bits() {
232
0
            if x_u == 0x80000000u32 {
233
0
                return f32::NEG_INFINITY;
234
0
            }
235
0
            if x.is_sign_negative() && !x.is_nan() {
236
0
                return f32::NAN + x;
237
0
            }
238
            // x is +inf or nan
239
0
            if x.is_nan() {
240
0
                return f32::NAN;
241
0
            }
242
243
0
            return x;
244
0
        }
245
    }
246
247
0
    let mant = x_u & 0x007F_FFFF;
248
    // Extract 7 leading fractional bits of the mantissa
249
0
    let index = mant.wrapping_shr(16);
250
    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are
251
    // all 1's.
252
0
    m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
253
0
    x_u = set_exponent_f32(x_u, 0x7F);
254
255
    let v;
256
0
    let u = f32::from_bits(x_u);
257
258
    #[cfg(any(
259
        all(
260
            any(target_arch = "x86", target_arch = "x86_64"),
261
            target_feature = "fma"
262
        ),
263
        target_arch = "aarch64"
264
    ))]
265
    {
266
        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
267
    }
268
    #[cfg(not(any(
269
        all(
270
            any(target_arch = "x86", target_arch = "x86_64"),
271
            target_feature = "fma"
272
        ),
273
        target_arch = "aarch64"
274
    )))]
275
    {
276
        use crate::logs::log2::LOG_RANGE_REDUCTION;
277
0
        v = f_fmla(
278
0
            u as f64,
279
0
            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
280
0
            -1.0,
281
0
        ); // Exact
282
    }
283
    // Degree-5 polynomial approximation of log generated by Sollya with:
284
    // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
285
    const COEFFS: [u64; 4] = [
286
        0xbfe000000000fe63,
287
        0x3fd555556e963c16,
288
        0xbfd000028dedf986,
289
        0x3fc966681bfda7f7,
290
    ];
291
0
    let v2 = v * v; // Exact
292
0
    let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
293
0
    let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
294
0
    let p0 = f64::from_bits(LOG_R[index as usize]) + v;
295
    const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
296
0
    let r = f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2));
297
0
    r as f32
298
0
}
299
300
#[inline]
301
0
pub(crate) fn fast_logf(x: f32) -> f64 {
302
0
    let mut x_u = x.to_bits();
303
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
304
0
    let mut m = -(E_BIAS as i32);
305
0
    if x_u < f32::MIN_POSITIVE.to_bits() {
306
0
        // Normalize denormal inputs.
307
0
        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
308
0
        m -= 23;
309
0
    }
310
311
0
    let mant = x_u & 0x007F_FFFF;
312
    // Extract 7 leading fractional bits of the mantissa
313
0
    let index = mant.wrapping_shr(16);
314
    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are
315
    // all 1's.
316
0
    m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
317
0
    x_u = set_exponent_f32(x_u, 0x7F);
318
319
    let v;
320
0
    let u = f32::from_bits(x_u);
321
322
    #[cfg(any(
323
        all(
324
            any(target_arch = "x86", target_arch = "x86_64"),
325
            target_feature = "fma"
326
        ),
327
        target_arch = "aarch64"
328
    ))]
329
    {
330
        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
331
    }
332
    #[cfg(not(any(
333
        all(
334
            any(target_arch = "x86", target_arch = "x86_64"),
335
            target_feature = "fma"
336
        ),
337
        target_arch = "aarch64"
338
    )))]
339
    {
340
        use crate::logs::log2::LOG_RANGE_REDUCTION;
341
0
        v = f_fmla(
342
0
            u as f64,
343
0
            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
344
0
            -1.0,
345
0
        ); // Exact
346
    }
347
    // Degree-5 polynomial approximation of log generated by Sollya with:
348
    // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
349
    const COEFFS: [u64; 4] = [
350
        0xbfe000000000fe63,
351
        0x3fd555556e963c16,
352
        0xbfd000028dedf986,
353
        0x3fc966681bfda7f7,
354
    ];
355
0
    let v2 = v * v; // Exact
356
0
    let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
357
0
    let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
358
0
    let p0 = f64::from_bits(LOG_R[index as usize]) + v;
359
    const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
360
0
    f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2))
361
0
}
362
363
/// Log for given value for const context.
364
/// This is simplified version just to make a good approximation on const context.
365
#[inline]
366
0
pub const fn logf(d: f32) -> f32 {
367
0
    let ux = d.to_bits();
368
    #[allow(clippy::collapsible_if)]
369
0
    if ux < (1 << 23) || ux >= 0x7f800000u32 {
370
0
        if ux == 0 || ux >= 0x7f800000u32 {
371
0
            if ux == 0x7f800000u32 {
372
0
                return d;
373
0
            }
374
0
            let ax = ux.wrapping_shl(1);
375
0
            if ax == 0u32 {
376
                // -0.0
377
0
                return f32::NEG_INFINITY;
378
0
            }
379
0
            if ax > 0xff000000u32 {
380
0
                return d + d;
381
0
            } // nan
382
0
            return f32::NAN;
383
0
        }
384
0
    }
385
386
0
    let mut ix = d.to_bits();
387
    /* reduce x into [sqrt(2)/2, sqrt(2)] */
388
0
    ix += 0x3f800000 - 0x3f3504f3;
389
0
    let n = (ix >> 23) as i32 - 0x7f;
390
0
    ix = (ix & 0x007fffff) + 0x3f3504f3;
391
0
    let a = f32::from_bits(ix) as f64;
392
393
0
    let x = (a - 1.) / (a + 1.);
394
0
    let x2 = x * x;
395
0
    let mut u = 0.2222220222147750310e+0;
396
0
    u = fmla(u, x2, 0.2857142871244668543e+0);
397
0
    u = fmla(u, x2, 0.3999999999950960318e+0);
398
0
    u = fmla(u, x2, 0.6666666666666734090e+0);
399
0
    u = fmla(u, x2, 2.);
400
0
    fmla(x, u, std::f64::consts::LN_2 * (n as f64)) as f32
401
0
}
402
403
#[cfg(test)]
404
mod tests {
405
    use super::*;
406
407
    #[test]
408
    fn test_logf() {
409
        assert!(
410
            (logf(1f32) - 0f32).abs() < 1e-6,
411
            "Invalid result {}",
412
            logf(1f32)
413
        );
414
        assert!(
415
            (logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
416
            "Invalid result {}",
417
            logf(5f32)
418
        );
419
        assert_eq!(logf(0.), f32::NEG_INFINITY);
420
        assert!(logf(-1.).is_nan());
421
        assert!(logf(f32::NAN).is_nan());
422
        assert!(logf(f32::NEG_INFINITY).is_nan());
423
        assert_eq!(logf(f32::INFINITY), f32::INFINITY);
424
    }
425
426
    #[test]
427
    fn test_flogf() {
428
        assert!(
429
            (f_logf(1f32) - 0f32).abs() < 1e-6,
430
            "Invalid result {}",
431
            f_logf(1f32)
432
        );
433
        assert!(
434
            (f_logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
435
            "Invalid result {}",
436
            f_logf(5f32)
437
        );
438
        assert_eq!(f_logf(0.), f32::NEG_INFINITY);
439
        assert!(f_logf(-1.).is_nan());
440
        assert!(f_logf(f32::NAN).is_nan());
441
        assert!(f_logf(f32::NEG_INFINITY).is_nan());
442
        assert_eq!(f_logf(f32::INFINITY), f32::INFINITY);
443
    }
444
}