Coverage Report

Created: 2025-12-11 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/err/erff.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::f_fmla;
30
31
// Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by Sollya
32
// with:
33
// > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|],
34
//                 [k/8, (k + 1)/8]);
35
// for k = 0..31.
36
static COEFFS: [[u64; 8]; 32] = [
37
    [
38
        0x3ff20dd750429b6d,
39
        0xbfd812746b037753,
40
        0x3fbce2f219e8596a,
41
        0xbf9b82cdacb78fda,
42
        0x3f756479297dfda5,
43
        0xbf48b3ac5455ef02,
44
        0xbf7126fcac367e3b,
45
        0x3fb2d0bdb3ba4984,
46
    ],
47
    [
48
        0x3ff20dd750429b6d,
49
        0xbfd812746b0379a8,
50
        0x3fbce2f21a03cf2a,
51
        0xbf9b82ce30de083e,
52
        0x3f7565bcad3eb60f,
53
        0xbf4c02c66f659256,
54
        0x3f1f92f673385229,
55
        0xbeedef402648ae90,
56
    ],
57
    [
58
        0x3ff20dd750429b34,
59
        0xbfd812746b032dce,
60
        0x3fbce2f219d84aae,
61
        0xbf9b82ce22dcf139,
62
        0x3f7565b9efcd4af1,
63
        0xbf4c021f1af414bc,
64
        0x3f1f7c6d177eff82,
65
        0xbeec9e4410dcf865,
66
    ],
67
    [
68
        0x3ff20dd750426eab,
69
        0xbfd812746ae592c7,
70
        0x3fbce2f211525f14,
71
        0xbf9b82ccc125e63f,
72
        0x3f756596f261cfd3,
73
        0xbf4bfde1ff8eeecf,
74
        0x3f1f31a9d15dc5d8,
75
        0xbeea5a4362844b3c,
76
    ],
77
    [
78
        0x3ff20dd75039c705,
79
        0xbfd812746777e74d,
80
        0x3fbce2f17af98a1b,
81
        0xbf9b82be4b817cbe,
82
        0x3f7564bec2e2962e,
83
        0xbf4bee86f9da3558,
84
        0x3f1e9443689dc0cc,
85
        0xbee79c0f230805d8,
86
    ],
87
    [
88
        0x3ff20dd74f811211,
89
        0xbfd81274371a3e8f,
90
        0x3fbce2ec038262e5,
91
        0xbf9b8265b82c5e1f,
92
        0x3f75615a2e239267,
93
        0xbf4bc63ae023dceb,
94
        0x3f1d87c2102f7e06,
95
        0xbee49584bea41d62,
96
    ],
97
    [
98
        0x3ff20dd746d063e3,
99
        0xbfd812729a8a950f,
100
        0x3fbce2cb0a2df232,
101
        0xbf9b80eca1f51278,
102
        0x3f75572e26c46815,
103
        0xbf4b715e5638b65e,
104
        0x3f1bfbb195484968,
105
        0xbee177a565c15c52,
106
    ],
107
    [
108
        0x3ff20dd701b44486,
109
        0xbfd812691145f237,
110
        0x3fbce23a06b8cfd9,
111
        0xbf9b7c1dc7245288,
112
        0x3f753e92f7f397dd,
113
        0xbf4ad97cc4acf0b2,
114
        0x3f19f028b2b09b71,
115
        0xbedcdc4da08da8c1,
116
    ],
117
    [
118
        0x3ff20dd5715ac332,
119
        0xbfd8123e680bd0eb,
120
        0x3fbce0457aded691,
121
        0xbf9b6f52d52bed40,
122
        0x3f750c291b84414c,
123
        0xbf49ea246b1ad4a9,
124
        0x3f177654674e0ca0,
125
        0xbed737c11a1bcebb,
126
    ],
127
    [
128
        0x3ff20dce6593e114,
129
        0xbfd811a59c02eadc,
130
        0x3fbcdab53c7cd7d5,
131
        0xbf9b526d2e321eed,
132
        0x3f74b1d32cd8b994,
133
        0xbf48963143ec0a1e,
134
        0x3f14ad5700e4db91,
135
        0xbed231e100e43ef2,
136
    ],
137
    [
138
        0x3ff20db48bfd5a62,
139
        0xbfd80fdd84f9e308,
140
        0x3fbccd340d462983,
141
        0xbf9b196a29287680,
142
        0x3f74210c2c13a0f7,
143
        0xbf46dbdfb4ff71ae,
144
        0x3f11bca2d17fbd71,
145
        0xbecbca36f90c7cf5,
146
    ],
147
    [
148
        0x3ff20d64b2f8f508,
149
        0xbfd80b4d4f19fa8b,
150
        0x3fbcb088197262e3,
151
        0xbf9ab51fd02e5b99,
152
        0x3f734e1e5e81a632,
153
        0xbf44c66377b502ce,
154
        0x3f0d9ad25066213c,
155
        0xbec4b0df7dd0cfa1,
156
    ],
157
    [
158
        0x3ff20c8fc1243576,
159
        0xbfd8010cb2009e27,
160
        0x3fbc7a47e9299315,
161
        0xbf9a155be5683654,
162
        0x3f7233502694997b,
163
        0xbf426c94b7d81300,
164
        0x3f08094f1de25fb9,
165
        0xbebe0e3d776c6eef,
166
    ],
167
    [
168
        0x3ff20a9bd1611bc1,
169
        0xbfd7ec7fbce83f90,
170
        0x3fbc1d757d7317b7,
171
        0xbf992c160cd589f0,
172
        0x3f70d307269cc5c2,
173
        0xbf3fda5b0d2d1879,
174
        0x3f02fdd7b3b14a7f,
175
        0xbeb54eed4a26af5a,
176
    ],
177
    [
178
        0x3ff20682834f943d,
179
        0xbfd7c73f747bf5a9,
180
        0x3fbb8c2db4a9ffd1,
181
        0xbf97f0e4ffe989ec,
182
        0x3f6e7061eae4166e,
183
        0xbf3ad36e873fff2d,
184
        0x3efd39222396128e,
185
        0xbead83dacec5ea6b,
186
    ],
187
    [
188
        0x3ff1feb8d12676d7,
189
        0xbfd7898347284afe,
190
        0x3fbaba3466b34451,
191
        0xbf9663adc573e2f9,
192
        0x3f6ae99fb17c3e08,
193
        0xbf3602f950ad5535,
194
        0x3ef5e9717490609d,
195
        0xbea3fca107bbc8d5,
196
    ],
197
    [
198
        0x3ff1f12fe3c536fa,
199
        0xbfd72b1d1f22e6d3,
200
        0x3fb99fc0eed4a896,
201
        0xbf948db0a87bd8c6,
202
        0x3f673e368895aa61,
203
        0xbf319b35d5301fc8,
204
        0x3ef007987e4bb033,
205
        0xbe9a7edcd4c2dc70,
206
    ],
207
    [
208
        0x3ff1db7b0df84d5d,
209
        0xbfd6a4e4a41cde02,
210
        0x3fb83bbded16455d,
211
        0xbf92809b3b36977e,
212
        0x3f639c08bab44679,
213
        0xbf2b7b45a70ed119,
214
        0x3ee6e99b36410e7b,
215
        0xbe913619bb7ebc0c,
216
    ],
217
    [
218
        0x3ff1bb1c85c4a527,
219
        0xbfd5f23b99a249a3,
220
        0x3fb694c91fa0d12c,
221
        0xbf9053e1ce11c72d,
222
        0x3f602bf72c50ea78,
223
        0xbf24f478fb56cb02,
224
        0x3ee005f80ecbe213,
225
        0xbe85f2446bde7f5b,
226
    ],
227
    [
228
        0x3ff18dec3bd51f9d,
229
        0xbfd5123f58346186,
230
        0x3fb4b8a1ca536ab4,
231
        0xbf8c4243015cc723,
232
        0x3f5a1a8a01d351ef,
233
        0xbf1f466b34f1d86b,
234
        0x3ed5f835eea0bf6a,
235
        0xbe7b83165b939234,
236
    ],
237
    [
238
        0x3ff152804c3369f4,
239
        0xbfd4084cd4afd4bc,
240
        0x3fb2ba2e836e47aa,
241
        0xbf8800f2dfc6904b,
242
        0x3f54a6daf0669c59,
243
        0xbf16e326ab872317,
244
        0x3ecd9761a6a755a5,
245
        0xbe70fca33f9dd4b5,
246
    ],
247
    [
248
        0x3ff1087ad68356aa,
249
        0xbfd2dbb044707459,
250
        0x3fb0aea8ceaa0384,
251
        0xbf840b516d52b3d2,
252
        0x3f500c9e05f01d22,
253
        0xbf1076afb0dc0ff7,
254
        0x3ec39fadec400657,
255
        0xbe64b5761352e7e3,
256
    ],
257
    [
258
        0x3ff0b0a7a8ba4a22,
259
        0xbfd196990d22d4a1,
260
        0x3fad5551e6ac0c4d,
261
        0xbf807cce1770bd1a,
262
        0x3f4890347b8848bf,
263
        0xbf0757ec96750b6a,
264
        0x3eb9b258a1e06bce,
265
        0xbe58fc6d22da7572,
266
    ],
267
    [
268
        0x3ff04ce2be70fb47,
269
        0xbfd0449e4b0b9cac,
270
        0x3fa97f7424f4b0e7,
271
        0xbf7ac825439c42f4,
272
        0x3f428f5f65426dfb,
273
        0xbf005b699a90f90f,
274
        0x3eb0a888eecf4593,
275
        0xbe4deace2b32bb31,
276
    ],
277
    [
278
        0x3fefbf9fb0e11cc8,
279
        0xbfcde2640856545a,
280
        0x3fa5f5b1f47f8510,
281
        0xbf7588bc71eb41b9,
282
        0x3f3bc6a0a772f56d,
283
        0xbef6b9fad1f1657a,
284
        0x3ea573204ba66504,
285
        0xbe41d38065c94e44,
286
    ],
287
    [
288
        0x3feed8f18c99e031,
289
        0xbfcb4cb6acd903b4,
290
        0x3fa2c7f3dddd6fc1,
291
        0xbf713052067df4e0,
292
        0x3f34a5027444082f,
293
        0xbeef672bab0e2554,
294
        0x3e9b83c756348cc9,
295
        0xbe3534f1a1079499,
296
    ],
297
    [
298
        0x3fedebd33044166d,
299
        0xbfc8d7cd9053f7d8,
300
        0x3f9ff9957fb3d6e7,
301
        0xbf6b50be55de0f36,
302
        0x3f2e92c8ec53a628,
303
        0xbee5a4b88d508007,
304
        0x3e91a27737559e26,
305
        0xbe2942ae62cb2c14,
306
    ],
307
    [
308
        0x3fecfdbf0386f3bd,
309
        0xbfc68e33d93b0dc4,
310
        0x3f9b2683d58f53de,
311
        0xbf65a9174e70d26f,
312
        0x3f269ddd326d49cd,
313
        0xbeddd8f397a8219c,
314
        0x3e86a755016ad4dd,
315
        0xbe1e366e0139187d,
316
    ],
317
    [
318
        0x3fec132adb8d7464,
319
        0xbfc475a899f61b46,
320
        0x3f970a431397a77c,
321
        0xbf612e3d35beeee2,
322
        0x3f20c16b05738333,
323
        0xbed4a47f873e144e,
324
        0x3e7d3d494c698c02,
325
        0xbe12302c59547fe5,
326
    ],
327
    [
328
        0x3feb2f5fd05555e7,
329
        0xbfc28feefbe03ec7,
330
        0x3f93923acbb3a676,
331
        0xbf5b4ff793cd6358,
332
        0x3f18ea0eb8c913bc,
333
        0xbeccb31ec2baceb1,
334
        0x3e730011e7e80c04,
335
        0xbe0617710635cb1d,
336
    ],
337
    [
338
        0x3fea54853cd9593e,
339
        0xbfc0dbdbaea4dc8e,
340
        0x3f90a93e2c20a0fd,
341
        0xbf55c969ff401ea8,
342
        0x3f129e0cc64fe627,
343
        0xbec4160d8e9d3c2a,
344
        0x3e68e7b67594624a,
345
        0xbdfb1cf2c975b09b,
346
    ],
347
    [
348
        0x3fe983ceece09ff8,
349
        0xbfbeacc78f7a2d00,
350
        0x3f8c74418410655f,
351
        0xbf51756a050e441e,
352
        0x3f0bff3650f7f548,
353
        0xbebc56c0217d3ada,
354
        0x3e607b4918d0b489,
355
        0xbdf0d4be8c1c50f8,
356
    ],
357
];
358
359
trait ErffBackend {
360
    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
361
}
362
363
struct GenErffBackend {}
364
365
impl ErffBackend for GenErffBackend {
366
    #[inline(always)]
367
0
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
368
0
        f_fmla(x, y, z)
369
0
    }
370
}
371
372
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
373
struct FmaErffBackend {}
374
375
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
376
impl ErffBackend for FmaErffBackend {
377
    #[inline(always)]
378
0
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
379
0
        f64::mul_add(x, y, z)
380
0
    }
381
}
382
383
#[inline(always)]
384
0
fn erff_gen<B: ErffBackend>(x: f32, backend: B) -> f32 {
385
0
    let x_u = x.to_bits();
386
0
    let x_abs = x_u & 0x7fff_ffffu32;
387
388
0
    if x_abs >= 0x4080_0000u32 {
389
        static ONE: [f32; 2] = [1.0, -1.0];
390
        static SMALL: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)];
391
392
0
        let sign = x.is_sign_negative() as usize;
393
394
0
        if x_abs >= 0x7f80_0000u32 {
395
0
            return if x_abs > 0x7f80_0000 { x } else { ONE[sign] };
396
0
        }
397
398
0
        return ONE[sign] + SMALL[sign];
399
0
    }
400
401
    // Polynomial approximation:
402
    //   erf(x) ~ x * (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14)
403
0
    let xd = x as f64;
404
0
    let xsq = xd * xd;
405
406
    const EIGHT: u32 = 3 << 23;
407
0
    let idx = unsafe { f32::from_bits(x_abs.wrapping_add(EIGHT)).to_int_unchecked::<usize>() };
408
409
0
    let c = COEFFS[idx];
410
411
0
    let x4 = xsq * xsq;
412
0
    let c0 = backend.fma(xsq, f64::from_bits(c[1]), f64::from_bits(c[0]));
413
0
    let c1 = backend.fma(xsq, f64::from_bits(c[3]), f64::from_bits(c[2]));
414
0
    let c2 = backend.fma(xsq, f64::from_bits(c[5]), f64::from_bits(c[4]));
415
0
    let c3 = backend.fma(xsq, f64::from_bits(c[7]), f64::from_bits(c[6]));
416
417
0
    let x8 = x4 * x4;
418
0
    let p0 = backend.fma(x4, c1, c0);
419
0
    let p1 = backend.fma(x4, c3, c2);
420
421
0
    (xd * backend.fma(x8, p1, p0)) as f32
422
0
}
Unexecuted instantiation: pxfm::err::erff::erff_gen::<pxfm::err::erff::FmaErffBackend>
Unexecuted instantiation: pxfm::err::erff::erff_gen::<pxfm::err::erff::GenErffBackend>
423
424
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
425
#[target_feature(enable = "avx", enable = "fma")]
426
0
unsafe fn erff_fma_impl(x: f32) -> f32 {
427
0
    erff_gen(x, FmaErffBackend {})
428
0
}
429
430
/// Error function
431
///
432
/// Max ulp 0.5
433
#[inline]
434
0
pub fn f_erff(x: f32) -> f32 {
435
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
436
    {
437
        crate::err::erff::erff_gen(x, GenErffBackend {})
438
    }
439
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
440
    {
441
        use std::sync::OnceLock;
442
        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
443
0
        let q = EXECUTOR.get_or_init(|| {
444
0
            if std::arch::is_x86_feature_detected!("avx")
445
0
                && std::arch::is_x86_feature_detected!("fma")
446
            {
447
0
                erff_fma_impl
448
            } else {
449
0
                fn def_erff(x: f32) -> f32 {
450
0
                    erff_gen(x, GenErffBackend {})
451
0
                }
452
0
                def_erff
453
            }
454
0
        });
455
0
        unsafe { q(x) }
456
    }
457
0
}
458
459
#[cfg(test)]
460
mod tests {
461
    use super::*;
462
463
    #[test]
464
    fn f_erff_test() {
465
        assert_eq!(f_erff(0.0), 0.0);
466
        assert_eq!(f_erff(1.0), 0.8427008);
467
        assert_eq!(f_erff(0.5), 0.5204999);
468
        assert_eq!(f_erff(f32::INFINITY), 1.0);
469
        assert_eq!(f_erff(f32::NEG_INFINITY), -1.0);
470
        assert!(f_erff(f32::NAN).is_nan());
471
    }
472
}