Coverage Report

Created: 2026-05-16 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.29/src/err/erfc.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::dd_fmla;
30
use crate::double_double::DoubleDouble;
31
use crate::err::erf::{Erf, erf_accurate, erf_fast};
32
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1, ldexp};
33
use crate::rounding::CpuRoundTiesEven;
34
use std::hint::black_box;
35
36
static ASYMPTOTIC_POLY: [[u64; 13]; 6] = [
37
    [
38
        0x3fe20dd750429b6d,
39
        0x3c61a1feb75a48a8,
40
        0xbfd20dd750429b6c,
41
        0x3fdb14c2f863e403,
42
        0xbff0ecf9db3af35d,
43
        0x400d9eb53ca6eeed,
44
        0xc030a945830d95c8,
45
        0x4056e8a963e2f1f5,
46
        0xc0829b7ccc8f396f,
47
        0x40b15e716e83c27e,
48
        0xc0e1cfdcfbcaf22a,
49
        0x4111986cc7a7e8fe,
50
        0xc1371f7540590a91,
51
    ],
52
    [
53
        0x3fe20dd750429ae7,
54
        0x3c863da89e801fd4,
55
        0xbfd20dd750400795,
56
        0x3fdb14c2f57c490c,
57
        0xbff0ecf95c8c9014,
58
        0x400d9e981f2321ef,
59
        0xc030a81482de1506,
60
        0x4056d662420a604b,
61
        0xc08233c96fff7772,
62
        0x40af5d62018d3e37,
63
        0xc0d9ae55e9554450,
64
        0x410052901e10d139,
65
        0xc1166465df1385f0,
66
    ],
67
    [
68
        0x3fe20dd75041e3fc,
69
        0xbc7c9b491c4920fc,
70
        0xbfd20dd74e5f1526,
71
        0x3fdb14c1d35a40e0,
72
        0xbff0ecdecd30e86b,
73
        0x400d9b4e7f725263,
74
        0xc030958b5ca8fb39,
75
        0x40563e3179bf609c,
76
        0xc0806bbd1cd2d0fd,
77
        0x40a7b66eb6d1d2f2,
78
        0xc0cce5a4b1afab75,
79
        0x40e8b5c6ae6f773c,
80
        0xc0f5475860326f86,
81
    ],
82
    [
83
        0x3fe20dd75025cfe9,
84
        0x3c55a92eef32fb20,
85
        0xbfd20dd71eb9d4e7,
86
        0x3fdb14af4c25db28,
87
        0xbff0ebc78a22b3d8,
88
        0x400d85287a0b3399,
89
        0xc03045f751e5ca1d,
90
        0x4054a0d87ddea589,
91
        0xc07ac6a0981d1eee,
92
        0x409f44822f567956,
93
        0xc0bcba372de71349,
94
        0x40d1a4a19f550ca4,
95
        0xc0d52a580455ed79,
96
    ],
97
    [
98
        0x3fe20dd74eb31d84,
99
        0xbc439c4054b7c090,
100
        0xbfd20dd561af98c4,
101
        0x3fdb1435165d9df1,
102
        0xbff0e6b60308e940,
103
        0x400d3ce30c140882,
104
        0xc02f2083e404c299,
105
        0x40520f113d89b42a,
106
        0xc0741433ebd89f19,
107
        0x4092f35b6a3154f6,
108
        0xc0ab020a4313cf3b,
109
        0x40b90f07e92da7ee,
110
        0xc0b6565e1d7665c3,
111
    ],
112
    [
113
        0x3fe20dd744b3517b,
114
        0xbc6f77ab25e01ab4,
115
        0xbfd20dcc62ec4024,
116
        0x3fdb125bfa4f66c1,
117
        0xbff0d80e65381970,
118
        0x400ca11fbcfa65b2,
119
        0xc02cd9eaffb88315,
120
        0x404e010db42e0da7,
121
        0xc06c5c85250ef6a3,
122
        0x4085e118d9c1eeaf,
123
        0xc098d74be13d3d30,
124
        0x40a211b1b2b5ac83,
125
        0xc09900be759fc663,
126
    ],
127
];
128
129
static ASYMPTOTIC_POLY_ACCURATE: [[u64; 30]; 10] = [
130
    [
131
        0x3fe20dd750429b6d,
132
        0x3c61ae3a912b08f0,
133
        0xbfd20dd750429b6d,
134
        0xbc51ae34c0606d68,
135
        0x3fdb14c2f863e924,
136
        0xbc796c0f4c848fc8,
137
        0xbff0ecf9db3e71b6,
138
        0x3c645d756bd288b0,
139
        0x400d9eb53fad4672,
140
        0xbcac61629de9adf2,
141
        0xc030a945f3d147ea,
142
        0x3cb8fec5ad7ece20,
143
        0x4056e8c02f27ca6d,
144
        0xc0829d1c21c363e0,
145
        0x40b17349b70be627,
146
        0xc0e28a6bb4686182,
147
        0x411602d1662523ca,
148
        0xc14ccae7625c4111,
149
        0x4184237d064f6e0d,
150
        0xc1bb1e5466ca3a2f,
151
        0x41e90ae06a0f6cc1,
152
        0,
153
        0,
154
        0,
155
        0,
156
        0,
157
        0,
158
        0,
159
        0,
160
        0,
161
    ],
162
    [
163
        0x3fe20dd750429b6d,
164
        0x3c61adaa62435c10,
165
        0xbfd20dd750429b6d,
166
        0xbc441516126827c8,
167
        0x3fdb14c2f863e90b,
168
        0x3c7a535780ba5ed4,
169
        0xbff0ecf9db3e65d6,
170
        0xbc9089edde27ad07,
171
        0x400d9eb53fa52f20,
172
        0xbcabc9737e9464ac,
173
        0xc030a945f2cd7621,
174
        0xbcc589f28b700332,
175
        0x4056e8bffd7e194e,
176
        0xc0829d18716876e2,
177
        0x40b17312abe18250,
178
        0xc0e287e73592805c,
179
        0x4115ebf7394a39c1,
180
        0xc14c2f14d46d0cf9,
181
        0x4182af3d256f955e,
182
        0xc1b7041659ebd7aa,
183
        0x41e6039c232e2f71,
184
        0xc2070ca15c5a07cb,
185
        0,
186
        0,
187
        0,
188
        0,
189
        0,
190
        0,
191
        0,
192
        0,
193
    ],
194
    [
195
        0x3fe20dd750429b6d,
196
        0x3c5d3c35b5d37410,
197
        0xbfd20dd750429b56,
198
        0xbc7c028415f6f81b,
199
        0x3fdb14c2f863c1cf,
200
        0x3c51bb0de6470dbc,
201
        0xbff0ecf9db33c363,
202
        0x3c80f8068459eb16,
203
        0x400d9eb53b9ce57b,
204
        0x3ca20cce33e7d84a,
205
        0xc030a945aa2ec4fa,
206
        0xbcdf6e0fcd7c6030,
207
        0x4056e8b824d2bfaa,
208
        0xc0829cc372a6d0b0,
209
        0x40b1703a99ddd429,
210
        0xc0e2749f9a267cc6,
211
        0x4115856a17271849,
212
        0xc14a8bcb4ba9753f,
213
        0x418035dcce882940,
214
        0xc1b1e5d8c5e6e043,
215
        0x41dfe3b4f365386e,
216
        0xc20398fdef2b98fe,
217
        0x42184234d4f4ea12,
218
        0,
219
        0,
220
        0,
221
        0,
222
        0,
223
        0,
224
        0,
225
    ],
226
    [
227
        0x3fe20dd750429b6a,
228
        0x3c8ae622b765e9fd,
229
        0xbfd20dd750428f0e,
230
        0x3c703c6c67d69513,
231
        0x3fdb14c2f8563e8e,
232
        0x3c6766a6bd7aa89c,
233
        0xbff0ecf9d8dedd48,
234
        0x3c90af52e90336e3,
235
        0x400d9eb4aad086fe,
236
        0x3ca640d371d54a19,
237
        0xc030a93f1d01cfe0,
238
        0xbcc68dbd8d9c522c,
239
        0x4056e842e9fd5898,
240
        0xc08299886ef1fb80,
241
        0x40b15e0f0162c9a0,
242
        0xc0e222dbc6b04cd8,
243
        0x411460268db1ebdf,
244
        0xc1474f53ce065fb3,
245
        0x417961ca8553f870,
246
        0xc1a8788395d13798,
247
        0x41d35e37b25d0e81,
248
        0xc1f707b7457c8f5e,
249
        0x4211ff852df1c023,
250
        0xc21b75d0ec56e2cd,
251
        0,
252
        0,
253
        0,
254
        0,
255
        0,
256
        0,
257
    ],
258
    [
259
        0x3fe20dd750429a8f,
260
        0xbc766d8dda59bcea,
261
        0xbfd20dd7503fdbab,
262
        0x3c6707bdffc2b3fe,
263
        0x3fdb14c2f6526025,
264
        0xbc27fa4bb9541140,
265
        0xbff0ecf99c417d45,
266
        0xbc9748645ef7af94,
267
        0x400d9eaa9c712a7d,
268
        0x3ca79e478994ebb4,
269
        0xc030a8ef11fbf141,
270
        0x3cbb5c72d69f8954,
271
        0x4056e4653e0455b1,
272
        0xc08286909448e6cf,
273
        0x40b113424ce76821,
274
        0xc0e1346d859e76de,
275
        0x4111f9f6cf2293bf,
276
        0xc14258e6e3b337db,
277
        0x41714029ecd465fb,
278
        0xc19c530df5337a6f,
279
        0x41c34bc4bbccd336,
280
        0xc1e4a37c52641688,
281
        0x420019707cec2974,
282
        0xc21031fe736ea169,
283
        0x420f6b3003de3ddf,
284
        0,
285
        0,
286
        0,
287
        0,
288
        0,
289
    ],
290
    [
291
        0x3fe20dd75042756b,
292
        0x3c84ad9178b56910,
293
        0xbfd20dd74feda9e8,
294
        0xbc78141c70bbc8d6,
295
        0x3fdb14c2cb128467,
296
        0xbc709aebaa106821,
297
        0xbff0ecf603921a0b,
298
        0x3c97d3cb5bceaf0b,
299
        0x400d9e3e1751ca59,
300
        0x3c76622ae5642670,
301
        0xc030a686af57f547,
302
        0x3cc083b320aff6b6,
303
        0x4056cf0b6c027326,
304
        0xc0823afcb69443d3,
305
        0x40b03ab450d9f1b9,
306
        0xc0de74cdb76bcab4,
307
        0x410c671b60e607f1,
308
        0xc138f1376d324ce4,
309
        0x4163b64276234676,
310
        0xc18aff0ce13c5a8e,
311
        0x41aef20247251e87,
312
        0xc1cc9f5662f721f6,
313
        0x41e4687858e185e1,
314
        0xc1f4fa507be073c2,
315
        0x41fb99ac35ee4acc,
316
        0xc1f16cb585ee3fa9,
317
        0,
318
        0,
319
        0,
320
        0,
321
    ],
322
    [
323
        0x3fe20dd7503e730d,
324
        0x3c84e524a098a467,
325
        0xbfd20dd7498fa6b2,
326
        0x3c260a4e27751c80,
327
        0x3fdb14c061bd2a0c,
328
        0x3c695a8f847d2fc2,
329
        0xbff0ecd0f11b8c7d,
330
        0xbc94126deea76061,
331
        0x400d9b1344463548,
332
        0x3cafe09a4eca9b0e,
333
        0xc030996ea52a87ed,
334
        0xbca924f920db26c0,
335
        0x40567a2264b556b0,
336
        0xc0815dfc2c86b6b5,
337
        0x40accc291b62efe4,
338
        0xc0d81375a78e746a,
339
        0x41033a6f15546329,
340
        0xc12c1e9dc1216010,
341
        0x4152397ea3d43fda,
342
        0xc174661e5b2ea512,
343
        0x4193412367ca5d45,
344
        0xc1ade56b9d7f37c4,
345
        0x41c2851d9722146d,
346
        0xc1d19027baf0c3fe,
347
        0x41d7e7b8b6ab58ac,
348
        0xc1d4c446d56aaf22,
349
        0x41c1492190400505,
350
        0,
351
        0,
352
        0,
353
    ],
354
    [
355
        0x3fe20dd74ff10852,
356
        0x3c8a32f26deff875,
357
        0xbfd20dd6f06c491c,
358
        0x3c770c16e1793358,
359
        0x3fdb14a7d5e7fd4a,
360
        0x3c7479998b54db5b,
361
        0xbff0ebbdb3889c5f,
362
        0xbc759b853e11369c,
363
        0x400d89dd249d7ef8,
364
        0xbc84b5edf0c8c314,
365
        0xc0306526fb386114,
366
        0xbc840d04eed7c7e0,
367
        0x40557ff657e429ce,
368
        0xc07ef63e90d38630,
369
        0x40a6d4f34c4ea3da,
370
        0xc0d04542b9e36a54,
371
        0x40f577bf19097738,
372
        0xc119702fe47c736d,
373
        0x413a7ae12b54fdc6,
374
        0xc157ca3f0f7c4fa9,
375
        0x417225d983963cbf,
376
        0xc1871a6eac612f9e,
377
        0x4198086324225e1e,
378
        0xc1a3de68670a7716,
379
        0x41a91674de4dcbe9,
380
        0xc1a6b44cc15b76c2,
381
        0x419a36dae0f30d80,
382
        0xc17cffc1747ea3dc,
383
        0,
384
        0,
385
    ],
386
    [
387
        0x3fe20dd74ba8f300,
388
        0xbc59dd256871d210,
389
        0xbfd20dd3593675bc,
390
        0x3c7ec0e7ffa91ad9,
391
        0x3fdb13eef86a077a,
392
        0xbc74fb5d78d411b8,
393
        0xbff0e5cf52a11f3a,
394
        0xbc851f36c779dc8c,
395
        0x400d4417a08b39d5,
396
        0x3c91be9fb5956638,
397
        0xc02f91b9f6ce80c3,
398
        0xbccc9c99dd42829c,
399
        0x405356439f45bb43,
400
        0xc078c0ca12819b48,
401
        0x409efcad2ecd6671,
402
        0xc0c21b0af6fc1039,
403
        0x40e327d215ee30c9,
404
        0xc101fabda96167b0,
405
        0x411d82e4373b315d,
406
        0xc134ed9e2ff591e9,
407
        0x41495c85dcd8eab5,
408
        0xc159f016f0a3d62a,
409
        0x41660e89d918b96f,
410
        0xc16e97be202cba64,
411
        0x4170d8a081619793,
412
        0xc16c5422b4fcfc65,
413
        0x4161131a9dc6aed1,
414
        0xc14a457d9dced257,
415
        0x4123605e980e8b86,
416
        0,
417
    ],
418
    [
419
        0x3fe20dd7319d4d25,
420
        0x3c82b02992c3b7ab,
421
        0xbfd20dc29c13ab1b,
422
        0xbc7d78d79b4ad767,
423
        0x3fdb115a57b5ab13,
424
        0xbc6aa8c45be0aa2e,
425
        0xbff0d58ec437efd7,
426
        0xbc5994f00a15e850,
427
        0x400cb1742e229f23,
428
        0xbca8000471d54399,
429
        0xc02d99a5edf7b946,
430
        0xbcbaf76ed7e35cde,
431
        0x4050a8b71058eb28,
432
        0xc072d88289da5bfc,
433
        0x40943ddf24168edb,
434
        0xc0b3e9dfc38b6d1a,
435
        0x40d18d4df97ab3df,
436
        0xc0eb550fc62dcab5,
437
        0x41029cb71f116ed1,
438
        0xc115fc9cc4e854e3,
439
        0x41265915fd0567b1,
440
        0xc1335eb5fca0e46d,
441
        0x413c5261ecc0d789,
442
        0xc14138932dc4eafc,
443
        0x414117d4eb18facd,
444
        0xc13af96163e35eca,
445
        0x4130454a3a63c766,
446
        0xc11c2ebc1d39b44a,
447
        0x40ff3327698e0e6b,
448
        0xc0d094febc3dff35,
449
    ],
450
];
451
452
// Approximation for the fast path of exp(z) for z=zh+zl,
453
// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
454
// (assuming x^y does not overflow or underflow)
455
#[inline]
456
0
fn q_1(z_dd: DoubleDouble) -> DoubleDouble {
457
    const C: [u64; 5] = [
458
        0x3ff0000000000000,
459
        0x3ff0000000000000,
460
        0x3fe0000000000000,
461
        0x3fc5555555995d37,
462
        0x3fa55555558489dc,
463
    ];
464
0
    let z = z_dd.to_f64();
465
0
    let mut q = dd_fmla(f64::from_bits(C[4]), z_dd.hi, f64::from_bits(C[3]));
466
467
0
    q = dd_fmla(q, z, f64::from_bits(C[2]));
468
469
0
    let mut v = DoubleDouble::from_exact_add(f64::from_bits(C[1]), q * z);
470
0
    v = DoubleDouble::quick_mult(z_dd, v);
471
0
    DoubleDouble::f64_add(f64::from_bits(C[0]), v)
472
0
}
473
474
#[inline]
475
0
fn exp_1(x: DoubleDouble) -> DoubleDouble {
476
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
477
0
    let k = (x.hi * INVLOG2).cpu_round_ties_even();
478
479
    const LOG2_DD: DoubleDouble = DoubleDouble::new(
480
        f64::from_bits(0x3bbabc9e3b39803f),
481
        f64::from_bits(0x3f262e42fefa39ef),
482
    );
483
0
    let k_dd = DoubleDouble::quick_f64_mult(k, LOG2_DD);
484
0
    let mut y_dd = DoubleDouble::from_exact_add(x.hi - k_dd.hi, x.lo);
485
0
    y_dd.lo -= k_dd.lo;
486
487
0
    let ki: i64 = k as i64; /* Note: k is an integer, this is just a conversion. */
488
0
    let mi = (ki >> 12).wrapping_add(0x3ff);
489
0
    let i2: i64 = (ki >> 6) & 0x3f;
490
0
    let i1: i64 = ki & 0x3f;
491
0
    let t1 = DoubleDouble::new(
492
0
        f64::from_bits(EXP_REDUCE_T0[i2 as usize].0),
493
0
        f64::from_bits(EXP_REDUCE_T0[i2 as usize].1),
494
    );
495
0
    let t2 = DoubleDouble::new(
496
0
        f64::from_bits(EXP_REDUCE_T1[i1 as usize].0),
497
0
        f64::from_bits(EXP_REDUCE_T1[i1 as usize].1),
498
    );
499
0
    let mut v = DoubleDouble::quick_mult(t2, t1);
500
0
    let q = q_1(y_dd);
501
0
    v = DoubleDouble::quick_mult(v, q);
502
503
0
    let scale = f64::from_bits((mi as u64) << 52);
504
0
    v.hi *= scale;
505
0
    v.lo *= scale;
506
0
    v
507
0
}
508
509
struct Exp {
510
    e: i32,
511
    result: DoubleDouble,
512
}
513
514
0
fn exp_accurate(x_dd: DoubleDouble) -> Exp {
515
    static E2: [u64; 28] = [
516
        0x3ff0000000000000,
517
        0xb960000000000000,
518
        0x3ff0000000000000,
519
        0xb9be200000000000,
520
        0x3fe0000000000000,
521
        0x3a03c00000000000,
522
        0x3fc5555555555555,
523
        0x3c655555555c78d9,
524
        0x3fa5555555555555,
525
        0x3c455555545616e2,
526
        0x3f81111111111111,
527
        0x3c011110121fc314,
528
        0x3f56c16c16c16c17,
529
        0xbbef49e06ee3a56e,
530
        0x3f2a01a01a01a01a,
531
        0x3b6b053e1eeab9c0,
532
        0x3efa01a01a01a01a,
533
        0x3ec71de3a556c733,
534
        0x3e927e4fb7789f66,
535
        0x3e5ae64567f54abe,
536
        0x3e21eed8eff8958b,
537
        0x3de6124613837216,
538
        0x3da93974aaf26a57,
539
        0x3d6ae7f4fd6d0bd9,
540
        0x3d2ae7e982620b25,
541
        0x3ce94e4ca59460d8,
542
        0x3ca69a2a4b7ef36d,
543
        0x3c6abfe1602308c9,
544
    ];
545
    const LOG2INV: f64 = f64::from_bits(0x3ff71547652b82fe);
546
0
    let k: i32 = unsafe {
547
0
        (x_dd.hi * LOG2INV)
548
0
            .cpu_round_ties_even()
549
0
            .to_int_unchecked::<i32>()
550
    };
551
552
    const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa39ef);
553
    /* we approximate LOG2Lacc ~ log(2) - LOG2H with 38 bits, so that
554
    k*LOG2Lacc is exact (k has at most 11 bits) */
555
    const LOG2_L: f64 = f64::from_bits(0x3c7abc9e3b398000);
556
    const LOG2_TINY: f64 = f64::from_bits(0x398f97b57a079a19);
557
0
    let yh = dd_fmla(-k as f64, LOG2_H, x_dd.hi);
558
    /* since |xh+xl| >= 2.92 we have |k| >= 4;
559
    (|k|-1/2)*log(2) <= |x| <= (|k|+1/2)*log(2) thus
560
    1-1/(2|k|) <= |x/(k*log(2))| <= 1+1/(2|k|) thus by Sterbenz theorem
561
    yh is exact too */
562
0
    let mut t = DoubleDouble::from_full_exact_add(-k as f64 * LOG2_L, x_dd.lo);
563
0
    let mut y_dd = DoubleDouble::from_exact_add(yh, t.hi);
564
0
    y_dd.lo = dd_fmla(-k as f64, LOG2_TINY, y_dd.lo + t.lo);
565
    /* now yh+yl approximates xh + xl - k*log(2), and we approximate p(yh+yl)
566
    in h + l */
567
    /* Since |xh| <= 742, we assume |xl| <= ulp(742) = 2^-43. Then since
568
       |k| <= round(742/log(2)) = 1070, |yl| <= 1070*LOG2L + 2^-42 < 2^-42.7.
569
       Since |yh| <= log(2)/2, the contribution of yl is negligible as long
570
       as |i*p[i]*yh^(i-1)*yl| < 2^-104, which holds for i >= 16.
571
       Thus for coefficients of degree 16 or more, we don't take yl into account.
572
    */
573
0
    let mut h = f64::from_bits(E2[19 + 8]); // degree 19
574
0
    for a in (16..=18).rev() {
575
0
        h = dd_fmla(h, y_dd.hi, f64::from_bits(E2[a + 8])); // degree i
576
0
    }
577
    /* degree 15: h*(yh+yl)+E2[15 + 8] */
578
0
    t = DoubleDouble::from_exact_mult(h, y_dd.hi);
579
0
    t.lo = dd_fmla(h, y_dd.lo, t.lo);
580
0
    let mut v = DoubleDouble::from_exact_add(f64::from_bits(E2[15 + 8]), t.hi);
581
0
    v.lo += t.lo;
582
0
    for a in (8..=14).rev() {
583
0
        /* degree i: (h+l)*(yh+yl)+E2[i+8] */
584
0
        t = DoubleDouble::quick_mult(v, y_dd);
585
0
        v = DoubleDouble::from_exact_add(f64::from_bits(E2[a + 8]), t.hi);
586
0
        v.lo += t.lo;
587
0
    }
588
0
    for a in (0..=7).rev() {
589
0
        /* degree i: (h+l)*(yh+yl)+E2[2i]+E2[2i+1] */
590
0
        t = DoubleDouble::quick_mult(v, y_dd);
591
0
        v = DoubleDouble::from_exact_add(f64::from_bits(E2[2 * a]), t.hi);
592
0
        v.lo += t.lo + f64::from_bits(E2[2 * a + 1]);
593
0
    }
594
595
0
    Exp { e: k, result: v }
596
0
}
597
598
#[cold]
599
0
fn erfc_asympt_accurate(x: f64) -> f64 {
600
    /* subnormal exceptions */
601
0
    if x == f64::from_bits(0x403a8f7bfbd15495) {
602
0
        return dd_fmla(
603
0
            f64::from_bits(0x0000000000000001),
604
            -0.25,
605
0
            f64::from_bits(0x000667bd620fd95b),
606
        );
607
0
    }
608
0
    let u_dd = DoubleDouble::from_exact_mult(x, x);
609
0
    let exp_result = exp_accurate(DoubleDouble::new(-u_dd.lo, -u_dd.hi));
610
611
    /* compute 1/x as double-double */
612
0
    let yh = 1.0 / x;
613
    /* Newton's iteration for 1/x is y -> y + y*(1-x*y) */
614
0
    let yl = yh * dd_fmla(-x, yh, 1.0);
615
    // yh+yl approximates 1/x
616
    static THRESHOLD: [u64; 10] = [
617
        0x3fb4500000000000,
618
        0x3fbe000000000000,
619
        0x3fc3f00000000000,
620
        0x3fc9500000000000,
621
        0x3fcf500000000000,
622
        0x3fd3100000000000,
623
        0x3fd7100000000000,
624
        0x3fdbc00000000000,
625
        0x3fe0b00000000000,
626
        0x3fe3000000000000,
627
    ];
628
0
    let mut i = 0usize;
629
0
    while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) {
630
0
        i += 1;
631
0
    }
632
0
    let p = ASYMPTOTIC_POLY_ACCURATE[i];
633
0
    let mut u_dd = DoubleDouble::from_exact_mult(yh, yh);
634
0
    u_dd.lo = dd_fmla(2.0 * yh, yl, u_dd.lo);
635
    /* the polynomial p has degree 29+2i, and its coefficient of largest
636
    degree is p[14+6+i] */
637
0
    let mut z_dd = DoubleDouble::new(0., f64::from_bits(p[14 + 6 + i]));
638
0
    for a in (13..=27 + 2 * i).rev().step_by(2) {
639
0
        /* degree j: (zh+zl)*(uh+ul)+p[(j-1)/2+6]] */
640
0
        let v = DoubleDouble::quick_mult(z_dd, u_dd);
641
0
        z_dd = DoubleDouble::from_full_exact_add(f64::from_bits(p[(a - 1) / 2 + 6]), v.hi);
642
0
        z_dd.lo += v.lo;
643
0
    }
644
0
    for a in (1..=11).rev().step_by(2) {
645
0
        let v = DoubleDouble::quick_mult(z_dd, u_dd);
646
0
        z_dd = DoubleDouble::from_full_exact_add(f64::from_bits(p[a - 1]), v.hi);
647
0
        z_dd.lo += v.lo + f64::from_bits(p[a]);
648
0
    }
649
650
    /* multiply by yh+yl */
651
0
    u_dd = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh));
652
    /* now uh+ul approximates p(1/x), i.e., erfc(x)*exp(x^2) */
653
    /* now multiply (uh+ul)*(eh+el), after normalizing uh+ul to reduce the
654
    number of exceptional cases */
655
0
    u_dd = DoubleDouble::from_exact_add(u_dd.hi, u_dd.lo);
656
0
    let v = DoubleDouble::quick_mult(u_dd, exp_result.result);
657
    /* multiply by 2^e */
658
    /* multiply by 2^e */
659
0
    let mut res = ldexp(v.to_f64(), exp_result.e);
660
0
    if res < f64::from_bits(0x0010000000000000) {
661
0
        /* for erfc(x) in the subnormal range, we have to perform a special
662
0
        rounding */
663
0
        let mut corr = v.hi - ldexp(res, -exp_result.e);
664
0
        corr += v.lo;
665
0
        /* add corr*2^e */
666
0
        res += ldexp(corr, exp_result.e);
667
0
    }
668
0
    res
669
0
}
670
671
#[cold]
672
0
fn erfc_accurate(x: f64) -> f64 {
673
0
    if x < 0. {
674
0
        let mut v_dd = erf_accurate(-x);
675
0
        let t = DoubleDouble::from_exact_add(1.0, v_dd.hi);
676
0
        v_dd.hi = t.hi;
677
0
        v_dd.lo += t.lo;
678
0
        return v_dd.to_f64();
679
0
    } else if x <= f64::from_bits(0x3ffb59ffb450828c) {
680
        // erfc(x) >= 2^-6
681
0
        let mut v_dd = erf_accurate(x);
682
0
        let t = DoubleDouble::from_exact_add(1.0, -v_dd.hi);
683
0
        v_dd.hi = t.hi;
684
0
        v_dd.lo = t.lo - v_dd.lo;
685
0
        return v_dd.to_f64();
686
0
    }
687
    // now 0x1.b59ffb450828cp+0 < x < 0x1.b39dc41e48bfdp+4
688
0
    erfc_asympt_accurate(x)
689
0
}
690
691
/* Fast path for 0x1.713786d9c7c09p+1 < x < 0x1.b39dc41e48bfdp+4,
692
using the asymptotic formula erfc(x) = exp(-x^2) * p(1/x)*/
693
0
fn erfc_asympt_fast(x: f64) -> Erf {
694
    /* for x >= 0x1.9db1bb14e15cap+4, erfc(x) < 2^-970, and we might encounter
695
    underflow issues in the computation of l, thus we delegate this case
696
    to the accurate path */
697
0
    if x >= f64::from_bits(0x4039db1bb14e15ca) {
698
0
        return Erf {
699
0
            err: 1.0,
700
0
            result: DoubleDouble::default(),
701
0
        };
702
0
    }
703
704
0
    let mut u = DoubleDouble::from_exact_mult(x, x);
705
0
    let e_dd = exp_1(DoubleDouble::new(-u.lo, -u.hi));
706
707
    /* the assumptions from exp_1 are satisfied:
708
    * a_mul ensures |ul| <= ulp(uh), thus |ul/uh| <= 2^-52
709
    * since |x| < 0x1.9db1bb14e15cap+4 we have
710
      |ul| < ulp(0x1.9db1bb14e15cap+4^2) = 2^-43 */
711
    /* eh+el approximates exp(-x^2) with maximal relative error 2^-74.139 */
712
713
    /* compute 1/x as double-double */
714
0
    let yh = 1.0 / x;
715
    /* Assume 1 <= x < 2, then 0.5 <= yh <= 1,
716
    and yh = 1/x + eps with |eps| <= 2^-53. */
717
    /* Newton's iteration for 1/x is y -> y + y*(1-x*y) */
718
0
    let yl = yh * dd_fmla(-x, yh, 1.0);
719
    /* x*yh-1 = x*(1/x+eps)-1 = x*eps
720
       with |x*eps| <= 2^-52, thus the error on the FMA is bounded by
721
       ulp(2^-52.1) = 2^-105.
722
       Now |yl| <= |yh| * 2^-52 <= 2^-52, thus the rounding error on
723
       yh * __builtin_fma (-x, yh, 1.0) is bounded also by ulp(2^-52.1) = 2^-105.
724
       From [6], Lemma 3.7, if yl was computed exactly, then yh+yl would differ
725
       from 1/x by at most yh^2/theta^3*(1/x-yh)^2 for some theta in [yh,1/x]
726
       or [1/x,yh].
727
       Since yh, 1/x <= 1, this gives eps^2 <= 2^-106.
728
       Adding the rounding errors, we have:
729
       |yh + yl - 1/x| <= 2^-105 + 2^-105 + 2^-106 < 2^-103.67.
730
       For the relative error, since |yh| >= 1/2, this gives:
731
       |yh + yl - 1/x| < 2^-102.67 * |yh+yl|
732
    */
733
    const THRESHOLD: [u64; 6] = [
734
        0x3fbd500000000000,
735
        0x3fc59da6ca291ba6,
736
        0x3fcbc00000000000,
737
        0x3fd0c00000000000,
738
        0x3fd3800000000000,
739
        0x3fd6300000000000,
740
    ];
741
0
    let mut i = 0usize;
742
0
    while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) {
743
0
        i += 1;
744
0
    }
745
0
    let p = ASYMPTOTIC_POLY[i];
746
0
    u = DoubleDouble::from_exact_mult(yh, yh);
747
    /* Since |yh| <= 1, we have |uh| <= 1 and |ul| <= 2^-53. */
748
0
    u.lo = dd_fmla(2.0 * yh, yl, u.lo);
749
    /* uh+ul approximates (yh+yl)^2, with absolute error bounded by
750
       ulp(ul) + yl^2, where ulp(ul) is the maximal rounding error in
751
       the FMA, and yl^2 is the neglected term.
752
       Since |ul| <= 2^-53, ulp(ul) <= 2^-105, and since |yl| <= 2^-52,
753
       this yields |uh + ul - yh^2| <= 2^-105 + 2^-104 < 2^-103.41.
754
       For the relative error, since |(yh+yl)^2| >= 1/4:
755
       |uh + ul - yh^2| < 2^-101.41 * |uh+ul|.
756
       And relatively to 1/x^2:
757
       yh + yl = 1/x * (1 + eps1)       with |eps1| < 2^-102.67
758
       uh + ul = (yh+yl)^2 * (1 + eps2) with |eps2| < 2^-101.41
759
       This yields:
760
       |uh + ul - 1/x| < 2^-100.90 * |uh+ul|.
761
    */
762
763
    /* evaluate p(uh+ul) */
764
0
    let mut zh: f64 = f64::from_bits(p[12]); // degree 23
765
0
    zh = dd_fmla(zh, u.hi, f64::from_bits(p[11])); // degree 21
766
0
    zh = dd_fmla(zh, u.hi, f64::from_bits(p[10])); // degree 19
767
768
    /* degree 17: zh*(uh+ul)+p[i] */
769
0
    let mut v = DoubleDouble::quick_f64_mult(zh, u);
770
0
    let mut z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[9]), v.hi);
771
0
    z_dd.lo += v.lo;
772
773
0
    for a in (3..=15).rev().step_by(2) {
774
0
        v = DoubleDouble::quick_mult(z_dd, u);
775
0
        z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[((a + 1) / 2) as usize]), v.hi);
776
0
        z_dd.lo += v.lo;
777
0
    }
778
779
    /* degree 1: (zh+zl)*(uh+ul)+p[0]+p[1] */
780
0
    v = DoubleDouble::quick_mult(z_dd, u);
781
0
    z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[0]), v.hi);
782
0
    z_dd.lo += v.lo + f64::from_bits(p[1]);
783
    /* multiply by yh+yl */
784
0
    u = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh));
785
    /* now uh+ul approximates p(1/x) */
786
    /* now multiply (uh+ul)*(eh+el) */
787
0
    v = DoubleDouble::quick_mult(u, e_dd);
788
789
    /* Write y = 1/x.  We have the following errors:
790
       * the maximal mathematical error is:
791
         |erfc(x)*exp(x^2) - p(y)| < 2^-71.804 * |p(y)| (for i=3) thus
792
         |erfc(x) - exp(-x^2)*p(y)| < 2^-71.804 * |exp(-x^2)*p(y)|
793
       * the error in approximating exp(-x^2) by eh+el:
794
         |eh + el - exp(-x^2)| < 2^-74.139 * |eh + el|
795
       * the fact that we evaluate p on yh+yl instead of 1/x
796
         this error is bounded by |p'| * |yh+yl - 1/x|, where
797
         |yh+yl - 1/x| < 2^-102.67 * |yh+yl|, and the relative
798
         error is bounded by |p'/p| * |yh+yl - 1/x|.
799
         Since the maximal value of |p'/p| is bounded by 27.2 (for i=0),
800
         this yields 27.2 * 2^-102.67 < 2^-97.9
801
       * the rounding errors when evaluating p on yh+yl: this error is bounded
802
         (relatively) by 2^-67.184 (for i=5), see analyze_erfc_asympt_fast()
803
         in erfc.sage
804
       * the rounding error in (uh+ul)*(eh+el): we assume this error is bounded
805
         by 2^-80 (relatively)
806
       This yields a global relative bound of:
807
       (1+2^-71.804)*(1+2^-74.139)*(1+2^-97.9)*(1+2^-67.184)*(1+2^-80)-1
808
       < 2^-67.115
809
    */
810
0
    if v.hi >= f64::from_bits(0x044151b9a3fdd5c9) {
811
0
        Erf {
812
0
            err: f64::from_bits(0x3bbd900000000000) * v.hi,
813
0
            result: v,
814
0
        } /* 2^-67.115 < 0x1.d9p-68 */
815
    } else {
816
0
        Erf {
817
0
            result: v,
818
0
            err: f64::from_bits(0x0010000000000000),
819
0
        } // this overestimates 0x1.d9p-68 * h
820
    }
821
0
}
822
823
#[inline]
824
0
fn erfc_fast(x: f64) -> Erf {
825
0
    if x < 0.
826
    // erfc(x) = 1 - erf(x) = 1 + erf(-x)
827
    {
828
0
        let res = erf_fast(-x);
829
        /* h+l approximates erf(-x), with relative error bounded by err,
830
        where err <= 0x1.78p-69 */
831
0
        let err = res.err * res.result.hi; /* convert into absolute error */
832
0
        let mut t = DoubleDouble::from_exact_add(1.0, res.result.hi);
833
0
        t.lo += res.result.lo;
834
        // since h <= 2, the fast_two_sum() error is bounded by 2^-105*h <= 2^-104
835
        /* After the fast_two_sum() call, we have |t| <= ulp(h) <= ulp(2) = 2^-51
836
        thus assuming |l| <= 2^-51 after the cr_erf_fast() call,
837
        we have |t| <= 2^-50 here, thus the rounding
838
        error on t -= *l is bounded by ulp(2^-50) = 2^-102.
839
        The absolute error is thus bounded by err + 2^-104 + 2^-102
840
        = err + 0x1.4p-102.
841
        The maximal value of err here is for |x| < 0.0625, where cr_erf_fast()
842
        returns 0x1.78p-69, and h=1/2, yielding err = 0x1.78p-70 here.
843
        Adding 0x1.4p-102 is thus exact. */
844
0
        return Erf {
845
0
            err: err + f64::from_bits(0x3994000000000000),
846
0
            result: t,
847
0
        };
848
0
    } else if x <= f64::from_bits(0x400713786d9c7c09) {
849
0
        let res = erf_fast(x);
850
        /* h+l approximates erf(x), with relative error bounded by err,
851
        where err <= 0x1.78p-69 */
852
0
        let err = res.err * res.result.hi; /* convert into absolute error */
853
0
        let mut t = DoubleDouble::from_exact_add(1.0, -res.result.hi);
854
0
        t.lo -= res.result.lo;
855
        /* for x >= 0x1.e861fbb24c00ap-2, erf(x) >= 1/2, thus 1-h is exact
856
        by Sterbenz theorem, thus t = 0 in fast_two_sum(), and we have t = -l
857
        here, thus the absolute error is err */
858
0
        if x >= f64::from_bits(0x3fde861fbb24c00a) {
859
0
            return Erf { err, result: t };
860
0
        }
861
        /* for x < 0x1.e861fbb24c00ap-2, the error in fast_two_sum() is bounded
862
        by 2^-105*h, and since h <= 1/2, this yields 2^-106.
863
        After the fast_two_sum() call, we have |t| <= ulp(h) <= ulp(1/2) = 2^-53
864
        thus assuming |l| <= 2^-53 after the cr_erf_fast() call,
865
        we have |t| <= 2^-52 here, thus the rounding
866
        error on t -= *l is bounded by ulp(2^-52) = 2^-104.
867
        The absolute error is thus bounded by err + 2^-106 + 2^-104
868
        The maximal value of err here is for x < 0.0625, where cr_erf_fast()
869
        returns 0x1.78p-69, and h=1/2, yielding err = 0x1.78p-70 here.
870
        Adding 0x1.4p-104 is thus exact. */
871
0
        return Erf {
872
0
            err: err + f64::from_bits(0x3974000000000000),
873
0
            result: t,
874
0
        };
875
0
    }
876
    /* Now THRESHOLD1 < x < 0x1.b39dc41e48bfdp+4 thus erfc(x) < 0.000046. */
877
    /* on a i7-8700 with gcc 12.2.0, for x in [THRESHOLD1,+5.0],
878
    the average reciprocal throughput is about 111 cycles
879
    (among which 20 cycles for exp_1) */
880
0
    erfc_asympt_fast(x)
881
0
}
882
883
/// Complementary error function
884
///
885
/// Max ulp 0.5
886
0
pub fn f_erfc(x: f64) -> f64 {
887
0
    let t: u64 = x.to_bits();
888
0
    let at: u64 = t & 0x7fff_ffff_ffff_ffff;
889
890
0
    if t >= 0x8000000000000000u64
891
    // x = -NaN or x <= 0 (excluding +0)
892
    {
893
        // for x <= -0x1.7744f8f74e94bp2, erfc(x) rounds to 2 (to nearest)
894
0
        if t >= 0xc017744f8f74e94bu64
895
        // x = NaN or x <= -0x1.7744f8f74e94bp2
896
        {
897
0
            if t >= 0xfff0000000000000u64 {
898
                // -Inf or NaN
899
0
                if t == 0xfff0000000000000u64 {
900
0
                    return 2.0;
901
0
                } // -Inf
902
0
                return x + x; // NaN
903
0
            }
904
0
            return black_box(2.0) - black_box(f64::from_bits(0x3c90000000000000)); // rounds to 2 or below(2)
905
0
        }
906
907
        // for -9.8390953768041405e-17 <= x <= 0, erfc(x) rounds to 1 (to nearest)
908
0
        if f64::from_bits(0xbc9c5bf891b4ef6a) <= x {
909
0
            return dd_fmla(-x, f64::from_bits(0x3c90000000000000), 1.0);
910
0
        }
911
    } else
912
    // x = +NaN or x >= 0 (excluding -0)
913
    {
914
        // for x >= 0x1.b39dc41e48bfdp+4, erfc(x) < 2^-1075: rounds to 0 or 2^-1074
915
0
        if at >= 0x403b39dc41e48bfdu64
916
        // x = NaN or x >= 0x1.b39dc41e48bfdp+4
917
        {
918
0
            if at >= 0x7ff0000000000000u64 {
919
                // +Inf or NaN
920
0
                if at == 0x7ff0000000000000u64 {
921
0
                    return 0.0;
922
0
                } // +Inf
923
0
                return x + x; // NaN
924
0
            }
925
0
            return black_box(f64::from_bits(0x0000000000000001)) * black_box(0.25); // 0 or 2^-1074 wrt rounding
926
0
        }
927
928
        // for 0 <= x <= 0x1.c5bf891b4ef6ap-55, erfc(x) rounds to 1 (to nearest)
929
0
        if x <= f64::from_bits(0x3c8c5bf891b4ef6a) {
930
0
            return dd_fmla(-x, f64::from_bits(0x3c90000000000000), 1.0);
931
0
        }
932
    }
933
934
    /* now -0x1.7744f8f74e94bp+2 < x < -0x1.c5bf891b4ef6ap-54
935
    or 0x1.c5bf891b4ef6ap-55 < x < 0x1.b39dc41e48bfdp+4 */
936
937
0
    let result = erfc_fast(x);
938
939
0
    let left = result.result.hi + (result.result.lo - result.err);
940
0
    let right = result.result.hi + (result.result.lo + result.err);
941
0
    if left == right {
942
0
        return left;
943
0
    }
944
0
    erfc_accurate(x)
945
0
}
946
947
#[cfg(test)]
948
mod tests {
949
    use super::*;
950
    #[test]
951
    fn test_erfc() {
952
        assert_eq!(f_erfc(1.0), 0.15729920705028513);
953
        assert_eq!(f_erfc(0.5), 0.4795001221869535);
954
        assert_eq!(f_erfc(0.000000005), 0.9999999943581042);
955
        assert_eq!(f_erfc(-0.00000000000065465465423305), 1.0000000000007387);
956
        assert!(f_erfc(f64::NAN).is_nan());
957
        assert_eq!(f_erfc(f64::INFINITY), 0.0);
958
        assert_eq!(f_erfc(f64::NEG_INFINITY), 2.0);
959
    }
960
}