/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.29/src/err/erfc.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::dd_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::err::erf::{Erf, erf_accurate, erf_fast}; |
32 | | use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1, ldexp}; |
33 | | use crate::rounding::CpuRoundTiesEven; |
34 | | use std::hint::black_box; |
35 | | |
36 | | static ASYMPTOTIC_POLY: [[u64; 13]; 6] = [ |
37 | | [ |
38 | | 0x3fe20dd750429b6d, |
39 | | 0x3c61a1feb75a48a8, |
40 | | 0xbfd20dd750429b6c, |
41 | | 0x3fdb14c2f863e403, |
42 | | 0xbff0ecf9db3af35d, |
43 | | 0x400d9eb53ca6eeed, |
44 | | 0xc030a945830d95c8, |
45 | | 0x4056e8a963e2f1f5, |
46 | | 0xc0829b7ccc8f396f, |
47 | | 0x40b15e716e83c27e, |
48 | | 0xc0e1cfdcfbcaf22a, |
49 | | 0x4111986cc7a7e8fe, |
50 | | 0xc1371f7540590a91, |
51 | | ], |
52 | | [ |
53 | | 0x3fe20dd750429ae7, |
54 | | 0x3c863da89e801fd4, |
55 | | 0xbfd20dd750400795, |
56 | | 0x3fdb14c2f57c490c, |
57 | | 0xbff0ecf95c8c9014, |
58 | | 0x400d9e981f2321ef, |
59 | | 0xc030a81482de1506, |
60 | | 0x4056d662420a604b, |
61 | | 0xc08233c96fff7772, |
62 | | 0x40af5d62018d3e37, |
63 | | 0xc0d9ae55e9554450, |
64 | | 0x410052901e10d139, |
65 | | 0xc1166465df1385f0, |
66 | | ], |
67 | | [ |
68 | | 0x3fe20dd75041e3fc, |
69 | | 0xbc7c9b491c4920fc, |
70 | | 0xbfd20dd74e5f1526, |
71 | | 0x3fdb14c1d35a40e0, |
72 | | 0xbff0ecdecd30e86b, |
73 | | 0x400d9b4e7f725263, |
74 | | 0xc030958b5ca8fb39, |
75 | | 0x40563e3179bf609c, |
76 | | 0xc0806bbd1cd2d0fd, |
77 | | 0x40a7b66eb6d1d2f2, |
78 | | 0xc0cce5a4b1afab75, |
79 | | 0x40e8b5c6ae6f773c, |
80 | | 0xc0f5475860326f86, |
81 | | ], |
82 | | [ |
83 | | 0x3fe20dd75025cfe9, |
84 | | 0x3c55a92eef32fb20, |
85 | | 0xbfd20dd71eb9d4e7, |
86 | | 0x3fdb14af4c25db28, |
87 | | 0xbff0ebc78a22b3d8, |
88 | | 0x400d85287a0b3399, |
89 | | 0xc03045f751e5ca1d, |
90 | | 0x4054a0d87ddea589, |
91 | | 0xc07ac6a0981d1eee, |
92 | | 0x409f44822f567956, |
93 | | 0xc0bcba372de71349, |
94 | | 0x40d1a4a19f550ca4, |
95 | | 0xc0d52a580455ed79, |
96 | | ], |
97 | | [ |
98 | | 0x3fe20dd74eb31d84, |
99 | | 0xbc439c4054b7c090, |
100 | | 0xbfd20dd561af98c4, |
101 | | 0x3fdb1435165d9df1, |
102 | | 0xbff0e6b60308e940, |
103 | | 0x400d3ce30c140882, |
104 | | 0xc02f2083e404c299, |
105 | | 0x40520f113d89b42a, |
106 | | 0xc0741433ebd89f19, |
107 | | 0x4092f35b6a3154f6, |
108 | | 0xc0ab020a4313cf3b, |
109 | | 0x40b90f07e92da7ee, |
110 | | 0xc0b6565e1d7665c3, |
111 | | ], |
112 | | [ |
113 | | 0x3fe20dd744b3517b, |
114 | | 0xbc6f77ab25e01ab4, |
115 | | 0xbfd20dcc62ec4024, |
116 | | 0x3fdb125bfa4f66c1, |
117 | | 0xbff0d80e65381970, |
118 | | 0x400ca11fbcfa65b2, |
119 | | 0xc02cd9eaffb88315, |
120 | | 0x404e010db42e0da7, |
121 | | 0xc06c5c85250ef6a3, |
122 | | 0x4085e118d9c1eeaf, |
123 | | 0xc098d74be13d3d30, |
124 | | 0x40a211b1b2b5ac83, |
125 | | 0xc09900be759fc663, |
126 | | ], |
127 | | ]; |
128 | | |
129 | | static ASYMPTOTIC_POLY_ACCURATE: [[u64; 30]; 10] = [ |
130 | | [ |
131 | | 0x3fe20dd750429b6d, |
132 | | 0x3c61ae3a912b08f0, |
133 | | 0xbfd20dd750429b6d, |
134 | | 0xbc51ae34c0606d68, |
135 | | 0x3fdb14c2f863e924, |
136 | | 0xbc796c0f4c848fc8, |
137 | | 0xbff0ecf9db3e71b6, |
138 | | 0x3c645d756bd288b0, |
139 | | 0x400d9eb53fad4672, |
140 | | 0xbcac61629de9adf2, |
141 | | 0xc030a945f3d147ea, |
142 | | 0x3cb8fec5ad7ece20, |
143 | | 0x4056e8c02f27ca6d, |
144 | | 0xc0829d1c21c363e0, |
145 | | 0x40b17349b70be627, |
146 | | 0xc0e28a6bb4686182, |
147 | | 0x411602d1662523ca, |
148 | | 0xc14ccae7625c4111, |
149 | | 0x4184237d064f6e0d, |
150 | | 0xc1bb1e5466ca3a2f, |
151 | | 0x41e90ae06a0f6cc1, |
152 | | 0, |
153 | | 0, |
154 | | 0, |
155 | | 0, |
156 | | 0, |
157 | | 0, |
158 | | 0, |
159 | | 0, |
160 | | 0, |
161 | | ], |
162 | | [ |
163 | | 0x3fe20dd750429b6d, |
164 | | 0x3c61adaa62435c10, |
165 | | 0xbfd20dd750429b6d, |
166 | | 0xbc441516126827c8, |
167 | | 0x3fdb14c2f863e90b, |
168 | | 0x3c7a535780ba5ed4, |
169 | | 0xbff0ecf9db3e65d6, |
170 | | 0xbc9089edde27ad07, |
171 | | 0x400d9eb53fa52f20, |
172 | | 0xbcabc9737e9464ac, |
173 | | 0xc030a945f2cd7621, |
174 | | 0xbcc589f28b700332, |
175 | | 0x4056e8bffd7e194e, |
176 | | 0xc0829d18716876e2, |
177 | | 0x40b17312abe18250, |
178 | | 0xc0e287e73592805c, |
179 | | 0x4115ebf7394a39c1, |
180 | | 0xc14c2f14d46d0cf9, |
181 | | 0x4182af3d256f955e, |
182 | | 0xc1b7041659ebd7aa, |
183 | | 0x41e6039c232e2f71, |
184 | | 0xc2070ca15c5a07cb, |
185 | | 0, |
186 | | 0, |
187 | | 0, |
188 | | 0, |
189 | | 0, |
190 | | 0, |
191 | | 0, |
192 | | 0, |
193 | | ], |
194 | | [ |
195 | | 0x3fe20dd750429b6d, |
196 | | 0x3c5d3c35b5d37410, |
197 | | 0xbfd20dd750429b56, |
198 | | 0xbc7c028415f6f81b, |
199 | | 0x3fdb14c2f863c1cf, |
200 | | 0x3c51bb0de6470dbc, |
201 | | 0xbff0ecf9db33c363, |
202 | | 0x3c80f8068459eb16, |
203 | | 0x400d9eb53b9ce57b, |
204 | | 0x3ca20cce33e7d84a, |
205 | | 0xc030a945aa2ec4fa, |
206 | | 0xbcdf6e0fcd7c6030, |
207 | | 0x4056e8b824d2bfaa, |
208 | | 0xc0829cc372a6d0b0, |
209 | | 0x40b1703a99ddd429, |
210 | | 0xc0e2749f9a267cc6, |
211 | | 0x4115856a17271849, |
212 | | 0xc14a8bcb4ba9753f, |
213 | | 0x418035dcce882940, |
214 | | 0xc1b1e5d8c5e6e043, |
215 | | 0x41dfe3b4f365386e, |
216 | | 0xc20398fdef2b98fe, |
217 | | 0x42184234d4f4ea12, |
218 | | 0, |
219 | | 0, |
220 | | 0, |
221 | | 0, |
222 | | 0, |
223 | | 0, |
224 | | 0, |
225 | | ], |
226 | | [ |
227 | | 0x3fe20dd750429b6a, |
228 | | 0x3c8ae622b765e9fd, |
229 | | 0xbfd20dd750428f0e, |
230 | | 0x3c703c6c67d69513, |
231 | | 0x3fdb14c2f8563e8e, |
232 | | 0x3c6766a6bd7aa89c, |
233 | | 0xbff0ecf9d8dedd48, |
234 | | 0x3c90af52e90336e3, |
235 | | 0x400d9eb4aad086fe, |
236 | | 0x3ca640d371d54a19, |
237 | | 0xc030a93f1d01cfe0, |
238 | | 0xbcc68dbd8d9c522c, |
239 | | 0x4056e842e9fd5898, |
240 | | 0xc08299886ef1fb80, |
241 | | 0x40b15e0f0162c9a0, |
242 | | 0xc0e222dbc6b04cd8, |
243 | | 0x411460268db1ebdf, |
244 | | 0xc1474f53ce065fb3, |
245 | | 0x417961ca8553f870, |
246 | | 0xc1a8788395d13798, |
247 | | 0x41d35e37b25d0e81, |
248 | | 0xc1f707b7457c8f5e, |
249 | | 0x4211ff852df1c023, |
250 | | 0xc21b75d0ec56e2cd, |
251 | | 0, |
252 | | 0, |
253 | | 0, |
254 | | 0, |
255 | | 0, |
256 | | 0, |
257 | | ], |
258 | | [ |
259 | | 0x3fe20dd750429a8f, |
260 | | 0xbc766d8dda59bcea, |
261 | | 0xbfd20dd7503fdbab, |
262 | | 0x3c6707bdffc2b3fe, |
263 | | 0x3fdb14c2f6526025, |
264 | | 0xbc27fa4bb9541140, |
265 | | 0xbff0ecf99c417d45, |
266 | | 0xbc9748645ef7af94, |
267 | | 0x400d9eaa9c712a7d, |
268 | | 0x3ca79e478994ebb4, |
269 | | 0xc030a8ef11fbf141, |
270 | | 0x3cbb5c72d69f8954, |
271 | | 0x4056e4653e0455b1, |
272 | | 0xc08286909448e6cf, |
273 | | 0x40b113424ce76821, |
274 | | 0xc0e1346d859e76de, |
275 | | 0x4111f9f6cf2293bf, |
276 | | 0xc14258e6e3b337db, |
277 | | 0x41714029ecd465fb, |
278 | | 0xc19c530df5337a6f, |
279 | | 0x41c34bc4bbccd336, |
280 | | 0xc1e4a37c52641688, |
281 | | 0x420019707cec2974, |
282 | | 0xc21031fe736ea169, |
283 | | 0x420f6b3003de3ddf, |
284 | | 0, |
285 | | 0, |
286 | | 0, |
287 | | 0, |
288 | | 0, |
289 | | ], |
290 | | [ |
291 | | 0x3fe20dd75042756b, |
292 | | 0x3c84ad9178b56910, |
293 | | 0xbfd20dd74feda9e8, |
294 | | 0xbc78141c70bbc8d6, |
295 | | 0x3fdb14c2cb128467, |
296 | | 0xbc709aebaa106821, |
297 | | 0xbff0ecf603921a0b, |
298 | | 0x3c97d3cb5bceaf0b, |
299 | | 0x400d9e3e1751ca59, |
300 | | 0x3c76622ae5642670, |
301 | | 0xc030a686af57f547, |
302 | | 0x3cc083b320aff6b6, |
303 | | 0x4056cf0b6c027326, |
304 | | 0xc0823afcb69443d3, |
305 | | 0x40b03ab450d9f1b9, |
306 | | 0xc0de74cdb76bcab4, |
307 | | 0x410c671b60e607f1, |
308 | | 0xc138f1376d324ce4, |
309 | | 0x4163b64276234676, |
310 | | 0xc18aff0ce13c5a8e, |
311 | | 0x41aef20247251e87, |
312 | | 0xc1cc9f5662f721f6, |
313 | | 0x41e4687858e185e1, |
314 | | 0xc1f4fa507be073c2, |
315 | | 0x41fb99ac35ee4acc, |
316 | | 0xc1f16cb585ee3fa9, |
317 | | 0, |
318 | | 0, |
319 | | 0, |
320 | | 0, |
321 | | ], |
322 | | [ |
323 | | 0x3fe20dd7503e730d, |
324 | | 0x3c84e524a098a467, |
325 | | 0xbfd20dd7498fa6b2, |
326 | | 0x3c260a4e27751c80, |
327 | | 0x3fdb14c061bd2a0c, |
328 | | 0x3c695a8f847d2fc2, |
329 | | 0xbff0ecd0f11b8c7d, |
330 | | 0xbc94126deea76061, |
331 | | 0x400d9b1344463548, |
332 | | 0x3cafe09a4eca9b0e, |
333 | | 0xc030996ea52a87ed, |
334 | | 0xbca924f920db26c0, |
335 | | 0x40567a2264b556b0, |
336 | | 0xc0815dfc2c86b6b5, |
337 | | 0x40accc291b62efe4, |
338 | | 0xc0d81375a78e746a, |
339 | | 0x41033a6f15546329, |
340 | | 0xc12c1e9dc1216010, |
341 | | 0x4152397ea3d43fda, |
342 | | 0xc174661e5b2ea512, |
343 | | 0x4193412367ca5d45, |
344 | | 0xc1ade56b9d7f37c4, |
345 | | 0x41c2851d9722146d, |
346 | | 0xc1d19027baf0c3fe, |
347 | | 0x41d7e7b8b6ab58ac, |
348 | | 0xc1d4c446d56aaf22, |
349 | | 0x41c1492190400505, |
350 | | 0, |
351 | | 0, |
352 | | 0, |
353 | | ], |
354 | | [ |
355 | | 0x3fe20dd74ff10852, |
356 | | 0x3c8a32f26deff875, |
357 | | 0xbfd20dd6f06c491c, |
358 | | 0x3c770c16e1793358, |
359 | | 0x3fdb14a7d5e7fd4a, |
360 | | 0x3c7479998b54db5b, |
361 | | 0xbff0ebbdb3889c5f, |
362 | | 0xbc759b853e11369c, |
363 | | 0x400d89dd249d7ef8, |
364 | | 0xbc84b5edf0c8c314, |
365 | | 0xc0306526fb386114, |
366 | | 0xbc840d04eed7c7e0, |
367 | | 0x40557ff657e429ce, |
368 | | 0xc07ef63e90d38630, |
369 | | 0x40a6d4f34c4ea3da, |
370 | | 0xc0d04542b9e36a54, |
371 | | 0x40f577bf19097738, |
372 | | 0xc119702fe47c736d, |
373 | | 0x413a7ae12b54fdc6, |
374 | | 0xc157ca3f0f7c4fa9, |
375 | | 0x417225d983963cbf, |
376 | | 0xc1871a6eac612f9e, |
377 | | 0x4198086324225e1e, |
378 | | 0xc1a3de68670a7716, |
379 | | 0x41a91674de4dcbe9, |
380 | | 0xc1a6b44cc15b76c2, |
381 | | 0x419a36dae0f30d80, |
382 | | 0xc17cffc1747ea3dc, |
383 | | 0, |
384 | | 0, |
385 | | ], |
386 | | [ |
387 | | 0x3fe20dd74ba8f300, |
388 | | 0xbc59dd256871d210, |
389 | | 0xbfd20dd3593675bc, |
390 | | 0x3c7ec0e7ffa91ad9, |
391 | | 0x3fdb13eef86a077a, |
392 | | 0xbc74fb5d78d411b8, |
393 | | 0xbff0e5cf52a11f3a, |
394 | | 0xbc851f36c779dc8c, |
395 | | 0x400d4417a08b39d5, |
396 | | 0x3c91be9fb5956638, |
397 | | 0xc02f91b9f6ce80c3, |
398 | | 0xbccc9c99dd42829c, |
399 | | 0x405356439f45bb43, |
400 | | 0xc078c0ca12819b48, |
401 | | 0x409efcad2ecd6671, |
402 | | 0xc0c21b0af6fc1039, |
403 | | 0x40e327d215ee30c9, |
404 | | 0xc101fabda96167b0, |
405 | | 0x411d82e4373b315d, |
406 | | 0xc134ed9e2ff591e9, |
407 | | 0x41495c85dcd8eab5, |
408 | | 0xc159f016f0a3d62a, |
409 | | 0x41660e89d918b96f, |
410 | | 0xc16e97be202cba64, |
411 | | 0x4170d8a081619793, |
412 | | 0xc16c5422b4fcfc65, |
413 | | 0x4161131a9dc6aed1, |
414 | | 0xc14a457d9dced257, |
415 | | 0x4123605e980e8b86, |
416 | | 0, |
417 | | ], |
418 | | [ |
419 | | 0x3fe20dd7319d4d25, |
420 | | 0x3c82b02992c3b7ab, |
421 | | 0xbfd20dc29c13ab1b, |
422 | | 0xbc7d78d79b4ad767, |
423 | | 0x3fdb115a57b5ab13, |
424 | | 0xbc6aa8c45be0aa2e, |
425 | | 0xbff0d58ec437efd7, |
426 | | 0xbc5994f00a15e850, |
427 | | 0x400cb1742e229f23, |
428 | | 0xbca8000471d54399, |
429 | | 0xc02d99a5edf7b946, |
430 | | 0xbcbaf76ed7e35cde, |
431 | | 0x4050a8b71058eb28, |
432 | | 0xc072d88289da5bfc, |
433 | | 0x40943ddf24168edb, |
434 | | 0xc0b3e9dfc38b6d1a, |
435 | | 0x40d18d4df97ab3df, |
436 | | 0xc0eb550fc62dcab5, |
437 | | 0x41029cb71f116ed1, |
438 | | 0xc115fc9cc4e854e3, |
439 | | 0x41265915fd0567b1, |
440 | | 0xc1335eb5fca0e46d, |
441 | | 0x413c5261ecc0d789, |
442 | | 0xc14138932dc4eafc, |
443 | | 0x414117d4eb18facd, |
444 | | 0xc13af96163e35eca, |
445 | | 0x4130454a3a63c766, |
446 | | 0xc11c2ebc1d39b44a, |
447 | | 0x40ff3327698e0e6b, |
448 | | 0xc0d094febc3dff35, |
449 | | ], |
450 | | ]; |
451 | | |
452 | | // Approximation for the fast path of exp(z) for z=zh+zl, |
453 | | // with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6 |
454 | | // (assuming x^y does not overflow or underflow) |
455 | | #[inline] |
456 | 0 | fn q_1(z_dd: DoubleDouble) -> DoubleDouble { |
457 | | const C: [u64; 5] = [ |
458 | | 0x3ff0000000000000, |
459 | | 0x3ff0000000000000, |
460 | | 0x3fe0000000000000, |
461 | | 0x3fc5555555995d37, |
462 | | 0x3fa55555558489dc, |
463 | | ]; |
464 | 0 | let z = z_dd.to_f64(); |
465 | 0 | let mut q = dd_fmla(f64::from_bits(C[4]), z_dd.hi, f64::from_bits(C[3])); |
466 | | |
467 | 0 | q = dd_fmla(q, z, f64::from_bits(C[2])); |
468 | | |
469 | 0 | let mut v = DoubleDouble::from_exact_add(f64::from_bits(C[1]), q * z); |
470 | 0 | v = DoubleDouble::quick_mult(z_dd, v); |
471 | 0 | DoubleDouble::f64_add(f64::from_bits(C[0]), v) |
472 | 0 | } |
473 | | |
474 | | #[inline] |
475 | 0 | fn exp_1(x: DoubleDouble) -> DoubleDouble { |
476 | | const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */ |
477 | 0 | let k = (x.hi * INVLOG2).cpu_round_ties_even(); |
478 | | |
479 | | const LOG2_DD: DoubleDouble = DoubleDouble::new( |
480 | | f64::from_bits(0x3bbabc9e3b39803f), |
481 | | f64::from_bits(0x3f262e42fefa39ef), |
482 | | ); |
483 | 0 | let k_dd = DoubleDouble::quick_f64_mult(k, LOG2_DD); |
484 | 0 | let mut y_dd = DoubleDouble::from_exact_add(x.hi - k_dd.hi, x.lo); |
485 | 0 | y_dd.lo -= k_dd.lo; |
486 | | |
487 | 0 | let ki: i64 = k as i64; /* Note: k is an integer, this is just a conversion. */ |
488 | 0 | let mi = (ki >> 12).wrapping_add(0x3ff); |
489 | 0 | let i2: i64 = (ki >> 6) & 0x3f; |
490 | 0 | let i1: i64 = ki & 0x3f; |
491 | 0 | let t1 = DoubleDouble::new( |
492 | 0 | f64::from_bits(EXP_REDUCE_T0[i2 as usize].0), |
493 | 0 | f64::from_bits(EXP_REDUCE_T0[i2 as usize].1), |
494 | | ); |
495 | 0 | let t2 = DoubleDouble::new( |
496 | 0 | f64::from_bits(EXP_REDUCE_T1[i1 as usize].0), |
497 | 0 | f64::from_bits(EXP_REDUCE_T1[i1 as usize].1), |
498 | | ); |
499 | 0 | let mut v = DoubleDouble::quick_mult(t2, t1); |
500 | 0 | let q = q_1(y_dd); |
501 | 0 | v = DoubleDouble::quick_mult(v, q); |
502 | | |
503 | 0 | let scale = f64::from_bits((mi as u64) << 52); |
504 | 0 | v.hi *= scale; |
505 | 0 | v.lo *= scale; |
506 | 0 | v |
507 | 0 | } |
508 | | |
509 | | struct Exp { |
510 | | e: i32, |
511 | | result: DoubleDouble, |
512 | | } |
513 | | |
514 | 0 | fn exp_accurate(x_dd: DoubleDouble) -> Exp { |
515 | | static E2: [u64; 28] = [ |
516 | | 0x3ff0000000000000, |
517 | | 0xb960000000000000, |
518 | | 0x3ff0000000000000, |
519 | | 0xb9be200000000000, |
520 | | 0x3fe0000000000000, |
521 | | 0x3a03c00000000000, |
522 | | 0x3fc5555555555555, |
523 | | 0x3c655555555c78d9, |
524 | | 0x3fa5555555555555, |
525 | | 0x3c455555545616e2, |
526 | | 0x3f81111111111111, |
527 | | 0x3c011110121fc314, |
528 | | 0x3f56c16c16c16c17, |
529 | | 0xbbef49e06ee3a56e, |
530 | | 0x3f2a01a01a01a01a, |
531 | | 0x3b6b053e1eeab9c0, |
532 | | 0x3efa01a01a01a01a, |
533 | | 0x3ec71de3a556c733, |
534 | | 0x3e927e4fb7789f66, |
535 | | 0x3e5ae64567f54abe, |
536 | | 0x3e21eed8eff8958b, |
537 | | 0x3de6124613837216, |
538 | | 0x3da93974aaf26a57, |
539 | | 0x3d6ae7f4fd6d0bd9, |
540 | | 0x3d2ae7e982620b25, |
541 | | 0x3ce94e4ca59460d8, |
542 | | 0x3ca69a2a4b7ef36d, |
543 | | 0x3c6abfe1602308c9, |
544 | | ]; |
545 | | const LOG2INV: f64 = f64::from_bits(0x3ff71547652b82fe); |
546 | 0 | let k: i32 = unsafe { |
547 | 0 | (x_dd.hi * LOG2INV) |
548 | 0 | .cpu_round_ties_even() |
549 | 0 | .to_int_unchecked::<i32>() |
550 | | }; |
551 | | |
552 | | const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa39ef); |
553 | | /* we approximate LOG2Lacc ~ log(2) - LOG2H with 38 bits, so that |
554 | | k*LOG2Lacc is exact (k has at most 11 bits) */ |
555 | | const LOG2_L: f64 = f64::from_bits(0x3c7abc9e3b398000); |
556 | | const LOG2_TINY: f64 = f64::from_bits(0x398f97b57a079a19); |
557 | 0 | let yh = dd_fmla(-k as f64, LOG2_H, x_dd.hi); |
558 | | /* since |xh+xl| >= 2.92 we have |k| >= 4; |
559 | | (|k|-1/2)*log(2) <= |x| <= (|k|+1/2)*log(2) thus |
560 | | 1-1/(2|k|) <= |x/(k*log(2))| <= 1+1/(2|k|) thus by Sterbenz theorem |
561 | | yh is exact too */ |
562 | 0 | let mut t = DoubleDouble::from_full_exact_add(-k as f64 * LOG2_L, x_dd.lo); |
563 | 0 | let mut y_dd = DoubleDouble::from_exact_add(yh, t.hi); |
564 | 0 | y_dd.lo = dd_fmla(-k as f64, LOG2_TINY, y_dd.lo + t.lo); |
565 | | /* now yh+yl approximates xh + xl - k*log(2), and we approximate p(yh+yl) |
566 | | in h + l */ |
567 | | /* Since |xh| <= 742, we assume |xl| <= ulp(742) = 2^-43. Then since |
568 | | |k| <= round(742/log(2)) = 1070, |yl| <= 1070*LOG2L + 2^-42 < 2^-42.7. |
569 | | Since |yh| <= log(2)/2, the contribution of yl is negligible as long |
570 | | as |i*p[i]*yh^(i-1)*yl| < 2^-104, which holds for i >= 16. |
571 | | Thus for coefficients of degree 16 or more, we don't take yl into account. |
572 | | */ |
573 | 0 | let mut h = f64::from_bits(E2[19 + 8]); // degree 19 |
574 | 0 | for a in (16..=18).rev() { |
575 | 0 | h = dd_fmla(h, y_dd.hi, f64::from_bits(E2[a + 8])); // degree i |
576 | 0 | } |
577 | | /* degree 15: h*(yh+yl)+E2[15 + 8] */ |
578 | 0 | t = DoubleDouble::from_exact_mult(h, y_dd.hi); |
579 | 0 | t.lo = dd_fmla(h, y_dd.lo, t.lo); |
580 | 0 | let mut v = DoubleDouble::from_exact_add(f64::from_bits(E2[15 + 8]), t.hi); |
581 | 0 | v.lo += t.lo; |
582 | 0 | for a in (8..=14).rev() { |
583 | 0 | /* degree i: (h+l)*(yh+yl)+E2[i+8] */ |
584 | 0 | t = DoubleDouble::quick_mult(v, y_dd); |
585 | 0 | v = DoubleDouble::from_exact_add(f64::from_bits(E2[a + 8]), t.hi); |
586 | 0 | v.lo += t.lo; |
587 | 0 | } |
588 | 0 | for a in (0..=7).rev() { |
589 | 0 | /* degree i: (h+l)*(yh+yl)+E2[2i]+E2[2i+1] */ |
590 | 0 | t = DoubleDouble::quick_mult(v, y_dd); |
591 | 0 | v = DoubleDouble::from_exact_add(f64::from_bits(E2[2 * a]), t.hi); |
592 | 0 | v.lo += t.lo + f64::from_bits(E2[2 * a + 1]); |
593 | 0 | } |
594 | | |
595 | 0 | Exp { e: k, result: v } |
596 | 0 | } |
597 | | |
598 | | #[cold] |
599 | 0 | fn erfc_asympt_accurate(x: f64) -> f64 { |
600 | | /* subnormal exceptions */ |
601 | 0 | if x == f64::from_bits(0x403a8f7bfbd15495) { |
602 | 0 | return dd_fmla( |
603 | 0 | f64::from_bits(0x0000000000000001), |
604 | | -0.25, |
605 | 0 | f64::from_bits(0x000667bd620fd95b), |
606 | | ); |
607 | 0 | } |
608 | 0 | let u_dd = DoubleDouble::from_exact_mult(x, x); |
609 | 0 | let exp_result = exp_accurate(DoubleDouble::new(-u_dd.lo, -u_dd.hi)); |
610 | | |
611 | | /* compute 1/x as double-double */ |
612 | 0 | let yh = 1.0 / x; |
613 | | /* Newton's iteration for 1/x is y -> y + y*(1-x*y) */ |
614 | 0 | let yl = yh * dd_fmla(-x, yh, 1.0); |
615 | | // yh+yl approximates 1/x |
616 | | static THRESHOLD: [u64; 10] = [ |
617 | | 0x3fb4500000000000, |
618 | | 0x3fbe000000000000, |
619 | | 0x3fc3f00000000000, |
620 | | 0x3fc9500000000000, |
621 | | 0x3fcf500000000000, |
622 | | 0x3fd3100000000000, |
623 | | 0x3fd7100000000000, |
624 | | 0x3fdbc00000000000, |
625 | | 0x3fe0b00000000000, |
626 | | 0x3fe3000000000000, |
627 | | ]; |
628 | 0 | let mut i = 0usize; |
629 | 0 | while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) { |
630 | 0 | i += 1; |
631 | 0 | } |
632 | 0 | let p = ASYMPTOTIC_POLY_ACCURATE[i]; |
633 | 0 | let mut u_dd = DoubleDouble::from_exact_mult(yh, yh); |
634 | 0 | u_dd.lo = dd_fmla(2.0 * yh, yl, u_dd.lo); |
635 | | /* the polynomial p has degree 29+2i, and its coefficient of largest |
636 | | degree is p[14+6+i] */ |
637 | 0 | let mut z_dd = DoubleDouble::new(0., f64::from_bits(p[14 + 6 + i])); |
638 | 0 | for a in (13..=27 + 2 * i).rev().step_by(2) { |
639 | 0 | /* degree j: (zh+zl)*(uh+ul)+p[(j-1)/2+6]] */ |
640 | 0 | let v = DoubleDouble::quick_mult(z_dd, u_dd); |
641 | 0 | z_dd = DoubleDouble::from_full_exact_add(f64::from_bits(p[(a - 1) / 2 + 6]), v.hi); |
642 | 0 | z_dd.lo += v.lo; |
643 | 0 | } |
644 | 0 | for a in (1..=11).rev().step_by(2) { |
645 | 0 | let v = DoubleDouble::quick_mult(z_dd, u_dd); |
646 | 0 | z_dd = DoubleDouble::from_full_exact_add(f64::from_bits(p[a - 1]), v.hi); |
647 | 0 | z_dd.lo += v.lo + f64::from_bits(p[a]); |
648 | 0 | } |
649 | | |
650 | | /* multiply by yh+yl */ |
651 | 0 | u_dd = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh)); |
652 | | /* now uh+ul approximates p(1/x), i.e., erfc(x)*exp(x^2) */ |
653 | | /* now multiply (uh+ul)*(eh+el), after normalizing uh+ul to reduce the |
654 | | number of exceptional cases */ |
655 | 0 | u_dd = DoubleDouble::from_exact_add(u_dd.hi, u_dd.lo); |
656 | 0 | let v = DoubleDouble::quick_mult(u_dd, exp_result.result); |
657 | | /* multiply by 2^e */ |
658 | | /* multiply by 2^e */ |
659 | 0 | let mut res = ldexp(v.to_f64(), exp_result.e); |
660 | 0 | if res < f64::from_bits(0x0010000000000000) { |
661 | 0 | /* for erfc(x) in the subnormal range, we have to perform a special |
662 | 0 | rounding */ |
663 | 0 | let mut corr = v.hi - ldexp(res, -exp_result.e); |
664 | 0 | corr += v.lo; |
665 | 0 | /* add corr*2^e */ |
666 | 0 | res += ldexp(corr, exp_result.e); |
667 | 0 | } |
668 | 0 | res |
669 | 0 | } |
670 | | |
671 | | #[cold] |
672 | 0 | fn erfc_accurate(x: f64) -> f64 { |
673 | 0 | if x < 0. { |
674 | 0 | let mut v_dd = erf_accurate(-x); |
675 | 0 | let t = DoubleDouble::from_exact_add(1.0, v_dd.hi); |
676 | 0 | v_dd.hi = t.hi; |
677 | 0 | v_dd.lo += t.lo; |
678 | 0 | return v_dd.to_f64(); |
679 | 0 | } else if x <= f64::from_bits(0x3ffb59ffb450828c) { |
680 | | // erfc(x) >= 2^-6 |
681 | 0 | let mut v_dd = erf_accurate(x); |
682 | 0 | let t = DoubleDouble::from_exact_add(1.0, -v_dd.hi); |
683 | 0 | v_dd.hi = t.hi; |
684 | 0 | v_dd.lo = t.lo - v_dd.lo; |
685 | 0 | return v_dd.to_f64(); |
686 | 0 | } |
687 | | // now 0x1.b59ffb450828cp+0 < x < 0x1.b39dc41e48bfdp+4 |
688 | 0 | erfc_asympt_accurate(x) |
689 | 0 | } |
690 | | |
691 | | /* Fast path for 0x1.713786d9c7c09p+1 < x < 0x1.b39dc41e48bfdp+4, |
692 | | using the asymptotic formula erfc(x) = exp(-x^2) * p(1/x)*/ |
693 | 0 | fn erfc_asympt_fast(x: f64) -> Erf { |
694 | | /* for x >= 0x1.9db1bb14e15cap+4, erfc(x) < 2^-970, and we might encounter |
695 | | underflow issues in the computation of l, thus we delegate this case |
696 | | to the accurate path */ |
697 | 0 | if x >= f64::from_bits(0x4039db1bb14e15ca) { |
698 | 0 | return Erf { |
699 | 0 | err: 1.0, |
700 | 0 | result: DoubleDouble::default(), |
701 | 0 | }; |
702 | 0 | } |
703 | | |
704 | 0 | let mut u = DoubleDouble::from_exact_mult(x, x); |
705 | 0 | let e_dd = exp_1(DoubleDouble::new(-u.lo, -u.hi)); |
706 | | |
707 | | /* the assumptions from exp_1 are satisfied: |
708 | | * a_mul ensures |ul| <= ulp(uh), thus |ul/uh| <= 2^-52 |
709 | | * since |x| < 0x1.9db1bb14e15cap+4 we have |
710 | | |ul| < ulp(0x1.9db1bb14e15cap+4^2) = 2^-43 */ |
711 | | /* eh+el approximates exp(-x^2) with maximal relative error 2^-74.139 */ |
712 | | |
713 | | /* compute 1/x as double-double */ |
714 | 0 | let yh = 1.0 / x; |
715 | | /* Assume 1 <= x < 2, then 0.5 <= yh <= 1, |
716 | | and yh = 1/x + eps with |eps| <= 2^-53. */ |
717 | | /* Newton's iteration for 1/x is y -> y + y*(1-x*y) */ |
718 | 0 | let yl = yh * dd_fmla(-x, yh, 1.0); |
719 | | /* x*yh-1 = x*(1/x+eps)-1 = x*eps |
720 | | with |x*eps| <= 2^-52, thus the error on the FMA is bounded by |
721 | | ulp(2^-52.1) = 2^-105. |
722 | | Now |yl| <= |yh| * 2^-52 <= 2^-52, thus the rounding error on |
723 | | yh * __builtin_fma (-x, yh, 1.0) is bounded also by ulp(2^-52.1) = 2^-105. |
724 | | From [6], Lemma 3.7, if yl was computed exactly, then yh+yl would differ |
725 | | from 1/x by at most yh^2/theta^3*(1/x-yh)^2 for some theta in [yh,1/x] |
726 | | or [1/x,yh]. |
727 | | Since yh, 1/x <= 1, this gives eps^2 <= 2^-106. |
728 | | Adding the rounding errors, we have: |
729 | | |yh + yl - 1/x| <= 2^-105 + 2^-105 + 2^-106 < 2^-103.67. |
730 | | For the relative error, since |yh| >= 1/2, this gives: |
731 | | |yh + yl - 1/x| < 2^-102.67 * |yh+yl| |
732 | | */ |
733 | | const THRESHOLD: [u64; 6] = [ |
734 | | 0x3fbd500000000000, |
735 | | 0x3fc59da6ca291ba6, |
736 | | 0x3fcbc00000000000, |
737 | | 0x3fd0c00000000000, |
738 | | 0x3fd3800000000000, |
739 | | 0x3fd6300000000000, |
740 | | ]; |
741 | 0 | let mut i = 0usize; |
742 | 0 | while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) { |
743 | 0 | i += 1; |
744 | 0 | } |
745 | 0 | let p = ASYMPTOTIC_POLY[i]; |
746 | 0 | u = DoubleDouble::from_exact_mult(yh, yh); |
747 | | /* Since |yh| <= 1, we have |uh| <= 1 and |ul| <= 2^-53. */ |
748 | 0 | u.lo = dd_fmla(2.0 * yh, yl, u.lo); |
749 | | /* uh+ul approximates (yh+yl)^2, with absolute error bounded by |
750 | | ulp(ul) + yl^2, where ulp(ul) is the maximal rounding error in |
751 | | the FMA, and yl^2 is the neglected term. |
752 | | Since |ul| <= 2^-53, ulp(ul) <= 2^-105, and since |yl| <= 2^-52, |
753 | | this yields |uh + ul - yh^2| <= 2^-105 + 2^-104 < 2^-103.41. |
754 | | For the relative error, since |(yh+yl)^2| >= 1/4: |
755 | | |uh + ul - yh^2| < 2^-101.41 * |uh+ul|. |
756 | | And relatively to 1/x^2: |
757 | | yh + yl = 1/x * (1 + eps1) with |eps1| < 2^-102.67 |
758 | | uh + ul = (yh+yl)^2 * (1 + eps2) with |eps2| < 2^-101.41 |
759 | | This yields: |
760 | | |uh + ul - 1/x| < 2^-100.90 * |uh+ul|. |
761 | | */ |
762 | | |
763 | | /* evaluate p(uh+ul) */ |
764 | 0 | let mut zh: f64 = f64::from_bits(p[12]); // degree 23 |
765 | 0 | zh = dd_fmla(zh, u.hi, f64::from_bits(p[11])); // degree 21 |
766 | 0 | zh = dd_fmla(zh, u.hi, f64::from_bits(p[10])); // degree 19 |
767 | | |
768 | | /* degree 17: zh*(uh+ul)+p[i] */ |
769 | 0 | let mut v = DoubleDouble::quick_f64_mult(zh, u); |
770 | 0 | let mut z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[9]), v.hi); |
771 | 0 | z_dd.lo += v.lo; |
772 | | |
773 | 0 | for a in (3..=15).rev().step_by(2) { |
774 | 0 | v = DoubleDouble::quick_mult(z_dd, u); |
775 | 0 | z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[((a + 1) / 2) as usize]), v.hi); |
776 | 0 | z_dd.lo += v.lo; |
777 | 0 | } |
778 | | |
779 | | /* degree 1: (zh+zl)*(uh+ul)+p[0]+p[1] */ |
780 | 0 | v = DoubleDouble::quick_mult(z_dd, u); |
781 | 0 | z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[0]), v.hi); |
782 | 0 | z_dd.lo += v.lo + f64::from_bits(p[1]); |
783 | | /* multiply by yh+yl */ |
784 | 0 | u = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh)); |
785 | | /* now uh+ul approximates p(1/x) */ |
786 | | /* now multiply (uh+ul)*(eh+el) */ |
787 | 0 | v = DoubleDouble::quick_mult(u, e_dd); |
788 | | |
789 | | /* Write y = 1/x. We have the following errors: |
790 | | * the maximal mathematical error is: |
791 | | |erfc(x)*exp(x^2) - p(y)| < 2^-71.804 * |p(y)| (for i=3) thus |
792 | | |erfc(x) - exp(-x^2)*p(y)| < 2^-71.804 * |exp(-x^2)*p(y)| |
793 | | * the error in approximating exp(-x^2) by eh+el: |
794 | | |eh + el - exp(-x^2)| < 2^-74.139 * |eh + el| |
795 | | * the fact that we evaluate p on yh+yl instead of 1/x |
796 | | this error is bounded by |p'| * |yh+yl - 1/x|, where |
797 | | |yh+yl - 1/x| < 2^-102.67 * |yh+yl|, and the relative |
798 | | error is bounded by |p'/p| * |yh+yl - 1/x|. |
799 | | Since the maximal value of |p'/p| is bounded by 27.2 (for i=0), |
800 | | this yields 27.2 * 2^-102.67 < 2^-97.9 |
801 | | * the rounding errors when evaluating p on yh+yl: this error is bounded |
802 | | (relatively) by 2^-67.184 (for i=5), see analyze_erfc_asympt_fast() |
803 | | in erfc.sage |
804 | | * the rounding error in (uh+ul)*(eh+el): we assume this error is bounded |
805 | | by 2^-80 (relatively) |
806 | | This yields a global relative bound of: |
807 | | (1+2^-71.804)*(1+2^-74.139)*(1+2^-97.9)*(1+2^-67.184)*(1+2^-80)-1 |
808 | | < 2^-67.115 |
809 | | */ |
810 | 0 | if v.hi >= f64::from_bits(0x044151b9a3fdd5c9) { |
811 | 0 | Erf { |
812 | 0 | err: f64::from_bits(0x3bbd900000000000) * v.hi, |
813 | 0 | result: v, |
814 | 0 | } /* 2^-67.115 < 0x1.d9p-68 */ |
815 | | } else { |
816 | 0 | Erf { |
817 | 0 | result: v, |
818 | 0 | err: f64::from_bits(0x0010000000000000), |
819 | 0 | } // this overestimates 0x1.d9p-68 * h |
820 | | } |
821 | 0 | } |
822 | | |
823 | | #[inline] |
824 | 0 | fn erfc_fast(x: f64) -> Erf { |
825 | 0 | if x < 0. |
826 | | // erfc(x) = 1 - erf(x) = 1 + erf(-x) |
827 | | { |
828 | 0 | let res = erf_fast(-x); |
829 | | /* h+l approximates erf(-x), with relative error bounded by err, |
830 | | where err <= 0x1.78p-69 */ |
831 | 0 | let err = res.err * res.result.hi; /* convert into absolute error */ |
832 | 0 | let mut t = DoubleDouble::from_exact_add(1.0, res.result.hi); |
833 | 0 | t.lo += res.result.lo; |
834 | | // since h <= 2, the fast_two_sum() error is bounded by 2^-105*h <= 2^-104 |
835 | | /* After the fast_two_sum() call, we have |t| <= ulp(h) <= ulp(2) = 2^-51 |
836 | | thus assuming |l| <= 2^-51 after the cr_erf_fast() call, |
837 | | we have |t| <= 2^-50 here, thus the rounding |
838 | | error on t -= *l is bounded by ulp(2^-50) = 2^-102. |
839 | | The absolute error is thus bounded by err + 2^-104 + 2^-102 |
840 | | = err + 0x1.4p-102. |
841 | | The maximal value of err here is for |x| < 0.0625, where cr_erf_fast() |
842 | | returns 0x1.78p-69, and h=1/2, yielding err = 0x1.78p-70 here. |
843 | | Adding 0x1.4p-102 is thus exact. */ |
844 | 0 | return Erf { |
845 | 0 | err: err + f64::from_bits(0x3994000000000000), |
846 | 0 | result: t, |
847 | 0 | }; |
848 | 0 | } else if x <= f64::from_bits(0x400713786d9c7c09) { |
849 | 0 | let res = erf_fast(x); |
850 | | /* h+l approximates erf(x), with relative error bounded by err, |
851 | | where err <= 0x1.78p-69 */ |
852 | 0 | let err = res.err * res.result.hi; /* convert into absolute error */ |
853 | 0 | let mut t = DoubleDouble::from_exact_add(1.0, -res.result.hi); |
854 | 0 | t.lo -= res.result.lo; |
855 | | /* for x >= 0x1.e861fbb24c00ap-2, erf(x) >= 1/2, thus 1-h is exact |
856 | | by Sterbenz theorem, thus t = 0 in fast_two_sum(), and we have t = -l |
857 | | here, thus the absolute error is err */ |
858 | 0 | if x >= f64::from_bits(0x3fde861fbb24c00a) { |
859 | 0 | return Erf { err, result: t }; |
860 | 0 | } |
861 | | /* for x < 0x1.e861fbb24c00ap-2, the error in fast_two_sum() is bounded |
862 | | by 2^-105*h, and since h <= 1/2, this yields 2^-106. |
863 | | After the fast_two_sum() call, we have |t| <= ulp(h) <= ulp(1/2) = 2^-53 |
864 | | thus assuming |l| <= 2^-53 after the cr_erf_fast() call, |
865 | | we have |t| <= 2^-52 here, thus the rounding |
866 | | error on t -= *l is bounded by ulp(2^-52) = 2^-104. |
867 | | The absolute error is thus bounded by err + 2^-106 + 2^-104 |
868 | | The maximal value of err here is for x < 0.0625, where cr_erf_fast() |
869 | | returns 0x1.78p-69, and h=1/2, yielding err = 0x1.78p-70 here. |
870 | | Adding 0x1.4p-104 is thus exact. */ |
871 | 0 | return Erf { |
872 | 0 | err: err + f64::from_bits(0x3974000000000000), |
873 | 0 | result: t, |
874 | 0 | }; |
875 | 0 | } |
876 | | /* Now THRESHOLD1 < x < 0x1.b39dc41e48bfdp+4 thus erfc(x) < 0.000046. */ |
877 | | /* on a i7-8700 with gcc 12.2.0, for x in [THRESHOLD1,+5.0], |
878 | | the average reciprocal throughput is about 111 cycles |
879 | | (among which 20 cycles for exp_1) */ |
880 | 0 | erfc_asympt_fast(x) |
881 | 0 | } |
882 | | |
883 | | /// Complementary error function |
884 | | /// |
885 | | /// Max ulp 0.5 |
886 | 0 | pub fn f_erfc(x: f64) -> f64 { |
887 | 0 | let t: u64 = x.to_bits(); |
888 | 0 | let at: u64 = t & 0x7fff_ffff_ffff_ffff; |
889 | | |
890 | 0 | if t >= 0x8000000000000000u64 |
891 | | // x = -NaN or x <= 0 (excluding +0) |
892 | | { |
893 | | // for x <= -0x1.7744f8f74e94bp2, erfc(x) rounds to 2 (to nearest) |
894 | 0 | if t >= 0xc017744f8f74e94bu64 |
895 | | // x = NaN or x <= -0x1.7744f8f74e94bp2 |
896 | | { |
897 | 0 | if t >= 0xfff0000000000000u64 { |
898 | | // -Inf or NaN |
899 | 0 | if t == 0xfff0000000000000u64 { |
900 | 0 | return 2.0; |
901 | 0 | } // -Inf |
902 | 0 | return x + x; // NaN |
903 | 0 | } |
904 | 0 | return black_box(2.0) - black_box(f64::from_bits(0x3c90000000000000)); // rounds to 2 or below(2) |
905 | 0 | } |
906 | | |
907 | | // for -9.8390953768041405e-17 <= x <= 0, erfc(x) rounds to 1 (to nearest) |
908 | 0 | if f64::from_bits(0xbc9c5bf891b4ef6a) <= x { |
909 | 0 | return dd_fmla(-x, f64::from_bits(0x3c90000000000000), 1.0); |
910 | 0 | } |
911 | | } else |
912 | | // x = +NaN or x >= 0 (excluding -0) |
913 | | { |
914 | | // for x >= 0x1.b39dc41e48bfdp+4, erfc(x) < 2^-1075: rounds to 0 or 2^-1074 |
915 | 0 | if at >= 0x403b39dc41e48bfdu64 |
916 | | // x = NaN or x >= 0x1.b39dc41e48bfdp+4 |
917 | | { |
918 | 0 | if at >= 0x7ff0000000000000u64 { |
919 | | // +Inf or NaN |
920 | 0 | if at == 0x7ff0000000000000u64 { |
921 | 0 | return 0.0; |
922 | 0 | } // +Inf |
923 | 0 | return x + x; // NaN |
924 | 0 | } |
925 | 0 | return black_box(f64::from_bits(0x0000000000000001)) * black_box(0.25); // 0 or 2^-1074 wrt rounding |
926 | 0 | } |
927 | | |
928 | | // for 0 <= x <= 0x1.c5bf891b4ef6ap-55, erfc(x) rounds to 1 (to nearest) |
929 | 0 | if x <= f64::from_bits(0x3c8c5bf891b4ef6a) { |
930 | 0 | return dd_fmla(-x, f64::from_bits(0x3c90000000000000), 1.0); |
931 | 0 | } |
932 | | } |
933 | | |
934 | | /* now -0x1.7744f8f74e94bp+2 < x < -0x1.c5bf891b4ef6ap-54 |
935 | | or 0x1.c5bf891b4ef6ap-55 < x < 0x1.b39dc41e48bfdp+4 */ |
936 | | |
937 | 0 | let result = erfc_fast(x); |
938 | | |
939 | 0 | let left = result.result.hi + (result.result.lo - result.err); |
940 | 0 | let right = result.result.hi + (result.result.lo + result.err); |
941 | 0 | if left == right { |
942 | 0 | return left; |
943 | 0 | } |
944 | 0 | erfc_accurate(x) |
945 | 0 | } |
946 | | |
947 | | #[cfg(test)] |
948 | | mod tests { |
949 | | use super::*; |
950 | | #[test] |
951 | | fn test_erfc() { |
952 | | assert_eq!(f_erfc(1.0), 0.15729920705028513); |
953 | | assert_eq!(f_erfc(0.5), 0.4795001221869535); |
954 | | assert_eq!(f_erfc(0.000000005), 0.9999999943581042); |
955 | | assert_eq!(f_erfc(-0.00000000000065465465423305), 1.0000000000007387); |
956 | | assert!(f_erfc(f64::NAN).is_nan()); |
957 | | assert_eq!(f_erfc(f64::INFINITY), 0.0); |
958 | | assert_eq!(f_erfc(f64::NEG_INFINITY), 2.0); |
959 | | } |
960 | | } |