/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/err/erff.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | |
31 | | // Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by Sollya |
32 | | // with: |
33 | | // > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|], |
34 | | // [k/8, (k + 1)/8]); |
35 | | // for k = 0..31. |
36 | | static COEFFS: [[u64; 8]; 32] = [ |
37 | | [ |
38 | | 0x3ff20dd750429b6d, |
39 | | 0xbfd812746b037753, |
40 | | 0x3fbce2f219e8596a, |
41 | | 0xbf9b82cdacb78fda, |
42 | | 0x3f756479297dfda5, |
43 | | 0xbf48b3ac5455ef02, |
44 | | 0xbf7126fcac367e3b, |
45 | | 0x3fb2d0bdb3ba4984, |
46 | | ], |
47 | | [ |
48 | | 0x3ff20dd750429b6d, |
49 | | 0xbfd812746b0379a8, |
50 | | 0x3fbce2f21a03cf2a, |
51 | | 0xbf9b82ce30de083e, |
52 | | 0x3f7565bcad3eb60f, |
53 | | 0xbf4c02c66f659256, |
54 | | 0x3f1f92f673385229, |
55 | | 0xbeedef402648ae90, |
56 | | ], |
57 | | [ |
58 | | 0x3ff20dd750429b34, |
59 | | 0xbfd812746b032dce, |
60 | | 0x3fbce2f219d84aae, |
61 | | 0xbf9b82ce22dcf139, |
62 | | 0x3f7565b9efcd4af1, |
63 | | 0xbf4c021f1af414bc, |
64 | | 0x3f1f7c6d177eff82, |
65 | | 0xbeec9e4410dcf865, |
66 | | ], |
67 | | [ |
68 | | 0x3ff20dd750426eab, |
69 | | 0xbfd812746ae592c7, |
70 | | 0x3fbce2f211525f14, |
71 | | 0xbf9b82ccc125e63f, |
72 | | 0x3f756596f261cfd3, |
73 | | 0xbf4bfde1ff8eeecf, |
74 | | 0x3f1f31a9d15dc5d8, |
75 | | 0xbeea5a4362844b3c, |
76 | | ], |
77 | | [ |
78 | | 0x3ff20dd75039c705, |
79 | | 0xbfd812746777e74d, |
80 | | 0x3fbce2f17af98a1b, |
81 | | 0xbf9b82be4b817cbe, |
82 | | 0x3f7564bec2e2962e, |
83 | | 0xbf4bee86f9da3558, |
84 | | 0x3f1e9443689dc0cc, |
85 | | 0xbee79c0f230805d8, |
86 | | ], |
87 | | [ |
88 | | 0x3ff20dd74f811211, |
89 | | 0xbfd81274371a3e8f, |
90 | | 0x3fbce2ec038262e5, |
91 | | 0xbf9b8265b82c5e1f, |
92 | | 0x3f75615a2e239267, |
93 | | 0xbf4bc63ae023dceb, |
94 | | 0x3f1d87c2102f7e06, |
95 | | 0xbee49584bea41d62, |
96 | | ], |
97 | | [ |
98 | | 0x3ff20dd746d063e3, |
99 | | 0xbfd812729a8a950f, |
100 | | 0x3fbce2cb0a2df232, |
101 | | 0xbf9b80eca1f51278, |
102 | | 0x3f75572e26c46815, |
103 | | 0xbf4b715e5638b65e, |
104 | | 0x3f1bfbb195484968, |
105 | | 0xbee177a565c15c52, |
106 | | ], |
107 | | [ |
108 | | 0x3ff20dd701b44486, |
109 | | 0xbfd812691145f237, |
110 | | 0x3fbce23a06b8cfd9, |
111 | | 0xbf9b7c1dc7245288, |
112 | | 0x3f753e92f7f397dd, |
113 | | 0xbf4ad97cc4acf0b2, |
114 | | 0x3f19f028b2b09b71, |
115 | | 0xbedcdc4da08da8c1, |
116 | | ], |
117 | | [ |
118 | | 0x3ff20dd5715ac332, |
119 | | 0xbfd8123e680bd0eb, |
120 | | 0x3fbce0457aded691, |
121 | | 0xbf9b6f52d52bed40, |
122 | | 0x3f750c291b84414c, |
123 | | 0xbf49ea246b1ad4a9, |
124 | | 0x3f177654674e0ca0, |
125 | | 0xbed737c11a1bcebb, |
126 | | ], |
127 | | [ |
128 | | 0x3ff20dce6593e114, |
129 | | 0xbfd811a59c02eadc, |
130 | | 0x3fbcdab53c7cd7d5, |
131 | | 0xbf9b526d2e321eed, |
132 | | 0x3f74b1d32cd8b994, |
133 | | 0xbf48963143ec0a1e, |
134 | | 0x3f14ad5700e4db91, |
135 | | 0xbed231e100e43ef2, |
136 | | ], |
137 | | [ |
138 | | 0x3ff20db48bfd5a62, |
139 | | 0xbfd80fdd84f9e308, |
140 | | 0x3fbccd340d462983, |
141 | | 0xbf9b196a29287680, |
142 | | 0x3f74210c2c13a0f7, |
143 | | 0xbf46dbdfb4ff71ae, |
144 | | 0x3f11bca2d17fbd71, |
145 | | 0xbecbca36f90c7cf5, |
146 | | ], |
147 | | [ |
148 | | 0x3ff20d64b2f8f508, |
149 | | 0xbfd80b4d4f19fa8b, |
150 | | 0x3fbcb088197262e3, |
151 | | 0xbf9ab51fd02e5b99, |
152 | | 0x3f734e1e5e81a632, |
153 | | 0xbf44c66377b502ce, |
154 | | 0x3f0d9ad25066213c, |
155 | | 0xbec4b0df7dd0cfa1, |
156 | | ], |
157 | | [ |
158 | | 0x3ff20c8fc1243576, |
159 | | 0xbfd8010cb2009e27, |
160 | | 0x3fbc7a47e9299315, |
161 | | 0xbf9a155be5683654, |
162 | | 0x3f7233502694997b, |
163 | | 0xbf426c94b7d81300, |
164 | | 0x3f08094f1de25fb9, |
165 | | 0xbebe0e3d776c6eef, |
166 | | ], |
167 | | [ |
168 | | 0x3ff20a9bd1611bc1, |
169 | | 0xbfd7ec7fbce83f90, |
170 | | 0x3fbc1d757d7317b7, |
171 | | 0xbf992c160cd589f0, |
172 | | 0x3f70d307269cc5c2, |
173 | | 0xbf3fda5b0d2d1879, |
174 | | 0x3f02fdd7b3b14a7f, |
175 | | 0xbeb54eed4a26af5a, |
176 | | ], |
177 | | [ |
178 | | 0x3ff20682834f943d, |
179 | | 0xbfd7c73f747bf5a9, |
180 | | 0x3fbb8c2db4a9ffd1, |
181 | | 0xbf97f0e4ffe989ec, |
182 | | 0x3f6e7061eae4166e, |
183 | | 0xbf3ad36e873fff2d, |
184 | | 0x3efd39222396128e, |
185 | | 0xbead83dacec5ea6b, |
186 | | ], |
187 | | [ |
188 | | 0x3ff1feb8d12676d7, |
189 | | 0xbfd7898347284afe, |
190 | | 0x3fbaba3466b34451, |
191 | | 0xbf9663adc573e2f9, |
192 | | 0x3f6ae99fb17c3e08, |
193 | | 0xbf3602f950ad5535, |
194 | | 0x3ef5e9717490609d, |
195 | | 0xbea3fca107bbc8d5, |
196 | | ], |
197 | | [ |
198 | | 0x3ff1f12fe3c536fa, |
199 | | 0xbfd72b1d1f22e6d3, |
200 | | 0x3fb99fc0eed4a896, |
201 | | 0xbf948db0a87bd8c6, |
202 | | 0x3f673e368895aa61, |
203 | | 0xbf319b35d5301fc8, |
204 | | 0x3ef007987e4bb033, |
205 | | 0xbe9a7edcd4c2dc70, |
206 | | ], |
207 | | [ |
208 | | 0x3ff1db7b0df84d5d, |
209 | | 0xbfd6a4e4a41cde02, |
210 | | 0x3fb83bbded16455d, |
211 | | 0xbf92809b3b36977e, |
212 | | 0x3f639c08bab44679, |
213 | | 0xbf2b7b45a70ed119, |
214 | | 0x3ee6e99b36410e7b, |
215 | | 0xbe913619bb7ebc0c, |
216 | | ], |
217 | | [ |
218 | | 0x3ff1bb1c85c4a527, |
219 | | 0xbfd5f23b99a249a3, |
220 | | 0x3fb694c91fa0d12c, |
221 | | 0xbf9053e1ce11c72d, |
222 | | 0x3f602bf72c50ea78, |
223 | | 0xbf24f478fb56cb02, |
224 | | 0x3ee005f80ecbe213, |
225 | | 0xbe85f2446bde7f5b, |
226 | | ], |
227 | | [ |
228 | | 0x3ff18dec3bd51f9d, |
229 | | 0xbfd5123f58346186, |
230 | | 0x3fb4b8a1ca536ab4, |
231 | | 0xbf8c4243015cc723, |
232 | | 0x3f5a1a8a01d351ef, |
233 | | 0xbf1f466b34f1d86b, |
234 | | 0x3ed5f835eea0bf6a, |
235 | | 0xbe7b83165b939234, |
236 | | ], |
237 | | [ |
238 | | 0x3ff152804c3369f4, |
239 | | 0xbfd4084cd4afd4bc, |
240 | | 0x3fb2ba2e836e47aa, |
241 | | 0xbf8800f2dfc6904b, |
242 | | 0x3f54a6daf0669c59, |
243 | | 0xbf16e326ab872317, |
244 | | 0x3ecd9761a6a755a5, |
245 | | 0xbe70fca33f9dd4b5, |
246 | | ], |
247 | | [ |
248 | | 0x3ff1087ad68356aa, |
249 | | 0xbfd2dbb044707459, |
250 | | 0x3fb0aea8ceaa0384, |
251 | | 0xbf840b516d52b3d2, |
252 | | 0x3f500c9e05f01d22, |
253 | | 0xbf1076afb0dc0ff7, |
254 | | 0x3ec39fadec400657, |
255 | | 0xbe64b5761352e7e3, |
256 | | ], |
257 | | [ |
258 | | 0x3ff0b0a7a8ba4a22, |
259 | | 0xbfd196990d22d4a1, |
260 | | 0x3fad5551e6ac0c4d, |
261 | | 0xbf807cce1770bd1a, |
262 | | 0x3f4890347b8848bf, |
263 | | 0xbf0757ec96750b6a, |
264 | | 0x3eb9b258a1e06bce, |
265 | | 0xbe58fc6d22da7572, |
266 | | ], |
267 | | [ |
268 | | 0x3ff04ce2be70fb47, |
269 | | 0xbfd0449e4b0b9cac, |
270 | | 0x3fa97f7424f4b0e7, |
271 | | 0xbf7ac825439c42f4, |
272 | | 0x3f428f5f65426dfb, |
273 | | 0xbf005b699a90f90f, |
274 | | 0x3eb0a888eecf4593, |
275 | | 0xbe4deace2b32bb31, |
276 | | ], |
277 | | [ |
278 | | 0x3fefbf9fb0e11cc8, |
279 | | 0xbfcde2640856545a, |
280 | | 0x3fa5f5b1f47f8510, |
281 | | 0xbf7588bc71eb41b9, |
282 | | 0x3f3bc6a0a772f56d, |
283 | | 0xbef6b9fad1f1657a, |
284 | | 0x3ea573204ba66504, |
285 | | 0xbe41d38065c94e44, |
286 | | ], |
287 | | [ |
288 | | 0x3feed8f18c99e031, |
289 | | 0xbfcb4cb6acd903b4, |
290 | | 0x3fa2c7f3dddd6fc1, |
291 | | 0xbf713052067df4e0, |
292 | | 0x3f34a5027444082f, |
293 | | 0xbeef672bab0e2554, |
294 | | 0x3e9b83c756348cc9, |
295 | | 0xbe3534f1a1079499, |
296 | | ], |
297 | | [ |
298 | | 0x3fedebd33044166d, |
299 | | 0xbfc8d7cd9053f7d8, |
300 | | 0x3f9ff9957fb3d6e7, |
301 | | 0xbf6b50be55de0f36, |
302 | | 0x3f2e92c8ec53a628, |
303 | | 0xbee5a4b88d508007, |
304 | | 0x3e91a27737559e26, |
305 | | 0xbe2942ae62cb2c14, |
306 | | ], |
307 | | [ |
308 | | 0x3fecfdbf0386f3bd, |
309 | | 0xbfc68e33d93b0dc4, |
310 | | 0x3f9b2683d58f53de, |
311 | | 0xbf65a9174e70d26f, |
312 | | 0x3f269ddd326d49cd, |
313 | | 0xbeddd8f397a8219c, |
314 | | 0x3e86a755016ad4dd, |
315 | | 0xbe1e366e0139187d, |
316 | | ], |
317 | | [ |
318 | | 0x3fec132adb8d7464, |
319 | | 0xbfc475a899f61b46, |
320 | | 0x3f970a431397a77c, |
321 | | 0xbf612e3d35beeee2, |
322 | | 0x3f20c16b05738333, |
323 | | 0xbed4a47f873e144e, |
324 | | 0x3e7d3d494c698c02, |
325 | | 0xbe12302c59547fe5, |
326 | | ], |
327 | | [ |
328 | | 0x3feb2f5fd05555e7, |
329 | | 0xbfc28feefbe03ec7, |
330 | | 0x3f93923acbb3a676, |
331 | | 0xbf5b4ff793cd6358, |
332 | | 0x3f18ea0eb8c913bc, |
333 | | 0xbeccb31ec2baceb1, |
334 | | 0x3e730011e7e80c04, |
335 | | 0xbe0617710635cb1d, |
336 | | ], |
337 | | [ |
338 | | 0x3fea54853cd9593e, |
339 | | 0xbfc0dbdbaea4dc8e, |
340 | | 0x3f90a93e2c20a0fd, |
341 | | 0xbf55c969ff401ea8, |
342 | | 0x3f129e0cc64fe627, |
343 | | 0xbec4160d8e9d3c2a, |
344 | | 0x3e68e7b67594624a, |
345 | | 0xbdfb1cf2c975b09b, |
346 | | ], |
347 | | [ |
348 | | 0x3fe983ceece09ff8, |
349 | | 0xbfbeacc78f7a2d00, |
350 | | 0x3f8c74418410655f, |
351 | | 0xbf51756a050e441e, |
352 | | 0x3f0bff3650f7f548, |
353 | | 0xbebc56c0217d3ada, |
354 | | 0x3e607b4918d0b489, |
355 | | 0xbdf0d4be8c1c50f8, |
356 | | ], |
357 | | ]; |
358 | | |
359 | | trait ErffBackend { |
360 | | fn fma(&self, x: f64, y: f64, z: f64) -> f64; |
361 | | } |
362 | | |
363 | | struct GenErffBackend {} |
364 | | |
365 | | impl ErffBackend for GenErffBackend { |
366 | | #[inline(always)] |
367 | 0 | fn fma(&self, x: f64, y: f64, z: f64) -> f64 { |
368 | 0 | f_fmla(x, y, z) |
369 | 0 | } |
370 | | } |
371 | | |
372 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
373 | | struct FmaErffBackend {} |
374 | | |
375 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
376 | | impl ErffBackend for FmaErffBackend { |
377 | | #[inline(always)] |
378 | 0 | fn fma(&self, x: f64, y: f64, z: f64) -> f64 { |
379 | 0 | f64::mul_add(x, y, z) |
380 | 0 | } |
381 | | } |
382 | | |
383 | | #[inline(always)] |
384 | 0 | fn erff_gen<B: ErffBackend>(x: f32, backend: B) -> f32 { |
385 | 0 | let x_u = x.to_bits(); |
386 | 0 | let x_abs = x_u & 0x7fff_ffffu32; |
387 | | |
388 | 0 | if x_abs >= 0x4080_0000u32 { |
389 | | static ONE: [f32; 2] = [1.0, -1.0]; |
390 | | static SMALL: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)]; |
391 | | |
392 | 0 | let sign = x.is_sign_negative() as usize; |
393 | | |
394 | 0 | if x_abs >= 0x7f80_0000u32 { |
395 | 0 | return if x_abs > 0x7f80_0000 { x } else { ONE[sign] }; |
396 | 0 | } |
397 | | |
398 | 0 | return ONE[sign] + SMALL[sign]; |
399 | 0 | } |
400 | | |
401 | | // Polynomial approximation: |
402 | | // erf(x) ~ x * (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14) |
403 | 0 | let xd = x as f64; |
404 | 0 | let xsq = xd * xd; |
405 | | |
406 | | const EIGHT: u32 = 3 << 23; |
407 | 0 | let idx = unsafe { f32::from_bits(x_abs.wrapping_add(EIGHT)).to_int_unchecked::<usize>() }; |
408 | | |
409 | 0 | let c = COEFFS[idx]; |
410 | | |
411 | 0 | let x4 = xsq * xsq; |
412 | 0 | let c0 = backend.fma(xsq, f64::from_bits(c[1]), f64::from_bits(c[0])); |
413 | 0 | let c1 = backend.fma(xsq, f64::from_bits(c[3]), f64::from_bits(c[2])); |
414 | 0 | let c2 = backend.fma(xsq, f64::from_bits(c[5]), f64::from_bits(c[4])); |
415 | 0 | let c3 = backend.fma(xsq, f64::from_bits(c[7]), f64::from_bits(c[6])); |
416 | | |
417 | 0 | let x8 = x4 * x4; |
418 | 0 | let p0 = backend.fma(x4, c1, c0); |
419 | 0 | let p1 = backend.fma(x4, c3, c2); |
420 | | |
421 | 0 | (xd * backend.fma(x8, p1, p0)) as f32 |
422 | 0 | } Unexecuted instantiation: pxfm::err::erff::erff_gen::<pxfm::err::erff::FmaErffBackend> Unexecuted instantiation: pxfm::err::erff::erff_gen::<pxfm::err::erff::GenErffBackend> |
423 | | |
424 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
425 | | #[target_feature(enable = "avx", enable = "fma")] |
426 | 0 | unsafe fn erff_fma_impl(x: f32) -> f32 { |
427 | 0 | erff_gen(x, FmaErffBackend {}) |
428 | 0 | } |
429 | | |
430 | | /// Error function |
431 | | /// |
432 | | /// Max ulp 0.5 |
433 | | #[inline] |
434 | 0 | pub fn f_erff(x: f32) -> f32 { |
435 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
436 | | { |
437 | | crate::err::erff::erff_gen(x, GenErffBackend {}) |
438 | | } |
439 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
440 | | { |
441 | | use std::sync::OnceLock; |
442 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
443 | 0 | let q = EXECUTOR.get_or_init(|| { |
444 | 0 | if std::arch::is_x86_feature_detected!("avx") |
445 | 0 | && std::arch::is_x86_feature_detected!("fma") |
446 | | { |
447 | 0 | erff_fma_impl |
448 | | } else { |
449 | 0 | fn def_erff(x: f32) -> f32 { |
450 | 0 | erff_gen(x, GenErffBackend {}) |
451 | 0 | } |
452 | 0 | def_erff |
453 | | } |
454 | 0 | }); |
455 | 0 | unsafe { q(x) } |
456 | | } |
457 | 0 | } |
458 | | |
459 | | #[cfg(test)] |
460 | | mod tests { |
461 | | use super::*; |
462 | | |
463 | | #[test] |
464 | | fn f_erff_test() { |
465 | | assert_eq!(f_erff(0.0), 0.0); |
466 | | assert_eq!(f_erff(1.0), 0.8427008); |
467 | | assert_eq!(f_erff(0.5), 0.5204999); |
468 | | assert_eq!(f_erff(f32::INFINITY), 1.0); |
469 | | assert_eq!(f_erff(f32::NEG_INFINITY), -1.0); |
470 | | assert!(f_erff(f32::NAN).is_nan()); |
471 | | } |
472 | | } |