/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/err/rerf.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::err::rerf_poly::RERF_HARD; |
32 | | use crate::polyeval::f_polyeval7; |
33 | | |
34 | | #[cold] |
35 | | #[inline(never)] |
36 | 0 | fn rerf_poly_tiny_hard(x: f64, z2: DoubleDouble) -> f64 { |
37 | | // Polynomial for x/erf(x) |
38 | | // Generated by Sollya. |
39 | | // d = [0, 1/16]; |
40 | | // f = x/erf(x); |
41 | | // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107...|], d); |
42 | | // See ./notes/r_erf_tiny_hard.sollya |
43 | | const C: [(u64, u64); 10] = [ |
44 | | (0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b), |
45 | | (0xbc6d7696fe4a7cd0, 0x3fd2e7fb0bcdf4f2), |
46 | | (0xbc0cb8b926064434, 0x3f842aa561ecc102), |
47 | | (0x3c1cd94c2f3e6f09, 0xbf75207c7ef80727), |
48 | | (0xbbb35c4effe3c87c, 0x3f2db4a8d7c32472), |
49 | | (0x3bbf1d1edd1e109a, 0x3f20faa7a99a4d3d), |
50 | | (0xbb9e05d21f4e1755, 0xbef3adb84631c39c), |
51 | | (0x3b6ee5dc31565280, 0xbec366647cacdcc9), |
52 | | (0x3b3698f8162c5fac, 0x3eaabb9db9f3b048), |
53 | | (0xbb026f5401fce891, 0xbe66cd40349520b6), |
54 | | ]; |
55 | 0 | let mut p = DoubleDouble::mul_add( |
56 | 0 | z2, |
57 | 0 | DoubleDouble::from_bit_pair(C[9]), |
58 | 0 | DoubleDouble::from_bit_pair(C[8]), |
59 | | ); |
60 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[7])); |
61 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[6])); |
62 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[5])); |
63 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[4])); |
64 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[3])); |
65 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[2])); |
66 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[1])); |
67 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[0])); |
68 | 0 | p = DoubleDouble::from_exact_add(p.hi, p.lo); |
69 | 0 | let z = DoubleDouble::div_dd_f64(p, x); |
70 | 0 | z.to_f64() |
71 | 0 | } |
72 | | |
73 | | #[inline] |
74 | 0 | fn rerf_poly_tiny(z: f64, x: f64) -> f64 { |
75 | 0 | let z2 = DoubleDouble::from_exact_mult(z, z); |
76 | | |
77 | | // Polynomial for x/erf(x) |
78 | | // Generated by Sollya. |
79 | | // d = [0, 1/16]; |
80 | | // f = x/erf(x); |
81 | | // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107, 107, 107, D...|], d); |
82 | | // See ./notes/r_erf_tiny.sollya |
83 | 0 | let p = f_polyeval7( |
84 | 0 | z2.hi, |
85 | 0 | f64::from_bits(0xbf75207c7ef80727), |
86 | 0 | f64::from_bits(0x3f2db4a8d7c36a03), |
87 | 0 | f64::from_bits(0x3f20faa7a8db7f27), |
88 | 0 | f64::from_bits(0xbef3adae94983bb2), |
89 | 0 | f64::from_bits(0xbec3b05fe5c49f32), |
90 | 0 | f64::from_bits(0x3ed67902690892be), |
91 | 0 | f64::from_bits(0xbf3090033375e5ee), |
92 | | ); |
93 | 0 | let mut r = DoubleDouble::quick_mul_f64_add( |
94 | 0 | z2, |
95 | 0 | p, |
96 | 0 | DoubleDouble::from_bit_pair((0xbc0cb29fd910c494, 0x3f842aa561ecc102)), |
97 | | ); |
98 | 0 | r = DoubleDouble::quick_mul_add( |
99 | 0 | z2, |
100 | 0 | r, |
101 | 0 | DoubleDouble::from_bit_pair((0xbc6d7696ff4f712a, 0x3fd2e7fb0bcdf4f2)), |
102 | 0 | ); |
103 | 0 | r = DoubleDouble::quick_mul_add( |
104 | 0 | z2, |
105 | 0 | r, |
106 | 0 | DoubleDouble::from_bit_pair((0xbc8618f13eb7ca11, 0x3fec5bf891b4ef6b)), |
107 | 0 | ); |
108 | 0 | r = DoubleDouble::from_exact_add(r.hi, r.lo); |
109 | 0 | r = DoubleDouble::div_dd_f64(r, x); |
110 | 0 | let err = f_fmla( |
111 | 0 | r.hi, |
112 | 0 | f64::from_bits(0x3c10000000000000), // 2^-62 |
113 | 0 | f64::from_bits(0x3b90000000000000), // 2^-70 |
114 | | ); |
115 | 0 | let ub = r.hi + (r.lo + err); |
116 | 0 | let lb = r.hi + (r.lo - err); |
117 | 0 | if ub == lb { |
118 | 0 | return r.to_f64(); |
119 | 0 | } |
120 | 0 | rerf_poly_tiny_hard(x, z2) |
121 | 0 | } |
122 | | |
123 | | #[inline] |
124 | 0 | fn rerf_poly_hard(x: f64, z2: DoubleDouble, idx: usize) -> f64 { |
125 | 0 | let c = &RERF_HARD[idx]; |
126 | 0 | let mut p = DoubleDouble::mul_add( |
127 | 0 | z2, |
128 | 0 | DoubleDouble::from_bit_pair(c[10]), |
129 | 0 | DoubleDouble::from_bit_pair(c[9]), |
130 | | ); |
131 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[8])); |
132 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[7])); |
133 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[6])); |
134 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[5])); |
135 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[4])); |
136 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[3])); |
137 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[2])); |
138 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[1])); |
139 | 0 | p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[0])); |
140 | 0 | p = DoubleDouble::from_exact_add(p.hi, p.lo); |
141 | 0 | let z = DoubleDouble::div_dd_f64(p, x); |
142 | 0 | z.to_f64() |
143 | 0 | } |
144 | | |
145 | | /// Computes 1/erf(x) |
146 | | /// |
147 | | /// Max ulp 0.5001 |
148 | 0 | pub fn f_rerf(x: f64) -> f64 { |
149 | 0 | let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff); |
150 | 0 | let t = z.to_bits(); |
151 | 0 | let ux = t; |
152 | | /* 1/erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */ |
153 | 0 | if ux > 0x4017afb48dc96626 |
154 | | // |x| > 0x4017afb48dc96626 |
155 | | { |
156 | 0 | let os = f64::copysign(1.0, x); |
157 | | const MASK: u64 = 0x7ff0000000000000u64; |
158 | 0 | if ux > MASK { |
159 | 0 | return x + x; /* NaN */ |
160 | 0 | } |
161 | 0 | if ux == MASK { |
162 | 0 | return os; /* +/-Inf */ |
163 | 0 | } |
164 | 0 | return f_fmla(-f64::from_bits(0x3c90000000000000), os, os); |
165 | 0 | } |
166 | | |
167 | | /* now |x| <= 0x4017afb48dc96626 */ |
168 | 0 | if z < f64::from_bits(0x3c20000000000000) { |
169 | | // |x| < 0.0000000000000000004336808689942018 |
170 | | /* for x=-0 the code below returns +0 which is wrong */ |
171 | 0 | if x == 0. { |
172 | 0 | return if x.is_sign_negative() { |
173 | 0 | f64::NEG_INFINITY |
174 | | } else { |
175 | 0 | f64::INFINITY |
176 | | }; |
177 | 0 | } |
178 | | |
179 | 0 | if z.to_bits() <= 0x38b7f12369dedu64 { |
180 | 0 | return if x.is_sign_negative() { |
181 | 0 | f64::NEG_INFINITY |
182 | | } else { |
183 | 0 | f64::INFINITY |
184 | | }; |
185 | 0 | } |
186 | | |
187 | | /* double-double approximation of 2/sqrt(pi) to nearest */ |
188 | | const SQRT_PI_OVER_2: DoubleDouble = DoubleDouble::new( |
189 | | f64::from_bits(0xbc8618f13eb7ca89), |
190 | | f64::from_bits(0x3fec5bf891b4ef6b), |
191 | | ); |
192 | | |
193 | | /* tiny x is Taylor Series: 1/erf(x) ~ sqrt(pi)/(2 * x) + O(x^3), where the ratio of the O(x^3) |
194 | | term to the main term is in x^2/3, thus less than 2^-123 */ |
195 | | /* scale x by 2^106 to get out the subnormal range */ |
196 | 0 | let sx = x * f64::from_bits(0x4690000000000000); |
197 | 0 | let mut prod = DoubleDouble::div_dd_f64(SQRT_PI_OVER_2, sx); |
198 | | // scale back by 2^106, since we're performed the division |
199 | 0 | prod = DoubleDouble::quick_mult_f64(prod, f64::from_bits(0x4690000000000000)); |
200 | 0 | return prod.to_f64(); |
201 | 0 | } |
202 | | |
203 | 0 | if z.to_bits() < 0x3fb0000000000000u64 { |
204 | 0 | return rerf_poly_tiny(z, x); |
205 | 0 | } |
206 | | |
207 | | const SIXTEEN: u64 = 4 << 52; |
208 | 0 | let idx = |
209 | 0 | unsafe { f64::from_bits(z.to_bits().wrapping_add(SIXTEEN)).to_int_unchecked::<usize>() }; |
210 | 0 | let z2 = DoubleDouble::from_exact_mult(z, z); |
211 | 0 | rerf_poly_hard(x, z2, idx) |
212 | 0 | } |
213 | | |
214 | | #[cfg(test)] |
215 | | mod tests { |
216 | | use super::*; |
217 | | |
218 | | #[test] |
219 | | fn test_erf() { |
220 | | assert_eq!(f_rerf(65.), 1.0); |
221 | | assert_eq!(f_rerf(3.), 1.0000220909849995); |
222 | | assert_eq!(f_rerf(-3.), -1.0000220909849995); |
223 | | assert_eq!(f_rerf(-0.03723630312089732), -23.811078627277197); |
224 | | assert_eq!( |
225 | | f_rerf(0.0000000000000000002336808689942018), |
226 | | 3.7924667486354975e18 |
227 | | ); |
228 | | assert_eq!(f_rerf(2.000225067138672), 1.004695025872889); |
229 | | assert_eq!(f_rerf(0.), f64::INFINITY); |
230 | | assert_eq!(f_rerf(-0.), f64::NEG_INFINITY); |
231 | | assert!(f_rerf(f64::NAN).is_nan()); |
232 | | } |
233 | | } |