/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/err/inverf.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::logs::fast_log_dd; |
32 | | use crate::polyeval::{f_polyeval4, f_polyeval5}; |
33 | | |
34 | | #[cold] |
35 | 0 | fn inverf_0p06_to_0p75(x: f64) -> f64 { |
36 | | // First step rational approximant is generated, but it's ill-conditioned, thus |
37 | | // we're using taylor expansion to create Newton form at the point. |
38 | | // Generated in Wolfram Mathematica: |
39 | | // <<FunctionApproximations` |
40 | | // ClearAll["Global`*"] |
41 | | // f[x_]:=InverseErf[x]/x |
42 | | // g[x_] =f[Sqrt[x]]; |
43 | | // {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100] |
44 | | // num=Numerator[approx][[1]]; |
45 | | // den=Denominator[approx][[1]]; |
46 | | // poly=den; |
47 | | // coeffs=CoefficientList[poly,z]; |
48 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
49 | | // x0=SetPrecision[0.5625,75]; |
50 | | // NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)] |
51 | | // coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}]; |
52 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]; |
53 | | const P: [(u64, u64); 10] = [ |
54 | | (0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8), |
55 | | (0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff), |
56 | | (0xbc857822d7ffd282, 0x3fe6f8443546010a), |
57 | | (0x3c68269c66dfb28a, 0xbff80996754ceb79), |
58 | | (0x3c543dce8990a9f9, 0x3ffcf778d5ef0504), |
59 | | (0xbc72fc55f73765f6, 0xbff433be821423d0), |
60 | | (0xbc66d05fb37c8592, 0x3fdf15f19e9d8da4), |
61 | | (0x3c56dfb85e83a2c5, 0xbfb770b6827e0829), |
62 | | (0x3bff1472ecdfa403, 0x3f7a98a2980282bb), |
63 | | (0x3baffb33d69d6276, 0xbf142a246fd2c07c), |
64 | | ]; |
65 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
66 | 0 | let vz = DoubleDouble::full_add_f64(x2, -0.5625); |
67 | | |
68 | 0 | let vx2 = vz * vz; |
69 | 0 | let vx4 = vx2 * vx2; |
70 | 0 | let vx8 = vx4 * vx4; |
71 | | |
72 | 0 | let p0 = DoubleDouble::mul_add( |
73 | 0 | vz, |
74 | 0 | DoubleDouble::from_bit_pair(P[1]), |
75 | 0 | DoubleDouble::from_bit_pair(P[0]), |
76 | | ); |
77 | 0 | let p1 = DoubleDouble::mul_add( |
78 | 0 | vz, |
79 | 0 | DoubleDouble::from_bit_pair(P[3]), |
80 | 0 | DoubleDouble::from_bit_pair(P[2]), |
81 | | ); |
82 | 0 | let p2 = DoubleDouble::mul_add( |
83 | 0 | vz, |
84 | 0 | DoubleDouble::from_bit_pair(P[5]), |
85 | 0 | DoubleDouble::from_bit_pair(P[4]), |
86 | | ); |
87 | 0 | let p3 = DoubleDouble::mul_add( |
88 | 0 | vz, |
89 | 0 | DoubleDouble::from_bit_pair(P[7]), |
90 | 0 | DoubleDouble::from_bit_pair(P[6]), |
91 | | ); |
92 | 0 | let p4 = DoubleDouble::mul_add( |
93 | 0 | vz, |
94 | 0 | DoubleDouble::from_bit_pair(P[9]), |
95 | 0 | DoubleDouble::from_bit_pair(P[8]), |
96 | | ); |
97 | | |
98 | 0 | let q0 = DoubleDouble::mul_add(vx2, p1, p0); |
99 | 0 | let q1 = DoubleDouble::mul_add(vx2, p3, p2); |
100 | | |
101 | 0 | let r0 = DoubleDouble::mul_add(vx4, q1, q0); |
102 | 0 | let num = DoubleDouble::mul_add(vx8, p4, r0); |
103 | | // Generated in Wolfram Mathematica: |
104 | | // <<FunctionApproximations` |
105 | | // ClearAll["Global`*"] |
106 | | // f[x_]:=InverseErf[x]/x |
107 | | // g[x_] =f[Sqrt[x]]; |
108 | | // {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100] |
109 | | // num=Numerator[approx][[1]]; |
110 | | // den=Denominator[approx][[1]]; |
111 | | // coeffs=CoefficientList[poly,z]; |
112 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
113 | | // x0=SetPrecision[0.5625,75]; |
114 | | // NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)] |
115 | | // coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}]; |
116 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]; |
117 | | const Q: [(u64, u64); 10] = [ |
118 | | (0xbc36337f24e57cb9, 0x3f92388d5d757e3a), |
119 | | (0xbc63dfae43d60e0b, 0xbfc6ca7da581358c), |
120 | | (0xbc77656389bd0e62, 0x3fe7c82ce417b4e0), |
121 | | (0xbc93679667bef2f0, 0xbffad58651fd1a51), |
122 | | (0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242), |
123 | | (0xbc9b58961ba253bc, 0xbffbdaeff6fbb81c), |
124 | | (0x3c7861f549c6aa61, 0x3fe91b12cf47da3a), |
125 | | (0xbc696dfd665b2f5e, 0xbfc7c5d0ffb7f1da), |
126 | | (0x3c1552b0ec0ba7b3, 0x3f939ada247f7609), |
127 | | (0xbbcaa226fb7b30a8, 0xbf41be65038ccfe6), |
128 | | ]; |
129 | | |
130 | 0 | let p0 = DoubleDouble::mul_add( |
131 | 0 | vz, |
132 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
133 | 0 | DoubleDouble::from_bit_pair(Q[0]), |
134 | | ); |
135 | 0 | let p1 = DoubleDouble::mul_add( |
136 | 0 | vz, |
137 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
138 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
139 | | ); |
140 | 0 | let p2 = DoubleDouble::mul_add( |
141 | 0 | vz, |
142 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
143 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
144 | | ); |
145 | 0 | let p3 = DoubleDouble::mul_add( |
146 | 0 | vz, |
147 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
148 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
149 | | ); |
150 | 0 | let p4 = DoubleDouble::mul_add( |
151 | 0 | vz, |
152 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
153 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
154 | | ); |
155 | | |
156 | 0 | let q0 = DoubleDouble::mul_add(vx2, p1, p0); |
157 | 0 | let q1 = DoubleDouble::mul_add(vx2, p3, p2); |
158 | | |
159 | 0 | let r0 = DoubleDouble::mul_add(vx4, q1, q0); |
160 | 0 | let den = DoubleDouble::mul_add(vx8, p4, r0); |
161 | | |
162 | 0 | let r = DoubleDouble::div(num, den); |
163 | 0 | let k = DoubleDouble::quick_mult_f64(r, x); |
164 | 0 | k.to_f64() |
165 | 0 | } |
166 | | |
167 | | #[inline] |
168 | 0 | fn inverf_asympt_small(z: DoubleDouble, zeta_sqrt: DoubleDouble, x: f64) -> f64 { |
169 | | // Generated in Wolfram Mathematica: |
170 | | // <<FunctionApproximations` |
171 | | // ClearAll["Global`*"] |
172 | | // f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] ) |
173 | | // {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},10,10},WorkingPrecision->90] |
174 | | // num=Numerator[approx]; |
175 | | // den=Denominator[approx]; |
176 | | // poly=num; |
177 | | // coeffs=CoefficientList[poly,z]; |
178 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
179 | | const P: [(u64, u64); 11] = [ |
180 | | (0x3c936555853a8b2c, 0x3ff0001df06a2515), |
181 | | (0x3cea488e802db3c3, 0x404406ba373221da), |
182 | | (0xbce27d42419754e3, 0x407b0442e38a9597), |
183 | | (0xbd224a407624cbdf, 0x409c9277e31ef446), |
184 | | (0x3d4f16ce65d6fea0, 0x40aec3ec005b1d8a), |
185 | | (0x3d105bc37bc61b58, 0x40b46be8f860f4d9), |
186 | | (0x3d5ca133dcdecaa0, 0x40b3826e6a32dad7), |
187 | | (0x3d1d52013ba8aa38, 0x40aae93a603cf3ea), |
188 | | (0xbd07a75306df0fc3, 0x4098ab8357dc2e51), |
189 | | (0x3d1bb6770bb7a27e, 0x407ebead00879010), |
190 | | (0xbbfcbff4a9737936, 0x3f8936117ccbff83), |
191 | | ]; |
192 | | |
193 | 0 | let z2 = DoubleDouble::quick_mult(z, z); |
194 | 0 | let z4 = DoubleDouble::quick_mult(z2, z2); |
195 | 0 | let z8 = DoubleDouble::quick_mult(z4, z4); |
196 | | |
197 | 0 | let q0 = DoubleDouble::mul_add( |
198 | 0 | DoubleDouble::from_bit_pair(P[1]), |
199 | 0 | z, |
200 | 0 | DoubleDouble::from_bit_pair(P[0]), |
201 | | ); |
202 | 0 | let q1 = DoubleDouble::mul_add( |
203 | 0 | DoubleDouble::from_bit_pair(P[3]), |
204 | 0 | z, |
205 | 0 | DoubleDouble::from_bit_pair(P[2]), |
206 | | ); |
207 | 0 | let q2 = DoubleDouble::mul_add( |
208 | 0 | DoubleDouble::from_bit_pair(P[5]), |
209 | 0 | z, |
210 | 0 | DoubleDouble::from_bit_pair(P[4]), |
211 | | ); |
212 | 0 | let q3 = DoubleDouble::mul_add( |
213 | 0 | DoubleDouble::from_bit_pair(P[7]), |
214 | 0 | z, |
215 | 0 | DoubleDouble::from_bit_pair(P[6]), |
216 | | ); |
217 | 0 | let q4 = DoubleDouble::mul_add( |
218 | 0 | DoubleDouble::from_bit_pair(P[9]), |
219 | 0 | z, |
220 | 0 | DoubleDouble::from_bit_pair(P[8]), |
221 | | ); |
222 | | |
223 | 0 | let r0 = DoubleDouble::mul_add(z2, q1, q0); |
224 | 0 | let r1 = DoubleDouble::mul_add(z2, q3, q2); |
225 | | |
226 | 0 | let s0 = DoubleDouble::mul_add(z4, r1, r0); |
227 | 0 | let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(P[10]), q4); |
228 | 0 | let num = DoubleDouble::mul_add(z8, s1, s0); |
229 | | |
230 | | // See numerator generation above: |
231 | | // poly=den; |
232 | | // coeffs=CoefficientList[poly,z]; |
233 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
234 | | const Q: [(u64, u64); 11] = [ |
235 | | (0x0000000000000000, 0x3ff0000000000000), |
236 | | (0xbc75b1109d4a3262, 0x40440782efaab17f), |
237 | | (0x3d1f7775b207d84f, 0x407b2da74b0d39f2), |
238 | | (0xbd3291fdbab49501, 0x409dac8d9e7c90b2), |
239 | | (0xbd58d8fdd27707a9, 0x40b178dfeffa3192), |
240 | | (0xbd57fc74ad705ce0, 0x40bad19b686f219f), |
241 | | (0x3d4075510031f2cd, 0x40be70a598208cea), |
242 | | (0xbd5442e109152efb, 0x40b9683ef36ae330), |
243 | | (0x3d5398192933962e, 0x40b04b7c4c3ca8ee), |
244 | | (0x3d2d04d03598e303, 0x409bd0080799fbf1), |
245 | | (0x3d2a988eb552ef44, 0x40815a46f12bafe3), |
246 | | ]; |
247 | | |
248 | 0 | let q0 = DoubleDouble::mul_add_f64( |
249 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
250 | 0 | z, |
251 | 0 | f64::from_bits(0x3ff0000000000000), |
252 | | ); |
253 | 0 | let q1 = DoubleDouble::mul_add( |
254 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
255 | 0 | z, |
256 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
257 | | ); |
258 | 0 | let q2 = DoubleDouble::mul_add( |
259 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
260 | 0 | z, |
261 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
262 | | ); |
263 | 0 | let q3 = DoubleDouble::mul_add( |
264 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
265 | 0 | z, |
266 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
267 | | ); |
268 | 0 | let q4 = DoubleDouble::mul_add( |
269 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
270 | 0 | z, |
271 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
272 | | ); |
273 | | |
274 | 0 | let r0 = DoubleDouble::mul_add(z2, q1, q0); |
275 | 0 | let r1 = DoubleDouble::mul_add(z2, q3, q2); |
276 | | |
277 | 0 | let s0 = DoubleDouble::mul_add(z4, r1, r0); |
278 | 0 | let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(Q[10]), q4); |
279 | 0 | let den = DoubleDouble::mul_add(z8, s1, s0); |
280 | 0 | let r = DoubleDouble::div(num, den); |
281 | 0 | let k = DoubleDouble::quick_mult(r, zeta_sqrt); |
282 | 0 | f64::copysign(k.to_f64(), x) |
283 | 0 | } |
284 | | |
285 | | // branch for |x| > 0.9999 for extreme tail |
286 | | #[cold] |
287 | 0 | fn inverf_asympt_long(z: DoubleDouble, zeta_sqrt: DoubleDouble, x: f64) -> f64 { |
288 | | // First step rational approximant is generated, but it's ill-conditioned, thus |
289 | | // we're using taylor expansion to create Newton form at the point. |
290 | | // Generated in Wolfram Mathematica: |
291 | | // <<FunctionApproximations` |
292 | | // ClearAll["Global`*"] |
293 | | // f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] ) |
294 | | // {err0, approx}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},13,13},WorkingPrecision->90] |
295 | | // num=Numerator[approx][[1]]; |
296 | | // den=Denominator[approx][[1]]; |
297 | | // poly=num; |
298 | | // coeffs=CoefficientList[poly,z]; |
299 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
300 | | const P: [(u64, u64); 14] = [ |
301 | | (0x3c97612f9b24a614, 0x3ff0000ba84cc7a5), |
302 | | (0xbcee8fe2da463412, 0x40515246546f5d88), |
303 | | (0x3d2fa4a2b891b526, 0x40956b6837159b11), |
304 | | (0x3d5d673ffad4f817, 0x40c5a1aa3be58652), |
305 | | (0x3d8867a1e5506f88, 0x40e65ebb1e1e7c75), |
306 | | (0xbd9bbc0764ed8f5b, 0x40fd2064a652e5c2), |
307 | | (0xbda78e569c0d237f, 0x410a385c627c461c), |
308 | | (0xbdab3123ebc465d7, 0x4110f05ca2b65fe5), |
309 | | (0x3d960def35955192, 0x4110bb079af2fe08), |
310 | | (0xbd97904816054836, 0x410911c24610c11c), |
311 | | (0xbd937745e9192593, 0x40fc603244adca35), |
312 | | (0xbd65fbc476d63050, 0x40e6399103188c21), |
313 | | (0xbd61016ef381cce6, 0x40c6482b44995b89), |
314 | | (0x3c326105c49e5a1a, 0xbfab44bd8b4e3138), |
315 | | ]; |
316 | | |
317 | 0 | let z2 = z * z; |
318 | 0 | let z4 = z2 * z2; |
319 | 0 | let z8 = z4 * z4; |
320 | | |
321 | 0 | let g0 = DoubleDouble::mul_add( |
322 | 0 | z, |
323 | 0 | DoubleDouble::from_bit_pair(P[1]), |
324 | 0 | DoubleDouble::from_bit_pair(P[0]), |
325 | | ); |
326 | 0 | let g1 = DoubleDouble::mul_add( |
327 | 0 | z, |
328 | 0 | DoubleDouble::from_bit_pair(P[3]), |
329 | 0 | DoubleDouble::from_bit_pair(P[2]), |
330 | | ); |
331 | 0 | let g2 = DoubleDouble::mul_add( |
332 | 0 | z, |
333 | 0 | DoubleDouble::from_bit_pair(P[5]), |
334 | 0 | DoubleDouble::from_bit_pair(P[4]), |
335 | | ); |
336 | 0 | let g3 = DoubleDouble::mul_add( |
337 | 0 | z, |
338 | 0 | DoubleDouble::from_bit_pair(P[7]), |
339 | 0 | DoubleDouble::from_bit_pair(P[6]), |
340 | | ); |
341 | 0 | let g4 = DoubleDouble::mul_add( |
342 | 0 | z, |
343 | 0 | DoubleDouble::from_bit_pair(P[9]), |
344 | 0 | DoubleDouble::from_bit_pair(P[8]), |
345 | | ); |
346 | 0 | let g5 = DoubleDouble::mul_add( |
347 | 0 | z, |
348 | 0 | DoubleDouble::from_bit_pair(P[11]), |
349 | 0 | DoubleDouble::from_bit_pair(P[10]), |
350 | | ); |
351 | 0 | let g6 = DoubleDouble::mul_add( |
352 | 0 | z, |
353 | 0 | DoubleDouble::from_bit_pair(P[13]), |
354 | 0 | DoubleDouble::from_bit_pair(P[12]), |
355 | | ); |
356 | | |
357 | 0 | let h0 = DoubleDouble::mul_add(z2, g1, g0); |
358 | 0 | let h1 = DoubleDouble::mul_add(z2, g3, g2); |
359 | 0 | let h2 = DoubleDouble::mul_add(z2, g5, g4); |
360 | | |
361 | 0 | let q0 = DoubleDouble::mul_add(z4, h1, h0); |
362 | 0 | let q1 = DoubleDouble::mul_add(z4, g6, h2); |
363 | | |
364 | 0 | let num = DoubleDouble::mul_add(z8, q1, q0); |
365 | | |
366 | | // See numerator generation above: |
367 | | // poly=den; |
368 | | // coeffs=CoefficientList[poly,z]; |
369 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
370 | | const Q: [(u64, u64); 14] = [ |
371 | | (0x0000000000000000, 0x3ff0000000000000), |
372 | | (0xbcfc7b886ee61417, 0x405152838f711f3c), |
373 | | (0xbd33f933c14e831a, 0x409576cb78cab36e), |
374 | | (0x3d33fb09e2c4898a, 0x40c5e8a2c7602ced), |
375 | | (0x3d7be430c664bf7e, 0x40e766fdc8c7638c), |
376 | | (0x3dac662e74cdfc0e, 0x4100276b5f47b5f1), |
377 | | (0x3da67d06e82a8495, 0x410f843887f8a24a), |
378 | | (0x3dbbf2e22fc2550a, 0x4116d04271703e08), |
379 | | (0xbdb2fb3aed100853, 0x4119aff4ed32b74b), |
380 | | (0x3dba75e7b7171c3c, 0x4116b5eb8bf386bd), |
381 | | (0x3dab2d8b8c1937eb, 0x410f71c38e84cb34), |
382 | | (0xbda4e2e8a50b7370, 0x4100ca04b0f36b94), |
383 | | (0xbd86ed6df34fdaf9, 0x40e9151ded4cf4b7), |
384 | | (0x3d6938ea702c0328, 0x40c923ee1ab270c4), |
385 | | ]; |
386 | | |
387 | 0 | let g0 = DoubleDouble::mul_add( |
388 | 0 | z, |
389 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
390 | 0 | DoubleDouble::from_bit_pair(Q[0]), |
391 | | ); |
392 | 0 | let g1 = DoubleDouble::mul_add( |
393 | 0 | z, |
394 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
395 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
396 | | ); |
397 | 0 | let g2 = DoubleDouble::mul_add( |
398 | 0 | z, |
399 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
400 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
401 | | ); |
402 | 0 | let g3 = DoubleDouble::mul_add( |
403 | 0 | z, |
404 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
405 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
406 | | ); |
407 | 0 | let g4 = DoubleDouble::mul_add( |
408 | 0 | z, |
409 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
410 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
411 | | ); |
412 | 0 | let g5 = DoubleDouble::mul_add( |
413 | 0 | z, |
414 | 0 | DoubleDouble::from_bit_pair(Q[11]), |
415 | 0 | DoubleDouble::from_bit_pair(Q[10]), |
416 | | ); |
417 | 0 | let g6 = DoubleDouble::mul_add( |
418 | 0 | z, |
419 | 0 | DoubleDouble::from_bit_pair(Q[13]), |
420 | 0 | DoubleDouble::from_bit_pair(Q[12]), |
421 | | ); |
422 | | |
423 | 0 | let h0 = DoubleDouble::mul_add(z2, g1, g0); |
424 | 0 | let h1 = DoubleDouble::mul_add(z2, g3, g2); |
425 | 0 | let h2 = DoubleDouble::mul_add(z2, g5, g4); |
426 | | |
427 | 0 | let q0 = DoubleDouble::mul_add(z4, h1, h0); |
428 | 0 | let q1 = DoubleDouble::mul_add(z4, g6, h2); |
429 | | |
430 | 0 | let den = DoubleDouble::mul_add(z8, q1, q0); |
431 | 0 | let r = DoubleDouble::div(num, den); |
432 | | |
433 | 0 | let k = DoubleDouble::quick_mult(r, zeta_sqrt); |
434 | 0 | f64::copysign(k.to_f64(), x) |
435 | 0 | } |
436 | | |
437 | | /// Inverse error function |
438 | | /// |
439 | | /// ulp 0.5 |
440 | 0 | pub fn f_erfinv(x: f64) -> f64 { |
441 | 0 | let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
442 | | |
443 | 0 | if ax >= 0x3ff0000000000000u64 || ax <= 0x3cb0000000000000u64 { |
444 | | // |x| >= 1, |x| == 0, |x| <= f64::EPSILON |
445 | 0 | if ax == 0 { |
446 | | // |x| == 0 |
447 | 0 | return 0.; |
448 | 0 | } |
449 | | |
450 | 0 | if ax <= 0x3cb0000000000000u64 { |
451 | | // |x| <= f64::EPSILON |
452 | | // inverf(x) ~ Sqrt[Pi]x/2+O[x]^3 |
453 | | const SQRT_PI_OVER_2: f64 = f64::from_bits(0x3fec5bf891b4ef6b); |
454 | 0 | return x * SQRT_PI_OVER_2; |
455 | 0 | } |
456 | | |
457 | | // |x| > 1 |
458 | 0 | if ax == 0x3ff0000000000000u64 { |
459 | | // |x| == 1 |
460 | 0 | return if x.is_sign_negative() { |
461 | 0 | f64::NEG_INFINITY |
462 | | } else { |
463 | 0 | f64::INFINITY |
464 | | }; |
465 | 0 | } |
466 | 0 | return f64::NAN; // x == NaN, x = Inf, x > 1 |
467 | 0 | } |
468 | | |
469 | 0 | let z = f64::from_bits(ax); |
470 | | |
471 | 0 | if ax <= 0x3f8374bc6a7ef9db { |
472 | | // 0.0095 |
473 | | // for small |x| using taylor series first 3 terms |
474 | | // Generated by SageMath: |
475 | | // from mpmath import mp, erf |
476 | | // |
477 | | // mp.prec = 100 |
478 | | // |
479 | | // def inverf_series(n_terms): |
480 | | // from mpmath import taylor |
481 | | // series_erf = taylor(mp.erfinv, 0, n_terms) |
482 | | // return series_erf |
483 | | // |
484 | | // ser = inverf_series(10) |
485 | | // for i in range(1, len(ser), 2): |
486 | | // k = ser[i] |
487 | | // print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),") |
488 | 0 | let z2 = DoubleDouble::from_exact_mult(z, z); |
489 | 0 | let p = f_fmla( |
490 | 0 | z2.hi, |
491 | 0 | f64::from_bits(0x3fb62847c47dda48), |
492 | 0 | f64::from_bits(0x3fc053c2c0ab91c5), |
493 | | ); |
494 | 0 | let mut r = DoubleDouble::mul_f64_add( |
495 | 0 | z2, |
496 | 0 | p, |
497 | 0 | DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)), |
498 | | ); |
499 | 0 | r = DoubleDouble::mul_add( |
500 | 0 | z2, |
501 | 0 | r, |
502 | 0 | DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)), |
503 | 0 | ); |
504 | | // (rh + rl) * z = rh * z + rl*z |
505 | 0 | let v = DoubleDouble::quick_mult_f64(r, z); |
506 | 0 | return f64::copysign(v.to_f64(), x); |
507 | 0 | } else if ax <= 0x3faeb851eb851eb8 { |
508 | | // 0.06 |
509 | | // for |x| < 0.06 using taylor series first 5 terms |
510 | | // Generated by SageMath: |
511 | | // from mpmath import mp, erf |
512 | | // |
513 | | // mp.prec = 100 |
514 | | // |
515 | | // def inverf_series(n_terms): |
516 | | // from mpmath import taylor |
517 | | // series_erf = taylor(mp.erfinv, 0, n_terms) |
518 | | // return series_erf |
519 | | // |
520 | | // ser = inverf_series(10) |
521 | | // for i in range(1, len(ser), 2): |
522 | | // k = ser[i] |
523 | | // print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),") |
524 | 0 | let z2 = DoubleDouble::from_exact_mult(z, z); |
525 | 0 | let p = f_polyeval4( |
526 | 0 | z2.hi, |
527 | 0 | f64::from_bits(0x3fb62847c47dda48), |
528 | 0 | f64::from_bits(0x3fb0a13189c6ef7a), |
529 | 0 | f64::from_bits(0x3faa7c85c89bb08b), |
530 | 0 | f64::from_bits(0x3fa5eeb1d488e312), |
531 | | ); |
532 | 0 | let mut r = DoubleDouble::mul_f64_add( |
533 | 0 | z2, |
534 | 0 | p, |
535 | 0 | DoubleDouble::from_bit_pair((0x3c2cec68daff0d80, 0x3fc053c2c0ab91c5)), |
536 | | ); |
537 | 0 | r = DoubleDouble::mul_add( |
538 | 0 | z2, |
539 | 0 | r, |
540 | 0 | DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)), |
541 | 0 | ); |
542 | 0 | r = DoubleDouble::mul_add( |
543 | 0 | z2, |
544 | 0 | r, |
545 | 0 | DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)), |
546 | 0 | ); |
547 | | // (rh + rl) * z = rh * z + rl*z |
548 | 0 | let v = DoubleDouble::quick_mult_f64(r, z); |
549 | 0 | return f64::copysign(v.to_f64(), x); |
550 | 0 | } |
551 | | |
552 | 0 | if ax <= 0x3fe8000000000000u64 { |
553 | | // |x| < 0.75 |
554 | | |
555 | | // First step rational approximant is generated, but it's ill-conditioned, thus |
556 | | // we're using taylor expansion to create Newton form at the point. |
557 | | // Generated in Wolfram Mathematica: |
558 | | // <<FunctionApproximations` |
559 | | // ClearAll["Global`*"] |
560 | | // f[x_]:=InverseErf[x]/x |
561 | | // g[x_] =f[Sqrt[x]]; |
562 | | // {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100] |
563 | | // num=Numerator[approx][[1]]; |
564 | | // den=Denominator[approx][[1]]; |
565 | | // poly=den; |
566 | | // coeffs=CoefficientList[poly,z]; |
567 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
568 | | // x0=SetPrecision[0.5625,75]; |
569 | | // NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)] |
570 | | // coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}]; |
571 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]; |
572 | | const P: [(u64, u64); 5] = [ |
573 | | (0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8), |
574 | | (0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff), |
575 | | (0xbc857822d7ffd282, 0x3fe6f8443546010a), |
576 | | (0x3c68269c66dfb28a, 0xbff80996754ceb79), |
577 | | (0x3c543dce8990a9f9, 0x3ffcf778d5ef0504), |
578 | | ]; |
579 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
580 | 0 | let vz = DoubleDouble::full_add_f64(x2, -0.5625); |
581 | 0 | let ps_num = f_polyeval5( |
582 | 0 | vz.hi, |
583 | 0 | f64::from_bits(0xbff433be821423d0), |
584 | 0 | f64::from_bits(0x3fdf15f19e9d8da4), |
585 | 0 | f64::from_bits(0xbfb770b6827e0829), |
586 | 0 | f64::from_bits(0x3f7a98a2980282bb), |
587 | 0 | f64::from_bits(0xbf142a246fd2c07c), |
588 | | ); |
589 | 0 | let mut num = DoubleDouble::mul_f64_add(vz, ps_num, DoubleDouble::from_bit_pair(P[4])); |
590 | 0 | num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[3])); |
591 | 0 | num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[2])); |
592 | 0 | num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[1])); |
593 | 0 | num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[0])); |
594 | | |
595 | | // Generated in Wolfram Mathematica: |
596 | | // <<FunctionApproximations` |
597 | | // ClearAll["Global`*"] |
598 | | // f[x_]:=InverseErf[x]/x |
599 | | // g[x_] =f[Sqrt[x]]; |
600 | | // {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100] |
601 | | // num=Numerator[approx][[1]]; |
602 | | // den=Denominator[approx][[1]]; |
603 | | // coeffs=CoefficientList[poly,z]; |
604 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
605 | | // x0=SetPrecision[0.5625,75]; |
606 | | // NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)] |
607 | | // coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}]; |
608 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]; |
609 | | const Q: [(u64, u64); 5] = [ |
610 | | (0xbc36337f24e57cb9, 0x3f92388d5d757e3a), |
611 | | (0xbc63dfae43d60e0b, 0xbfc6ca7da581358c), |
612 | | (0xbc77656389bd0e62, 0x3fe7c82ce417b4e0), |
613 | | (0xbc93679667bef2f0, 0xbffad58651fd1a51), |
614 | | (0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242), |
615 | | ]; |
616 | | |
617 | 0 | let ps_den = f_polyeval5( |
618 | 0 | vz.hi, |
619 | 0 | f64::from_bits(0xbffbdaeff6fbb81c), |
620 | 0 | f64::from_bits(0x3fe91b12cf47da3a), |
621 | 0 | f64::from_bits(0xbfc7c5d0ffb7f1da), |
622 | 0 | f64::from_bits(0x3f939ada247f7609), |
623 | 0 | f64::from_bits(0xbf41be65038ccfe6), |
624 | | ); |
625 | | |
626 | 0 | let mut den = DoubleDouble::mul_f64_add(vz, ps_den, DoubleDouble::from_bit_pair(Q[4])); |
627 | 0 | den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[3])); |
628 | 0 | den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[2])); |
629 | 0 | den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[1])); |
630 | 0 | den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[0])); |
631 | 0 | let r = DoubleDouble::div(num, den); |
632 | 0 | let k = DoubleDouble::quick_mult_f64(r, z); |
633 | 0 | let err = f_fmla( |
634 | 0 | k.hi, |
635 | 0 | f64::from_bits(0x3c70000000000000), // 2^-56 |
636 | 0 | f64::from_bits(0x3c40000000000000), // 2^-59 |
637 | | ); |
638 | 0 | let ub = k.hi + (k.lo + err); |
639 | 0 | let lb = k.hi + (k.lo - err); |
640 | 0 | if ub == lb { |
641 | 0 | return f64::copysign(k.to_f64(), x); |
642 | 0 | } |
643 | 0 | return inverf_0p06_to_0p75(x); |
644 | 0 | } |
645 | | |
646 | 0 | let q = DoubleDouble::from_full_exact_add(1.0, -z); |
647 | | |
648 | 0 | let mut zeta = fast_log_dd(q); |
649 | 0 | zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo); |
650 | 0 | zeta = -zeta; |
651 | 0 | let zeta_sqrt = zeta.fast_sqrt(); |
652 | 0 | let rz = zeta_sqrt.recip(); |
653 | | |
654 | 0 | if z < 0.9999 { |
655 | 0 | inverf_asympt_small(rz, zeta_sqrt, x) |
656 | | } else { |
657 | 0 | inverf_asympt_long(rz, zeta_sqrt, x) |
658 | | } |
659 | 0 | } |
660 | | |
661 | | #[cfg(test)] |
662 | | mod tests { |
663 | | use super::*; |
664 | | |
665 | | #[test] |
666 | | fn test_erfinv() { |
667 | | assert!(f_erfinv(f64::NEG_INFINITY).is_nan()); |
668 | | assert!(f_erfinv(f64::INFINITY).is_nan()); |
669 | | assert!(f_erfinv(f64::NAN).is_nan()); |
670 | | assert_eq!(f_erfinv(f64::EPSILON), 1.9678190753608283e-16); |
671 | | assert_eq!(f_erfinv(-0.5435340000000265), -0.5265673336010599); |
672 | | assert_eq!(f_erfinv(0.5435340000000265), 0.5265673336010599); |
673 | | assert_eq!(f_erfinv(0.001000000000084706), 0.0008862271575416209); |
674 | | assert_eq!(f_erfinv(-0.001000000000084706), -0.0008862271575416209); |
675 | | assert_eq!(f_erfinv(0.71), 0.7482049711849852); |
676 | | assert_eq!(f_erfinv(-0.71), -0.7482049711849852); |
677 | | assert_eq!(f_erfinv(0.41), 0.381014610957532); |
678 | | assert_eq!(f_erfinv(-0.41), -0.381014610957532); |
679 | | assert_eq!(f_erfinv(0.32), 0.29165547581744206); |
680 | | assert_eq!(f_erfinv(-0.32), -0.29165547581744206); |
681 | | assert_eq!(f_erfinv(0.82), 0.9480569762323499); |
682 | | assert_eq!(f_erfinv(-0.82), -0.9480569762323499); |
683 | | assert_eq!(f_erfinv(0.05), 0.044340387910005497); |
684 | | assert_eq!(f_erfinv(-0.05), -0.044340387910005497); |
685 | | assert_eq!(f_erfinv(0.99), 1.8213863677184494); |
686 | | assert_eq!(f_erfinv(-0.99), -1.8213863677184494); |
687 | | assert_eq!(f_erfinv(0.9900000000867389), 1.8213863698392927); |
688 | | assert_eq!(f_erfinv(-0.9900000000867389), -1.8213863698392927); |
689 | | assert_eq!(f_erfinv(0.99999), 3.123413274341571); |
690 | | assert_eq!(f_erfinv(-0.99999), -3.123413274341571); |
691 | | } |
692 | | } |