/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/bessel/i0e.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bessel::bessel_exp::i0_exp_accurate; |
30 | | use crate::bessel::i0::{ |
31 | | bessel_rsqrt_hard, eval_small_hard_3p6_to_7p5, i0_0_to_3p6_dd, i0_0_to_3p6_hard, |
32 | | i0_3p6_to_7p5_dd, |
33 | | }; |
34 | | use crate::bessel::i0_exp; |
35 | | use crate::common::f_fmla; |
36 | | use crate::double_double::DoubleDouble; |
37 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
38 | | |
39 | | /// Modified exponentially scaled Bessel of the first kind of order 0 |
40 | | /// |
41 | | /// Computes exp(-|x|)*I0(x) |
42 | 0 | pub fn f_i0e(x: f64) -> f64 { |
43 | 0 | let ux = x.to_bits().wrapping_shl(1); |
44 | | |
45 | 0 | if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 { |
46 | | // |x| <= f64::EPSILON, |x| == inf, x == NaN |
47 | 0 | if ux <= 0x760af31dc4611874u64 { |
48 | | // |x| <= 2.2204460492503131e-24f64 |
49 | 0 | return 1.; |
50 | 0 | } |
51 | 0 | if ux <= 0x7960000000000000u64 { |
52 | | // |x| <= f64::EPSILON |
53 | | // Power series of I0(x)Exp[-x] ~ 1 - x + O(x^2) |
54 | 0 | return 1. - x; |
55 | 0 | } |
56 | 0 | if x.is_infinite() { |
57 | 0 | return 0.; |
58 | 0 | } |
59 | 0 | return x + f64::NAN; // x = NaN |
60 | 0 | } |
61 | | |
62 | 0 | let xb = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
63 | | |
64 | 0 | if xb <= 0x4023000000000000u64 { |
65 | | // |x| <= 9.5 |
66 | 0 | if xb <= 0x400ccccccccccccdu64 { |
67 | | // |x| <= 3.6 |
68 | 0 | return i0e_0_to_3p6_exec(f64::from_bits(xb)); |
69 | 0 | } else if xb <= 0x401e000000000000u64 { |
70 | | // |x| <= 7.5 |
71 | 0 | return i0e3p6_to_7p5(f64::from_bits(xb)); |
72 | 0 | } |
73 | 0 | return i0e_7p5_to_9p5(f64::from_bits(xb)); |
74 | 0 | } |
75 | | |
76 | 0 | i0e_asympt(f64::from_bits(xb)) |
77 | 0 | } |
78 | | |
79 | | /** |
80 | | Computes I0 on interval [-7.5; -3.6], [3.6; 7.5] |
81 | | **/ |
82 | | #[inline] |
83 | 0 | fn i0e3p6_to_7p5(x: f64) -> f64 { |
84 | 0 | let mut r = i0_3p6_to_7p5_dd(x); |
85 | | |
86 | 0 | let v_exp = i0_exp(-x); |
87 | 0 | r = DoubleDouble::quick_mult(r, v_exp); |
88 | | |
89 | 0 | let err = f_fmla( |
90 | 0 | r.hi, |
91 | 0 | f64::from_bits(0x3c56a09e667f3bcd), // 2^-57.5 |
92 | 0 | f64::from_bits(0x3c00000000000000), // 2^-63 |
93 | | ); |
94 | 0 | let ub = r.hi + (r.lo + err); |
95 | 0 | let lb = r.hi + (r.lo - err); |
96 | 0 | if ub == lb { |
97 | 0 | return ub; |
98 | 0 | } |
99 | 0 | let v = eval_small_hard_3p6_to_7p5(x); |
100 | 0 | let v_exp_accurate = i0_exp_accurate(-x); |
101 | 0 | DoubleDouble::quick_mult(v, v_exp_accurate).to_f64() |
102 | 0 | } |
103 | | |
104 | | #[inline] |
105 | 0 | fn i0e_0_to_3p6_exec(x: f64) -> f64 { |
106 | 0 | let mut r = i0_0_to_3p6_dd(x); |
107 | | |
108 | 0 | let v_exp = i0_exp(-x); |
109 | 0 | r = DoubleDouble::quick_mult(r, v_exp); |
110 | | |
111 | 0 | let err = f_fmla( |
112 | 0 | r.hi, |
113 | 0 | f64::from_bits(0x3c40000000000000), // 2^-59 |
114 | 0 | f64::from_bits(0x3be0000000000000), // 2^-66 |
115 | | ); |
116 | 0 | let ub = r.hi + (r.lo + err); |
117 | 0 | let lb = r.hi + (r.lo - err); |
118 | 0 | if ub == lb { |
119 | 0 | return ub; |
120 | 0 | } |
121 | 0 | let v = i0_0_to_3p6_hard(x); |
122 | 0 | let v_exp_accurate = i0_exp_accurate(-x); |
123 | 0 | DoubleDouble::quick_mult(v, v_exp_accurate).to_f64() |
124 | 0 | } |
125 | | |
126 | | /** |
127 | | Mid-interval [7.5;9.5] generated by Wolfram: |
128 | | I0(x)=R(1/x)/sqrt(x)*Exp(x) |
129 | | ```text |
130 | | <<FunctionApproximations` |
131 | | ClearAll["Global`*"] |
132 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
133 | | g[z_]:=f[1/z] |
134 | | {err, approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},11,11},WorkingPrecision->120] |
135 | | num=Numerator[approx][[1]]; |
136 | | den=Denominator[approx][[1]]; |
137 | | poly=den; |
138 | | coeffs=CoefficientList[poly,z]; |
139 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
140 | | ``` |
141 | | **/ |
142 | | #[inline] |
143 | 0 | fn i0e_7p5_to_9p5(x: f64) -> f64 { |
144 | 0 | let dx = x; |
145 | 0 | let recip = DoubleDouble::from_quick_recip(x); |
146 | | |
147 | | const P: [(u64, u64); 12] = [ |
148 | | (0x3c778e3de1f76f48, 0x3fd988450531281b), |
149 | | (0xbcb572f6149f389e, 0xc01a786676fb4d3a), |
150 | | (0x3cf2f373365347ed, 0x405c0e8405fdb642), |
151 | | (0x3d276a94c8f1e627, 0xc0885e4718dfb761), |
152 | | (0x3d569f8a993434e2, 0x40b756d52d5fa90c), |
153 | | (0xbd6f953f7dd1a223, 0xc0c8818365c47790), |
154 | | (0xbd74247967fbf7b2, 0x40e8cf89daf87353), |
155 | | (0x3db449add7abb056, 0x41145d3c2d96d159), |
156 | | (0xbdc5cc822b71f891, 0xc123694c58fd039b), |
157 | | (0x3da2047ac1a6fba8, 0x415462e630bf3e7e), |
158 | | (0xbdc2f2c06eda6a95, 0xc14c6984ebdd6792), |
159 | | (0xbdf51fa85dafeca5, 0x4166a437c202d27b), |
160 | | ]; |
161 | | |
162 | 0 | let x2 = DoubleDouble::quick_mult(recip, recip); |
163 | 0 | let x4 = DoubleDouble::quick_mult(x2, x2); |
164 | 0 | let x8 = DoubleDouble::quick_mult(x4, x4); |
165 | | |
166 | 0 | let e0 = DoubleDouble::mul_add( |
167 | 0 | recip, |
168 | 0 | DoubleDouble::from_bit_pair(P[1]), |
169 | 0 | DoubleDouble::from_bit_pair(P[0]), |
170 | | ); |
171 | 0 | let e1 = DoubleDouble::mul_add( |
172 | 0 | recip, |
173 | 0 | DoubleDouble::from_bit_pair(P[3]), |
174 | 0 | DoubleDouble::from_bit_pair(P[2]), |
175 | | ); |
176 | 0 | let e2 = DoubleDouble::mul_add( |
177 | 0 | recip, |
178 | 0 | DoubleDouble::from_bit_pair(P[5]), |
179 | 0 | DoubleDouble::from_bit_pair(P[4]), |
180 | | ); |
181 | 0 | let e3 = DoubleDouble::mul_add( |
182 | 0 | recip, |
183 | 0 | DoubleDouble::from_bit_pair(P[7]), |
184 | 0 | DoubleDouble::from_bit_pair(P[6]), |
185 | | ); |
186 | 0 | let e4 = DoubleDouble::mul_add( |
187 | 0 | recip, |
188 | 0 | DoubleDouble::from_bit_pair(P[9]), |
189 | 0 | DoubleDouble::from_bit_pair(P[8]), |
190 | | ); |
191 | 0 | let e5 = DoubleDouble::mul_add( |
192 | 0 | recip, |
193 | 0 | DoubleDouble::from_bit_pair(P[11]), |
194 | 0 | DoubleDouble::from_bit_pair(P[10]), |
195 | | ); |
196 | | |
197 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
198 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
199 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
200 | | |
201 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
202 | | |
203 | 0 | let p_num = DoubleDouble::mul_add(x8, f2, g0); |
204 | | |
205 | | const Q: [(u64, u64); 12] = [ |
206 | | (0x0000000000000000, 0x3ff0000000000000), |
207 | | (0x3cde08e4cbf324d1, 0xc030b67bd69af0ca), |
208 | | (0x3cec5e4ee7e77024, 0x4071b54c0f58409c), |
209 | | (0xbd340e1131739e2f, 0xc09f140a738b14b3), |
210 | | (0x3d607673189d6644, 0x40cdb44bd822add2), |
211 | | (0xbd7793a4f1dd74d1, 0xc0e03fe2689b802d), |
212 | | (0xbd8415501228a87e, 0x410009beafea72cc), |
213 | | (0x3dcecdac2702661f, 0x4128c2073da9a447), |
214 | | (0xbdd8386404f3dec5, 0xc1389ec7d7186bf4), |
215 | | (0xbe06eb53a3e86436, 0x4168b7a2dc85ed0b), |
216 | | (0x3e098e2cefaf8299, 0xc1604f8cf34af02c), |
217 | | (0x3e1a5e496b547001, 0x41776b1e0153d1e9), |
218 | | ]; |
219 | | |
220 | 0 | let e0 = DoubleDouble::mul_add_f64( |
221 | 0 | recip, |
222 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
223 | 0 | f64::from_bits(0x3ff0000000000000), |
224 | | ); |
225 | 0 | let e1 = DoubleDouble::mul_add( |
226 | 0 | recip, |
227 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
228 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
229 | | ); |
230 | 0 | let e2 = DoubleDouble::mul_add( |
231 | 0 | recip, |
232 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
233 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
234 | | ); |
235 | 0 | let e3 = DoubleDouble::mul_add( |
236 | 0 | recip, |
237 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
238 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
239 | | ); |
240 | 0 | let e4 = DoubleDouble::mul_add( |
241 | 0 | recip, |
242 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
243 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
244 | | ); |
245 | 0 | let e5 = DoubleDouble::mul_add( |
246 | 0 | recip, |
247 | 0 | DoubleDouble::from_bit_pair(Q[11]), |
248 | 0 | DoubleDouble::from_bit_pair(Q[10]), |
249 | | ); |
250 | | |
251 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
252 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
253 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
254 | | |
255 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
256 | | |
257 | 0 | let p_den = DoubleDouble::mul_add(x8, f2, g0); |
258 | | |
259 | 0 | let z = DoubleDouble::div(p_num, p_den); |
260 | | |
261 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(dx); |
262 | 0 | let r = z * r_sqrt; |
263 | | |
264 | 0 | let err = f_fmla( |
265 | 0 | r.hi, |
266 | 0 | f64::from_bits(0x3bc0000000000000), |
267 | 0 | f64::from_bits(0x392bdb8cdadbe111), |
268 | | ); |
269 | 0 | let up = r.hi + (r.lo + err); |
270 | 0 | let lb = r.hi + (r.lo - err); |
271 | 0 | if up != lb { |
272 | 0 | return i0e_7p5_to_9p5_hard(x); |
273 | 0 | } |
274 | 0 | r.to_f64() |
275 | 0 | } |
276 | | |
277 | | /** |
278 | | Mid-interval [7.5;9.5] generated by Wolfram Mathematica: |
279 | | I0(x)=R(1/x)/sqrt(x)*Exp(x) |
280 | | Polynomial generated by Wolfram Mathematica: |
281 | | ```text |
282 | | <<FunctionApproximations` |
283 | | ClearAll["Global`*"] |
284 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
285 | | g[z_]:=f[1/z] |
286 | | {err,approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},13,13},WorkingPrecision->120] |
287 | | poly=Numerator[approx][[1]]; |
288 | | coeffs=CoefficientList[poly,z]; |
289 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
290 | | poly=Denominator[approx][[1]]; |
291 | | coeffs=CoefficientList[poly,z]; |
292 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
293 | | ``` |
294 | | **/ |
295 | | #[cold] |
296 | | #[inline(never)] |
297 | 0 | fn i0e_7p5_to_9p5_hard(x: f64) -> f64 { |
298 | | static P: [DyadicFloat128; 14] = [ |
299 | | DyadicFloat128 { |
300 | | sign: DyadicSign::Pos, |
301 | | exponent: -129, |
302 | | mantissa: 0xcc422a04_45cde144_75a3800b_45c38460_u128, |
303 | | }, |
304 | | DyadicFloat128 { |
305 | | sign: DyadicSign::Neg, |
306 | | exponent: -124, |
307 | | mantissa: 0xada66144_fcccc1a3_036f76b2_cabd6281_u128, |
308 | | }, |
309 | | DyadicFloat128 { |
310 | | sign: DyadicSign::Pos, |
311 | | exponent: -120, |
312 | | mantissa: 0xeabdda02_fa201d98_10e58d1f_7eb62bd7_u128, |
313 | | }, |
314 | | DyadicFloat128 { |
315 | | sign: DyadicSign::Neg, |
316 | | exponent: -116, |
317 | | mantissa: 0xbbfd3297_6f88a7df_5924587b_d5bdcdb8_u128, |
318 | | }, |
319 | | DyadicFloat128 { |
320 | | sign: DyadicSign::Pos, |
321 | | exponent: -113, |
322 | | mantissa: 0xfca29453_efe393bf_1651627b_7d543875_u128, |
323 | | }, |
324 | | DyadicFloat128 { |
325 | | sign: DyadicSign::Neg, |
326 | | exponent: -110, |
327 | | mantissa: 0xee7c7220_bbbd248e_bb6adac6_f9a5ce95_u128, |
328 | | }, |
329 | | DyadicFloat128 { |
330 | | sign: DyadicSign::Pos, |
331 | | exponent: -107, |
332 | | mantissa: 0xc07455dd_830ba705_414408c6_06732a5a_u128, |
333 | | }, |
334 | | DyadicFloat128 { |
335 | | sign: DyadicSign::Neg, |
336 | | exponent: -105, |
337 | | mantissa: 0xe2247793_b50cd0f0_80e8981d_933f75da_u128, |
338 | | }, |
339 | | DyadicFloat128 { |
340 | | sign: DyadicSign::Pos, |
341 | | exponent: -103, |
342 | | mantissa: 0xe14a9831_82582a0b_dd27e8b6_4ed9aac2_u128, |
343 | | }, |
344 | | DyadicFloat128 { |
345 | | sign: DyadicSign::Neg, |
346 | | exponent: -101, |
347 | | mantissa: 0xa3b2ae2f_5b64f37e_c1538435_34f02faf_u128, |
348 | | }, |
349 | | DyadicFloat128 { |
350 | | sign: DyadicSign::Pos, |
351 | | exponent: -100, |
352 | | mantissa: 0xbab73503_5b7e38d9_bbe4a84b_9007c6e8_u128, |
353 | | }, |
354 | | DyadicFloat128 { |
355 | | sign: DyadicSign::Neg, |
356 | | exponent: -99, |
357 | | mantissa: 0xa68911fc_5d87bbe7_0d4fe854_5c681ac5_u128, |
358 | | }, |
359 | | DyadicFloat128 { |
360 | | sign: DyadicSign::Pos, |
361 | | exponent: -99, |
362 | | mantissa: 0x9e997222_55ef4045_fa9f311d_57d082a2_u128, |
363 | | }, |
364 | | DyadicFloat128 { |
365 | | sign: DyadicSign::Neg, |
366 | | exponent: -99, |
367 | | mantissa: 0xbe93656a_b0a4f32d_3ebbfdeb_b1cbb839_u128, |
368 | | }, |
369 | | ]; |
370 | | static Q: [DyadicFloat128; 14] = [ |
371 | | DyadicFloat128 { |
372 | | sign: DyadicSign::Pos, |
373 | | exponent: -127, |
374 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
375 | | }, |
376 | | DyadicFloat128 { |
377 | | sign: DyadicSign::Neg, |
378 | | exponent: -123, |
379 | | mantissa: 0xdaa34a7e_861dddff_a0642080_cd83dd65_u128, |
380 | | }, |
381 | | DyadicFloat128 { |
382 | | sign: DyadicSign::Pos, |
383 | | exponent: -118, |
384 | | mantissa: 0x93f05740_f4758772_bb9992f9_91e72795_u128, |
385 | | }, |
386 | | DyadicFloat128 { |
387 | | sign: DyadicSign::Neg, |
388 | | exponent: -115, |
389 | | mantissa: 0xeddcb810_054c9aab_fa7e4214_d59d18b0_u128, |
390 | | }, |
391 | | DyadicFloat128 { |
392 | | sign: DyadicSign::Pos, |
393 | | exponent: -111, |
394 | | mantissa: 0xa0180fcd_831ff6c0_ac2b8f02_37f3cfd1_u128, |
395 | | }, |
396 | | DyadicFloat128 { |
397 | | sign: DyadicSign::Neg, |
398 | | exponent: -108, |
399 | | mantissa: 0x97d25106_3b66907e_90b4f786_26daa0bb_u128, |
400 | | }, |
401 | | DyadicFloat128 { |
402 | | sign: DyadicSign::Pos, |
403 | | exponent: -106, |
404 | | mantissa: 0xf595ce38_aac16c11_001b874a_99603b45_u128, |
405 | | }, |
406 | | DyadicFloat128 { |
407 | | sign: DyadicSign::Neg, |
408 | | exponent: -103, |
409 | | mantissa: 0x912b3715_4aca68f6_5821c2ed_43d77111_u128, |
410 | | }, |
411 | | DyadicFloat128 { |
412 | | sign: DyadicSign::Pos, |
413 | | exponent: -101, |
414 | | mantissa: 0x90f97141_b896e2b6_38b87354_8945a43c_u128, |
415 | | }, |
416 | | DyadicFloat128 { |
417 | | sign: DyadicSign::Neg, |
418 | | exponent: -100, |
419 | | mantissa: 0xd3e5f967_89097d6b_3a3060fe_852ff580_u128, |
420 | | }, |
421 | | DyadicFloat128 { |
422 | | sign: DyadicSign::Pos, |
423 | | exponent: -99, |
424 | | mantissa: 0xf0d6de35_939da009_9ced21fd_48af7281_u128, |
425 | | }, |
426 | | DyadicFloat128 { |
427 | | sign: DyadicSign::Neg, |
428 | | exponent: -98, |
429 | | mantissa: 0xd2a0b183_6ac613b2_6745ce1d_8ed1c323_u128, |
430 | | }, |
431 | | DyadicFloat128 { |
432 | | sign: DyadicSign::Pos, |
433 | | exponent: -98, |
434 | | mantissa: 0xbb9c089a_b7e939a2_732b3fb5_2e66cd77_u128, |
435 | | }, |
436 | | DyadicFloat128 { |
437 | | sign: DyadicSign::Neg, |
438 | | exponent: -98, |
439 | | mantissa: 0xcb2107c3_736bef81_609718c0_ba82cd8e_u128, |
440 | | }, |
441 | | ]; |
442 | | |
443 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
444 | | |
445 | 0 | let mut p_num = P[13]; |
446 | 0 | for i in (0..13).rev() { |
447 | 0 | p_num = recip * p_num + P[i]; |
448 | 0 | } |
449 | 0 | let mut p_den = Q[13]; |
450 | 0 | for i in (0..13).rev() { |
451 | 0 | p_den = recip * p_den + Q[i]; |
452 | 0 | } |
453 | 0 | let z = p_num * p_den.reciprocal(); |
454 | | |
455 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
456 | 0 | (z * r_sqrt).fast_as_f64() |
457 | 0 | } |
458 | | |
459 | | /** |
460 | | I0(x)=R(1/x)*Exp(x)/sqrt(x) |
461 | | Generated in Wolfram: |
462 | | ```text |
463 | | <<FunctionApproximations` |
464 | | ClearAll["Global`*"] |
465 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
466 | | g[z_]:=f[1/z] |
467 | | {err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},11,11},WorkingPrecision->120] |
468 | | poly=Numerator[approx]; |
469 | | coeffs=CoefficientList[poly,z]; |
470 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
471 | | poly=Denominator[approx]; |
472 | | coeffs=CoefficientList[poly,z]; |
473 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]] |
474 | | ``` |
475 | | **/ |
476 | | #[inline] |
477 | 0 | fn i0e_asympt(x: f64) -> f64 { |
478 | 0 | let dx = x; |
479 | 0 | let recip = DoubleDouble::from_quick_recip(x); |
480 | | const P: [(u64, u64); 12] = [ |
481 | | (0xbc7ca19c5d824c54, 0x3fd9884533d43651), |
482 | | (0x3cca32eb734e010e, 0xc03b7ca35caf42eb), |
483 | | (0x3d03af8238d6f25e, 0x408b92cfcaa7070f), |
484 | | (0xbd7a8ff7fdebed70, 0xc0d0a3be432cce93), |
485 | | (0xbdababdb579bb076, 0x410a77dc51f1804d), |
486 | | (0x3dc5e4e3c972832a, 0xc13cb0be2f74839c), |
487 | | (0x3e01076f9b102e38, 0x41653b064cc61661), |
488 | | (0xbe2157e700d445f4, 0xc184e1b076927460), |
489 | | (0xbdfa4577156dde56, 0x41999e9653f9dc5f), |
490 | | (0xbe47aa238a739ffe, 0xc1a130f6ded40c00), |
491 | | (0xbe331b560b6fbf4a, 0x419317f11e674cae), |
492 | | (0xbe0765596077d1e3, 0xc16024404db59d3f), |
493 | | ]; |
494 | | |
495 | 0 | let x2 = DoubleDouble::quick_mult(recip, recip); |
496 | 0 | let x4 = DoubleDouble::quick_mult(x2, x2); |
497 | 0 | let x8 = DoubleDouble::quick_mult(x4, x4); |
498 | | |
499 | 0 | let e0 = DoubleDouble::mul_add( |
500 | 0 | recip, |
501 | 0 | DoubleDouble::from_bit_pair(P[1]), |
502 | 0 | DoubleDouble::from_bit_pair(P[0]), |
503 | | ); |
504 | 0 | let e1 = DoubleDouble::mul_add( |
505 | 0 | recip, |
506 | 0 | DoubleDouble::from_bit_pair(P[3]), |
507 | 0 | DoubleDouble::from_bit_pair(P[2]), |
508 | | ); |
509 | 0 | let e2 = DoubleDouble::mul_add( |
510 | 0 | recip, |
511 | 0 | DoubleDouble::from_bit_pair(P[5]), |
512 | 0 | DoubleDouble::from_bit_pair(P[4]), |
513 | | ); |
514 | 0 | let e3 = DoubleDouble::mul_add( |
515 | 0 | recip, |
516 | 0 | DoubleDouble::from_bit_pair(P[7]), |
517 | 0 | DoubleDouble::from_bit_pair(P[6]), |
518 | | ); |
519 | 0 | let e4 = DoubleDouble::mul_add( |
520 | 0 | recip, |
521 | 0 | DoubleDouble::from_bit_pair(P[9]), |
522 | 0 | DoubleDouble::from_bit_pair(P[8]), |
523 | | ); |
524 | 0 | let e5 = DoubleDouble::mul_add( |
525 | 0 | recip, |
526 | 0 | DoubleDouble::from_bit_pair(P[11]), |
527 | 0 | DoubleDouble::from_bit_pair(P[10]), |
528 | | ); |
529 | | |
530 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
531 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
532 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
533 | | |
534 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
535 | | |
536 | 0 | let p_num = DoubleDouble::mul_add(x8, f2, g0); |
537 | | |
538 | | const Q: [(u64, u64); 12] = [ |
539 | | (0x0000000000000000, 0x3ff0000000000000), |
540 | | (0xbcf687703e843d07, 0xc051418f1c4dd0b9), |
541 | | (0x3d468ab92cb87a0b, 0x40a15891d823e48d), |
542 | | (0x3d8bfc17e5183376, 0xc0e4fce40dd82796), |
543 | | (0xbdccbbcc2ecf8d4c, 0x4120beef869c61ec), |
544 | | (0xbdf42170b4d5d150, 0xc1523ad18834a7ed), |
545 | | (0xbe0eaa32f405afd4, 0x417b24ec57a8f480), |
546 | | (0x3e3ec900705e7252, 0xc19af2a91d23d62e), |
547 | | (0x3e3e220e274fa46b, 0x41b0cb905cc99ff5), |
548 | | (0xbe46c6c61dee11f6, 0xc1b7452e50518520), |
549 | | (0x3e3ed0fc063187bf, 0x41ac1772d1749896), |
550 | | (0xbe11c578f04f4be1, 0xc180feb5b2ca47cb), |
551 | | ]; |
552 | | |
553 | 0 | let e0 = DoubleDouble::mul_add_f64( |
554 | 0 | recip, |
555 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
556 | 0 | f64::from_bits(0x3ff0000000000000), |
557 | | ); |
558 | 0 | let e1 = DoubleDouble::mul_add( |
559 | 0 | recip, |
560 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
561 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
562 | | ); |
563 | 0 | let e2 = DoubleDouble::mul_add( |
564 | 0 | recip, |
565 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
566 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
567 | | ); |
568 | 0 | let e3 = DoubleDouble::mul_add( |
569 | 0 | recip, |
570 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
571 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
572 | | ); |
573 | 0 | let e4 = DoubleDouble::mul_add( |
574 | 0 | recip, |
575 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
576 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
577 | | ); |
578 | 0 | let e5 = DoubleDouble::mul_add( |
579 | 0 | recip, |
580 | 0 | DoubleDouble::from_bit_pair(Q[11]), |
581 | 0 | DoubleDouble::from_bit_pair(Q[10]), |
582 | | ); |
583 | | |
584 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
585 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
586 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
587 | | |
588 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
589 | | |
590 | 0 | let p_den = DoubleDouble::mul_add(x8, f2, g0); |
591 | | |
592 | 0 | let z = DoubleDouble::div(p_num, p_den); |
593 | | |
594 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(dx); |
595 | 0 | let r = z * r_sqrt; |
596 | 0 | let err = f_fmla( |
597 | 0 | r.hi, |
598 | 0 | f64::from_bits(0x3c40000000000000), // 2^-59 |
599 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
600 | | ); |
601 | 0 | let up = r.hi + (r.lo + err); |
602 | 0 | let lb = r.hi + (r.lo - err); |
603 | 0 | if up != lb { |
604 | 0 | return i0e_asympt_hard(x); |
605 | 0 | } |
606 | 0 | lb |
607 | 0 | } |
608 | | |
609 | | /** |
610 | | I0(x)=R(1/x)*Exp(x)/sqrt(x) |
611 | | Generated in Wolfram: |
612 | | ```text |
613 | | <<FunctionApproximations` |
614 | | ClearAll["Global`*"] |
615 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
616 | | g[z_]:=f[1/z] |
617 | | {err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},15,15},WorkingPrecision->120] |
618 | | poly=Numerator[approx]; |
619 | | coeffs=CoefficientList[poly,z]; |
620 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
621 | | poly=Denominator[approx]; |
622 | | coeffs=CoefficientList[poly,z]; |
623 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
624 | | ``` |
625 | | **/ |
626 | | #[cold] |
627 | | #[inline(never)] |
628 | 0 | fn i0e_asympt_hard(x: f64) -> f64 { |
629 | | static P: [DyadicFloat128; 16] = [ |
630 | | DyadicFloat128 { |
631 | | sign: DyadicSign::Pos, |
632 | | exponent: -129, |
633 | | mantissa: 0xcc42299e_a1b28468_7e5aad4a_70b749c4_u128, |
634 | | }, |
635 | | DyadicFloat128 { |
636 | | sign: DyadicSign::Neg, |
637 | | exponent: -122, |
638 | | mantissa: 0xabb4209d_ca11bdaa_186bef7f_390e6b77_u128, |
639 | | }, |
640 | | DyadicFloat128 { |
641 | | sign: DyadicSign::Pos, |
642 | | exponent: -116, |
643 | | mantissa: 0x8a2725e2_4749db25_625ad1f2_12a2a16c_u128, |
644 | | }, |
645 | | DyadicFloat128 { |
646 | | sign: DyadicSign::Neg, |
647 | | exponent: -111, |
648 | | mantissa: 0x8b4c2cd4_9e5d0c8b_c9be4d3e_781bb676_u128, |
649 | | }, |
650 | | DyadicFloat128 { |
651 | | sign: DyadicSign::Pos, |
652 | | exponent: -107, |
653 | | mantissa: 0xc33fad7c_40599f7d_713e3081_6b5ad791_u128, |
654 | | }, |
655 | | DyadicFloat128 { |
656 | | sign: DyadicSign::Neg, |
657 | | exponent: -103, |
658 | | mantissa: 0xc81da271_b623ba88_0be032b5_827d92fa_u128, |
659 | | }, |
660 | | DyadicFloat128 { |
661 | | sign: DyadicSign::Pos, |
662 | | exponent: -99, |
663 | | mantissa: 0x99ec4975_b6aa7cae_7692a287_ed8ae47c_u128, |
664 | | }, |
665 | | DyadicFloat128 { |
666 | | sign: DyadicSign::Neg, |
667 | | exponent: -96, |
668 | | mantissa: 0xb3aa4745_fc2dd441_2dbd3e3c_d4539687_u128, |
669 | | }, |
670 | | DyadicFloat128 { |
671 | | sign: DyadicSign::Pos, |
672 | | exponent: -93, |
673 | | mantissa: 0x9f14edc2_6882afca_29d2a067_dc459729_u128, |
674 | | }, |
675 | | DyadicFloat128 { |
676 | | sign: DyadicSign::Neg, |
677 | | exponent: -91, |
678 | | mantissa: 0xd35c4d01_78d8cec6_fc8ae0ee_834da837_u128, |
679 | | }, |
680 | | DyadicFloat128 { |
681 | | sign: DyadicSign::Pos, |
682 | | exponent: -89, |
683 | | mantissa: 0xcdc529c7_6e082342_faad3073_07a9b61f_u128, |
684 | | }, |
685 | | DyadicFloat128 { |
686 | | sign: DyadicSign::Neg, |
687 | | exponent: -87, |
688 | | mantissa: 0x8ccac88f_2598c8a6_423b1f42_63591cb9_u128, |
689 | | }, |
690 | | DyadicFloat128 { |
691 | | sign: DyadicSign::Pos, |
692 | | exponent: -87, |
693 | | mantissa: 0xfc044cfb_a20f0885_93d58660_17819ed5_u128, |
694 | | }, |
695 | | DyadicFloat128 { |
696 | | sign: DyadicSign::Neg, |
697 | | exponent: -86, |
698 | | mantissa: 0x813a700c_74d23f52_f81b179d_7ff0da9f_u128, |
699 | | }, |
700 | | DyadicFloat128 { |
701 | | sign: DyadicSign::Pos, |
702 | | exponent: -88, |
703 | | mantissa: 0xe6c43da4_297216bf_bdd987cb_636906cf_u128, |
704 | | }, |
705 | | DyadicFloat128 { |
706 | | sign: DyadicSign::Neg, |
707 | | exponent: -91, |
708 | | mantissa: 0xa4998323_649c3cf2_64477869_3d2c6afd_u128, |
709 | | }, |
710 | | ]; |
711 | | static Q: [DyadicFloat128; 16] = [ |
712 | | DyadicFloat128 { |
713 | | sign: DyadicSign::Pos, |
714 | | exponent: -127, |
715 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
716 | | }, |
717 | | DyadicFloat128 { |
718 | | sign: DyadicSign::Neg, |
719 | | exponent: -121, |
720 | | mantissa: 0xd772d5fd_a7077638_6e007274_d83b013c_u128, |
721 | | }, |
722 | | DyadicFloat128 { |
723 | | sign: DyadicSign::Pos, |
724 | | exponent: -115, |
725 | | mantissa: 0xad914ef0_451ced2e_515657ef_fc7eeb53_u128, |
726 | | }, |
727 | | DyadicFloat128 { |
728 | | sign: DyadicSign::Neg, |
729 | | exponent: -110, |
730 | | mantissa: 0xaf41180c_dffe96e5_f192fa40_0b1bff1e_u128, |
731 | | }, |
732 | | DyadicFloat128 { |
733 | | sign: DyadicSign::Pos, |
734 | | exponent: -106, |
735 | | mantissa: 0xf60dc728_241f71fd_5b93e653_ccbedace_u128, |
736 | | }, |
737 | | DyadicFloat128 { |
738 | | sign: DyadicSign::Neg, |
739 | | exponent: -102, |
740 | | mantissa: 0xfcaefef9_39cf96e7_3cb75a98_da5e9761_u128, |
741 | | }, |
742 | | DyadicFloat128 { |
743 | | sign: DyadicSign::Pos, |
744 | | exponent: -98, |
745 | | mantissa: 0xc2d2c837_5789587a_13ef38c6_a24c3413_u128, |
746 | | }, |
747 | | DyadicFloat128 { |
748 | | sign: DyadicSign::Neg, |
749 | | exponent: -95, |
750 | | mantissa: 0xe41725c3_51d14486_a650044e_e8588f7b_u128, |
751 | | }, |
752 | | DyadicFloat128 { |
753 | | sign: DyadicSign::Pos, |
754 | | exponent: -92, |
755 | | mantissa: 0xcabeed9b_5e2e888d_81d32b11_d289a624_u128, |
756 | | }, |
757 | | DyadicFloat128 { |
758 | | sign: DyadicSign::Neg, |
759 | | exponent: -89, |
760 | | mantissa: 0x8764876d_11ad6607_8a8d5382_3ffe82d9_u128, |
761 | | }, |
762 | | DyadicFloat128 { |
763 | | sign: DyadicSign::Pos, |
764 | | exponent: -87, |
765 | | mantissa: 0x84c9f9e5_6a5f5034_ad2c8512_16cb1ba1_u128, |
766 | | }, |
767 | | DyadicFloat128 { |
768 | | sign: DyadicSign::Neg, |
769 | | exponent: -86, |
770 | | mantissa: 0xb7c1d143_a15d8aab_03a7fa3e_b7d07a36_u128, |
771 | | }, |
772 | | DyadicFloat128 { |
773 | | sign: DyadicSign::Pos, |
774 | | exponent: -85, |
775 | | mantissa: 0xa78f8257_4658040f_7a1ad39c_91ea9483_u128, |
776 | | }, |
777 | | DyadicFloat128 { |
778 | | sign: DyadicSign::Neg, |
779 | | exponent: -85, |
780 | | mantissa: 0xb231e0ca_b729a404_44c38f52_be208507_u128, |
781 | | }, |
782 | | DyadicFloat128 { |
783 | | sign: DyadicSign::Pos, |
784 | | exponent: -86, |
785 | | mantissa: 0xae317981_42349081_8bc68b28_f69b8e49_u128, |
786 | | }, |
787 | | DyadicFloat128 { |
788 | | sign: DyadicSign::Neg, |
789 | | exponent: -89, |
790 | | mantissa: 0xb451abd3_5cd79c6d_7e578164_32f16da1_u128, |
791 | | }, |
792 | | ]; |
793 | | |
794 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
795 | | |
796 | 0 | let mut p_num = P[15]; |
797 | 0 | for i in (0..15).rev() { |
798 | 0 | p_num = recip * p_num + P[i]; |
799 | 0 | } |
800 | | |
801 | 0 | let mut p_den = Q[15]; |
802 | 0 | for i in (0..15).rev() { |
803 | 0 | p_den = recip * p_den + Q[i]; |
804 | 0 | } |
805 | | |
806 | 0 | let z = p_num * p_den.reciprocal(); |
807 | | |
808 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
809 | 0 | (z * r_sqrt).fast_as_f64() |
810 | 0 | } |
811 | | |
812 | | #[cfg(test)] |
813 | | mod tests { |
814 | | use super::*; |
815 | | #[test] |
816 | | fn test_i0e() { |
817 | | assert_eq!(f_i0e(0.00000000000000000000000000052342), 1.0); |
818 | | assert_eq!(f_i0e(f64::EPSILON), 0.9999999999999998); |
819 | | assert_eq!(f_i0e(9.500000000005492,), 0.13125126081422883); |
820 | | assert!(f_i0e(f64::NAN).is_nan()); |
821 | | assert_eq!(f_i0e(f64::INFINITY), 0.); |
822 | | assert_eq!(f_i0e(f64::NEG_INFINITY), 0.); |
823 | | assert_eq!(f_i0e(7.500000000788034), 0.14831583006929877); |
824 | | assert_eq!(f_i0e(715.), 0.014922205745802662); |
825 | | assert_eq!(f_i0e(12.), 0.11642622121344044); |
826 | | assert_eq!(f_i0e(16.), 0.10054412736125203); |
827 | | assert_eq!(f_i0e(18.432), 0.09357372647647); |
828 | | assert_eq!(f_i0e(26.432), 0.07797212360059241); |
829 | | assert_eq!(f_i0e(0.2), 0.8269385516343293); |
830 | | assert_eq!(f_i0e(7.5), 0.14831583007739552); |
831 | | assert_eq!(f_i0e(-1.5), 0.36743360905415834); |
832 | | assert_eq!(i0e_asympt_hard(18.432), 0.09357372647647); |
833 | | } |
834 | | } |