/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/bessel/i0.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bessel::bessel_exp::i0_exp; |
30 | | use crate::common::f_fmla; |
31 | | use crate::double_double::DoubleDouble; |
32 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
33 | | use crate::exponents::rational128_exp; |
34 | | use crate::polyeval::{f_estrin_polyeval5, f_polyeval4}; |
35 | | |
36 | | /// Modified Bessel of the first kind of order 0 |
37 | | /// |
38 | | /// Max ULP 0.5 |
39 | 0 | pub fn f_i0(x: f64) -> f64 { |
40 | 0 | let ux = x.to_bits().wrapping_shl(1); |
41 | | |
42 | 0 | if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 { |
43 | | // |x| <= f64::EPSILON, |x| == inf, x == NaN |
44 | 0 | if ux <= 0x760af31dc4611874u64 { |
45 | | // |x| <= 2.2204460492503131e-24 |
46 | 0 | return 1.; |
47 | 0 | } |
48 | 0 | if ux <= 0x7960000000000000u64 { |
49 | | // |x| <= f64::EPSILON |
50 | | // Power series of I0(x) ~ 1 + x^2/4 + O(x^4) |
51 | 0 | let half_x = x * 0.5; |
52 | 0 | return f_fmla(half_x, half_x, 1.); |
53 | 0 | } |
54 | 0 | if x.is_infinite() { |
55 | 0 | return f64::INFINITY; |
56 | 0 | } |
57 | 0 | return x + f64::NAN; // x == NaN |
58 | 0 | } |
59 | | |
60 | 0 | let xb = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
61 | | |
62 | 0 | if xb > 0x40864fe5304e83e4u64 { |
63 | | // |x| > 713.9869085439682 |
64 | 0 | return f64::INFINITY; |
65 | 0 | } |
66 | | |
67 | 0 | if xb <= 0x4023000000000000u64 { |
68 | | // |x| <= 9.5 |
69 | 0 | if xb <= 0x400ccccccccccccdu64 { |
70 | | // |x| <= 3.6 |
71 | 0 | return i0_0_to_3p6_exec(f64::from_bits(xb)); |
72 | 0 | } else if xb <= 0x401e000000000000u64 { |
73 | | // |x| <= 7.5 |
74 | 0 | return i3p6_to_7p5(f64::from_bits(xb)); |
75 | 0 | } |
76 | 0 | return i0_7p5_to_9p5(f64::from_bits(xb)); |
77 | 0 | } |
78 | | |
79 | 0 | i0_asympt(f64::from_bits(xb)) |
80 | 0 | } |
81 | | |
82 | | /** |
83 | | Computes I0 on interval [-7.5; -3.6], [3.6; 7.5] |
84 | | **/ |
85 | | #[inline] |
86 | 0 | fn i3p6_to_7p5(x: f64) -> f64 { |
87 | 0 | let r = i0_3p6_to_7p5_dd(x); |
88 | | |
89 | 0 | let err = f_fmla( |
90 | 0 | r.hi, |
91 | 0 | f64::from_bits(0x3c56a09e667f3bcd), // 2^-57.5 |
92 | 0 | f64::from_bits(0x3c00000000000000), // 2^-63 |
93 | | ); |
94 | 0 | let ub = r.hi + (r.lo + err); |
95 | 0 | let lb = r.hi + (r.lo - err); |
96 | 0 | if ub != lb { |
97 | 0 | return eval_small_hard_3p6_to_7p5(x).to_f64(); |
98 | 0 | } |
99 | 0 | r.to_f64() |
100 | 0 | } |
101 | | |
102 | | /** |
103 | | Computes I0 on interval [-7.5; -3.6], [3.6; 7.5] |
104 | | as rational approximation I0 = 1 + (x/2)^2 * Pn((x/2)^2)/Qm((x/2)^2)) |
105 | | |
106 | | Relative error on interval 2^-(105.71). |
107 | | |
108 | | Generated by Wolfram Mathematica: |
109 | | ```text |
110 | | <<FunctionApproximations` |
111 | | ClearAll["Global`*"] |
112 | | f[x_]:=(BesselI[0,x]-1)/(x/2)^2 |
113 | | g[z_]:=f[2 Sqrt[z]] |
114 | | {err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,3.6},7,6},WorkingPrecision->60] |
115 | | poly=Numerator[approx][[1]]; |
116 | | coeffs=CoefficientList[poly,z]; |
117 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
118 | | poly=Denominator[approx][[1]]; |
119 | | coeffs=CoefficientList[poly,z]; |
120 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
121 | | ``` |
122 | | **/ |
123 | | #[inline] |
124 | 0 | pub(crate) fn i0_0_to_3p6_dd(x: f64) -> DoubleDouble { |
125 | 0 | let half_x = x * 0.5; // this is exact |
126 | 0 | let eval_x = DoubleDouble::from_exact_mult(half_x, half_x); |
127 | | const P: [(u64, u64); 8] = [ |
128 | | (0xba93dec1e5396e30, 0x3ff0000000000000), |
129 | | (0xbc5d3d919a2b891a, 0x3fcb128f49a1f59f), |
130 | | (0xbc3c4d80de165459, 0x3f9353508fce278f), |
131 | | (0x3be7e0e75c00d411, 0x3f4b760657892e15), |
132 | | (0xbb9bc959d02076a4, 0x3ef588ff0ba5872e), |
133 | | (0x3b257756675180e4, 0x3e932e320d411521), |
134 | | (0xbaca098436a47ec6, 0x3e2285f524a51de0), |
135 | | (0x3a0e48fa0331db75, 0x3d9ee6518d82a209), |
136 | | ]; |
137 | 0 | let ps_num = f_estrin_polyeval5( |
138 | 0 | eval_x.hi, |
139 | 0 | f64::from_bits(0x3f4b760657892e15), |
140 | 0 | f64::from_bits(0x3ef588ff0ba5872e), |
141 | 0 | f64::from_bits(0x3e932e320d411521), |
142 | 0 | f64::from_bits(0x3e2285f524a51de0), |
143 | 0 | f64::from_bits(0x3d9ee6518d82a209), |
144 | | ); |
145 | 0 | let mut p_num = DoubleDouble::mul_f64_add(eval_x, ps_num, DoubleDouble::from_bit_pair(P[2])); |
146 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1])); |
147 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0])); |
148 | | const Q: [(u64, u64); 7] = [ |
149 | | (0x0000000000000000, 0x3ff0000000000000), |
150 | | (0x3c26136ec7050a58, 0xbfa3b5c2d9782985), |
151 | | (0x3bdf9cd5be66297b, 0x3f478d5c030ea692), |
152 | | (0xbb5036196d4b865c, 0xbee1a83b6e8c6fd6), |
153 | | (0xbb1a9dafadc75858, 0x3e71ba443893032b), |
154 | | (0xba7a719ba9ed7e7f, 0xbdf6e673af3e0c66), |
155 | | (0xb9e17740b37a23ec, 0x3d6e2c993fef696f), |
156 | | ]; |
157 | 0 | let ps_den = f_polyeval4( |
158 | 0 | eval_x.hi, |
159 | 0 | f64::from_bits(0xbee1a83b6e8c6fd6), |
160 | 0 | f64::from_bits(0x3e71ba443893032b), |
161 | 0 | f64::from_bits(0xbdf6e673af3e0c66), |
162 | 0 | f64::from_bits(0x3d6e2c993fef696f), |
163 | | ); |
164 | 0 | let mut p_den = DoubleDouble::mul_f64_add(eval_x, ps_den, DoubleDouble::from_bit_pair(Q[2])); |
165 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1])); |
166 | 0 | p_den = DoubleDouble::mul_add_f64(eval_x, p_den, f64::from_bits(0x3ff0000000000000)); |
167 | 0 | DoubleDouble::mul_add_f64(DoubleDouble::div(p_num, p_den), eval_x, 1.) |
168 | 0 | } |
169 | | |
170 | | // Generated by Wolfram Mathematica: |
171 | | // I0(x) = 1+(x/2)^2 * R((x/2)^2) |
172 | | // <<FunctionApproximations` |
173 | | // ClearAll["Global`*"] |
174 | | // f[x_]:=(BesselI[0,x]-1)/(x/2)^2 |
175 | | // g[z_]:=f[2 Sqrt[z]] |
176 | | // {err, approx}=MiniMaxApproximation[g[z],{z,{3.6,7.51},8,8},WorkingPrecision->60] |
177 | | // poly=Numerator[approx][[1]]; |
178 | | // coeffs=CoefficientList[poly,z]; |
179 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
180 | | // poly=Denominator[approx][[1]]; |
181 | | // coeffs=CoefficientList[poly,z]; |
182 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
183 | | #[inline] |
184 | 0 | pub(crate) fn i0_3p6_to_7p5_dd(x: f64) -> DoubleDouble { |
185 | 0 | let half_x = x * 0.5; // this is exact |
186 | 0 | let eval_x = DoubleDouble::from_exact_mult(half_x, half_x); |
187 | | const P: [(u64, u64); 10] = [ |
188 | | (0xbae8195e94c833a1, 0x3ff0000000000000), |
189 | | (0xbc6f4db3a04cf778, 0x3fcbca374cf8efde), |
190 | | (0x3c31e334a32ed081, 0x3f9493391f88f49c), |
191 | | (0x3bb77456438b622e, 0x3f4f2aff2cd821b7), |
192 | | (0x3b312b847b83fa80, 0x3efb249e459c00fa), |
193 | | (0x3b1d3faf77d3ee5b, 0x3e9cd199c39f6d6c), |
194 | | (0xbaaf6a6a3d483df8, 0x3e331192e34cb81f), |
195 | | (0x3a406e234cb7aede, 0x3dbef6023ba17d1a), |
196 | | (0x39dee1ec666e30b5, 0x3d3c8bab78d825e9), |
197 | | (0x3910b9821c993936, 0x3ca73bf438398234), |
198 | | ]; |
199 | 0 | let ps_num = f_estrin_polyeval5( |
200 | 0 | eval_x.hi, |
201 | 0 | f64::from_bits(0x3e9cd199c39f6d6c), |
202 | 0 | f64::from_bits(0x3e331192e34cb81f), |
203 | 0 | f64::from_bits(0x3dbef6023ba17d1a), |
204 | 0 | f64::from_bits(0x3d3c8bab78d825e9), |
205 | 0 | f64::from_bits(0x3ca73bf438398234), |
206 | | ); |
207 | 0 | let mut p_num = DoubleDouble::mul_f64_add(eval_x, ps_num, DoubleDouble::from_bit_pair(P[4])); |
208 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[3])); |
209 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[2])); |
210 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1])); |
211 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0])); |
212 | | const Q: [(u64, u64); 10] = [ |
213 | | (0x0000000000000000, 0x3ff0000000000000), |
214 | | (0x3c16498103ae0f29, 0xbfa0d722cc1c408a), |
215 | | (0x3ba9b44df49b7368, 0x3f41a06d24a9b89a), |
216 | | (0x3b43ef4989b8a3ed, 0xbed8363c48ecd98c), |
217 | | (0xbaf6197838a8a2ef, 0x3e6830647038f0ac), |
218 | | (0x3a96c443c7d52296, 0xbdf257666a799e31), |
219 | | (0x3a118e8a97f0df20, 0x3d753ffeb530f0c8), |
220 | | (0xb99e90b659ab1bb7, 0xbcf243374f2b7d6c), |
221 | | (0x38f647cfef513fc5, 0x3c654740fd120da3), |
222 | | (0x386d691099d0e8fc, 0xbbc9c9c826756a76), |
223 | | ]; |
224 | 0 | let ps_den = f_estrin_polyeval5( |
225 | 0 | eval_x.hi, |
226 | 0 | f64::from_bits(0xbdf257666a799e31), |
227 | 0 | f64::from_bits(0x3d753ffeb530f0c8), |
228 | 0 | f64::from_bits(0xbcf243374f2b7d6c), |
229 | 0 | f64::from_bits(0x3c654740fd120da3), |
230 | 0 | f64::from_bits(0xbbc9c9c826756a76), |
231 | | ); |
232 | 0 | let mut p_den = DoubleDouble::mul_f64_add(eval_x, ps_den, DoubleDouble::from_bit_pair(Q[4])); |
233 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[3])); |
234 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[2])); |
235 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1])); |
236 | 0 | p_den = DoubleDouble::mul_add_f64(eval_x, p_den, f64::from_bits(0x3ff0000000000000)); |
237 | 0 | DoubleDouble::mul_add_f64(DoubleDouble::div(p_num, p_den), eval_x, 1.) |
238 | 0 | } |
239 | | |
240 | | // Generated by Wolfram Mathematica: |
241 | | // I0(x) = 1+(x/2)^2 * R((x/2)^2) |
242 | | // <<FunctionApproximations` |
243 | | // ClearAll["Global`*"] |
244 | | // f[x_]:=(BesselI[0,x]-1)/(x/2)^2 |
245 | | // g[z_]:=f[2 Sqrt[z]] |
246 | | // {err, approx}=MiniMaxApproximation[g[z],{z,{3.6,7.51},8,8},WorkingPrecision->60] |
247 | | // poly=Numerator[approx][[1]]; |
248 | | // coeffs=CoefficientList[poly,z]; |
249 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
250 | | // poly=Denominator[approx][[1]]; |
251 | | // coeffs=CoefficientList[poly,z]; |
252 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
253 | | #[cold] |
254 | | #[inline(never)] |
255 | 0 | pub(crate) fn eval_small_hard_3p6_to_7p5(x: f64) -> DoubleDouble { |
256 | 0 | let half_x = x * 0.5; // this is exact |
257 | 0 | let eval_x = DoubleDouble::from_exact_mult(half_x, half_x); |
258 | | const P: [(u64, u64); 10] = [ |
259 | | (0xbae8195e94c833a1, 0x3ff0000000000000), |
260 | | (0xbc6f4db3a04cf778, 0x3fcbca374cf8efde), |
261 | | (0x3c31e334a32ed081, 0x3f9493391f88f49c), |
262 | | (0x3bb77456438b622e, 0x3f4f2aff2cd821b7), |
263 | | (0x3b312b847b83fa80, 0x3efb249e459c00fa), |
264 | | (0x3b1d3faf77d3ee5b, 0x3e9cd199c39f6d6c), |
265 | | (0xbaaf6a6a3d483df8, 0x3e331192e34cb81f), |
266 | | (0x3a406e234cb7aede, 0x3dbef6023ba17d1a), |
267 | | (0x39dee1ec666e30b5, 0x3d3c8bab78d825e9), |
268 | | (0x3910b9821c993936, 0x3ca73bf438398234), |
269 | | ]; |
270 | 0 | let mut p_num = DoubleDouble::mul_add( |
271 | 0 | eval_x, |
272 | 0 | DoubleDouble::from_bit_pair(P[9]), |
273 | 0 | DoubleDouble::from_bit_pair(P[8]), |
274 | | ); |
275 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[7])); |
276 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[6])); |
277 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[5])); |
278 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[4])); |
279 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[3])); |
280 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[2])); |
281 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1])); |
282 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0])); |
283 | | const Q: [(u64, u64); 10] = [ |
284 | | (0x0000000000000000, 0x3ff0000000000000), |
285 | | (0x3c16498103ae0f29, 0xbfa0d722cc1c408a), |
286 | | (0x3ba9b44df49b7368, 0x3f41a06d24a9b89a), |
287 | | (0x3b43ef4989b8a3ed, 0xbed8363c48ecd98c), |
288 | | (0xbaf6197838a8a2ef, 0x3e6830647038f0ac), |
289 | | (0x3a96c443c7d52296, 0xbdf257666a799e31), |
290 | | (0x3a118e8a97f0df20, 0x3d753ffeb530f0c8), |
291 | | (0xb99e90b659ab1bb7, 0xbcf243374f2b7d6c), |
292 | | (0x38f647cfef513fc5, 0x3c654740fd120da3), |
293 | | (0x386d691099d0e8fc, 0xbbc9c9c826756a76), |
294 | | ]; |
295 | 0 | let mut p_den = DoubleDouble::mul_add( |
296 | 0 | eval_x, |
297 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
298 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
299 | | ); |
300 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[7])); |
301 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[6])); |
302 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[5])); |
303 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[4])); |
304 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[3])); |
305 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[2])); |
306 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1])); |
307 | 0 | p_den = DoubleDouble::mul_add_f64(eval_x, p_den, f64::from_bits(0x3ff0000000000000)); |
308 | 0 | DoubleDouble::mul_add_f64(DoubleDouble::div(p_num, p_den), eval_x, 1.) |
309 | 0 | } |
310 | | |
311 | | #[inline] |
312 | 0 | pub(crate) fn i0_0_to_3p6_exec(x: f64) -> f64 { |
313 | 0 | let r = i0_0_to_3p6_dd(x); |
314 | 0 | let err = f_fmla( |
315 | 0 | r.hi, |
316 | 0 | f64::from_bits(0x3c40000000000000), // 2^-59 |
317 | 0 | f64::from_bits(0x3be0000000000000), // 2^-66 |
318 | | ); |
319 | 0 | let ub = r.hi + (r.lo + err); |
320 | 0 | let lb = r.hi + (r.lo - err); |
321 | 0 | if ub == lb { |
322 | 0 | return r.to_f64(); |
323 | 0 | } |
324 | 0 | i0_0_to_3p6_hard(x).to_f64() |
325 | 0 | } |
326 | | |
327 | | // Generated by Wolfram Mathematica: |
328 | | // <<FunctionApproximations` |
329 | | // ClearAll["Global`*"] |
330 | | // f[x_]:=(BesselI[0,x]-1)/(x/2)^2 |
331 | | // g[z_]:=f[2 Sqrt[z]] |
332 | | // {err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,3.6},7,6},WorkingPrecision->60] |
333 | | // poly=Numerator[approx][[1]]; |
334 | | // coeffs=CoefficientList[poly,z]; |
335 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
336 | | // poly=Denominator[approx][[1]]; |
337 | | // coeffs=CoefficientList[poly,z]; |
338 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
339 | | #[cold] |
340 | | #[inline(never)] |
341 | 0 | pub(crate) fn i0_0_to_3p6_hard(x: f64) -> DoubleDouble { |
342 | 0 | let half_x = x * 0.5; // this is exact |
343 | 0 | let eval_x = DoubleDouble::from_exact_mult(half_x, half_x); |
344 | | const P: [(u64, u64); 8] = [ |
345 | | (0xba93dec1e5396e30, 0x3ff0000000000000), |
346 | | (0xbc5d3d919a2b891a, 0x3fcb128f49a1f59f), |
347 | | (0xbc3c4d80de165459, 0x3f9353508fce278f), |
348 | | (0x3be7e0e75c00d411, 0x3f4b760657892e15), |
349 | | (0xbb9bc959d02076a4, 0x3ef588ff0ba5872e), |
350 | | (0x3b257756675180e4, 0x3e932e320d411521), |
351 | | (0xbaca098436a47ec6, 0x3e2285f524a51de0), |
352 | | (0x3a0e48fa0331db75, 0x3d9ee6518d82a209), |
353 | | ]; |
354 | 0 | let mut p_num = DoubleDouble::mul_add( |
355 | 0 | eval_x, |
356 | 0 | DoubleDouble::from_bit_pair(P[7]), |
357 | 0 | DoubleDouble::from_bit_pair(P[6]), |
358 | | ); |
359 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[5])); |
360 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[4])); |
361 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[3])); |
362 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[2])); |
363 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1])); |
364 | 0 | p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0])); |
365 | | const Q: [(u64, u64); 7] = [ |
366 | | (0x0000000000000000, 0x3ff0000000000000), |
367 | | (0x3c26136ec7050a58, 0xbfa3b5c2d9782985), |
368 | | (0x3bdf9cd5be66297b, 0x3f478d5c030ea692), |
369 | | (0xbb5036196d4b865c, 0xbee1a83b6e8c6fd6), |
370 | | (0xbb1a9dafadc75858, 0x3e71ba443893032b), |
371 | | (0xba7a719ba9ed7e7f, 0xbdf6e673af3e0c66), |
372 | | (0xb9e17740b37a23ec, 0x3d6e2c993fef696f), |
373 | | ]; |
374 | 0 | let mut p_den = DoubleDouble::mul_add( |
375 | 0 | eval_x, |
376 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
377 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
378 | | ); |
379 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[4])); |
380 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[3])); |
381 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[2])); |
382 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1])); |
383 | 0 | p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[0])); |
384 | 0 | DoubleDouble::mul_add_f64(DoubleDouble::div(p_num, p_den), eval_x, 1.) |
385 | 0 | } |
386 | | |
387 | | /** |
388 | | Mid-interval [7.5;9.5] generated by Wolfram: |
389 | | I0(x)=R(1/x)/sqrt(x)*Exp(x) |
390 | | ```text |
391 | | <<FunctionApproximations` |
392 | | ClearAll["Global`*"] |
393 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
394 | | g[z_]:=f[1/z] |
395 | | {err, approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},11,11},WorkingPrecision->120] |
396 | | num=Numerator[approx][[1]]; |
397 | | den=Denominator[approx][[1]]; |
398 | | poly=den; |
399 | | coeffs=CoefficientList[poly,z]; |
400 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
401 | | ``` |
402 | | **/ |
403 | | #[inline] |
404 | 0 | fn i0_7p5_to_9p5(x: f64) -> f64 { |
405 | 0 | let dx = x; |
406 | 0 | let recip = DoubleDouble::from_quick_recip(x); |
407 | | |
408 | | const P: [(u64, u64); 12] = [ |
409 | | (0x3c778e3de1f76f48, 0x3fd988450531281b), |
410 | | (0xbcb572f6149f389e, 0xc01a786676fb4d3a), |
411 | | (0x3cf2f373365347ed, 0x405c0e8405fdb642), |
412 | | (0x3d276a94c8f1e627, 0xc0885e4718dfb761), |
413 | | (0x3d569f8a993434e2, 0x40b756d52d5fa90c), |
414 | | (0xbd6f953f7dd1a223, 0xc0c8818365c47790), |
415 | | (0xbd74247967fbf7b2, 0x40e8cf89daf87353), |
416 | | (0x3db449add7abb056, 0x41145d3c2d96d159), |
417 | | (0xbdc5cc822b71f891, 0xc123694c58fd039b), |
418 | | (0x3da2047ac1a6fba8, 0x415462e630bf3e7e), |
419 | | (0xbdc2f2c06eda6a95, 0xc14c6984ebdd6792), |
420 | | (0xbdf51fa85dafeca5, 0x4166a437c202d27b), |
421 | | ]; |
422 | | |
423 | 0 | let x2 = DoubleDouble::quick_mult(recip, recip); |
424 | 0 | let x4 = DoubleDouble::quick_mult(x2, x2); |
425 | 0 | let x8 = DoubleDouble::quick_mult(x4, x4); |
426 | | |
427 | 0 | let e0 = DoubleDouble::mul_add( |
428 | 0 | recip, |
429 | 0 | DoubleDouble::from_bit_pair(P[1]), |
430 | 0 | DoubleDouble::from_bit_pair(P[0]), |
431 | | ); |
432 | 0 | let e1 = DoubleDouble::mul_add( |
433 | 0 | recip, |
434 | 0 | DoubleDouble::from_bit_pair(P[3]), |
435 | 0 | DoubleDouble::from_bit_pair(P[2]), |
436 | | ); |
437 | 0 | let e2 = DoubleDouble::mul_add( |
438 | 0 | recip, |
439 | 0 | DoubleDouble::from_bit_pair(P[5]), |
440 | 0 | DoubleDouble::from_bit_pair(P[4]), |
441 | | ); |
442 | 0 | let e3 = DoubleDouble::mul_add( |
443 | 0 | recip, |
444 | 0 | DoubleDouble::from_bit_pair(P[7]), |
445 | 0 | DoubleDouble::from_bit_pair(P[6]), |
446 | | ); |
447 | 0 | let e4 = DoubleDouble::mul_add( |
448 | 0 | recip, |
449 | 0 | DoubleDouble::from_bit_pair(P[9]), |
450 | 0 | DoubleDouble::from_bit_pair(P[8]), |
451 | | ); |
452 | 0 | let e5 = DoubleDouble::mul_add( |
453 | 0 | recip, |
454 | 0 | DoubleDouble::from_bit_pair(P[11]), |
455 | 0 | DoubleDouble::from_bit_pair(P[10]), |
456 | | ); |
457 | | |
458 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
459 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
460 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
461 | | |
462 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
463 | | |
464 | 0 | let p_num = DoubleDouble::mul_add(x8, f2, g0); |
465 | | |
466 | | const Q: [(u64, u64); 12] = [ |
467 | | (0x0000000000000000, 0x3ff0000000000000), |
468 | | (0x3cde08e4cbf324d1, 0xc030b67bd69af0ca), |
469 | | (0x3cec5e4ee7e77024, 0x4071b54c0f58409c), |
470 | | (0xbd340e1131739e2f, 0xc09f140a738b14b3), |
471 | | (0x3d607673189d6644, 0x40cdb44bd822add2), |
472 | | (0xbd7793a4f1dd74d1, 0xc0e03fe2689b802d), |
473 | | (0xbd8415501228a87e, 0x410009beafea72cc), |
474 | | (0x3dcecdac2702661f, 0x4128c2073da9a447), |
475 | | (0xbdd8386404f3dec5, 0xc1389ec7d7186bf4), |
476 | | (0xbe06eb53a3e86436, 0x4168b7a2dc85ed0b), |
477 | | (0x3e098e2cefaf8299, 0xc1604f8cf34af02c), |
478 | | (0x3e1a5e496b547001, 0x41776b1e0153d1e9), |
479 | | ]; |
480 | | |
481 | 0 | let e0 = DoubleDouble::mul_add_f64( |
482 | 0 | recip, |
483 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
484 | 0 | f64::from_bits(0x3ff0000000000000), |
485 | | ); |
486 | 0 | let e1 = DoubleDouble::mul_add( |
487 | 0 | recip, |
488 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
489 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
490 | | ); |
491 | 0 | let e2 = DoubleDouble::mul_add( |
492 | 0 | recip, |
493 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
494 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
495 | | ); |
496 | 0 | let e3 = DoubleDouble::mul_add( |
497 | 0 | recip, |
498 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
499 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
500 | | ); |
501 | 0 | let e4 = DoubleDouble::mul_add( |
502 | 0 | recip, |
503 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
504 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
505 | | ); |
506 | 0 | let e5 = DoubleDouble::mul_add( |
507 | 0 | recip, |
508 | 0 | DoubleDouble::from_bit_pair(Q[11]), |
509 | 0 | DoubleDouble::from_bit_pair(Q[10]), |
510 | | ); |
511 | | |
512 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
513 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
514 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
515 | | |
516 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
517 | | |
518 | 0 | let p_den = DoubleDouble::mul_add(x8, f2, g0); |
519 | | |
520 | 0 | let z = DoubleDouble::div(p_num, p_den); |
521 | | |
522 | 0 | let e = i0_exp(dx); |
523 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(dx); |
524 | 0 | let r = DoubleDouble::quick_mult(z * r_sqrt, e); |
525 | | |
526 | 0 | let err = f_fmla( |
527 | 0 | r.hi, |
528 | 0 | f64::from_bits(0x3bc0000000000000), |
529 | 0 | f64::from_bits(0x392bdb8cdadbe111), |
530 | | ); |
531 | 0 | let up = r.hi + (r.lo + err); |
532 | 0 | let lb = r.hi + (r.lo - err); |
533 | 0 | if up != lb { |
534 | 0 | return i0_7p5_to_9p5_hard(x); |
535 | 0 | } |
536 | 0 | r.to_f64() |
537 | 0 | } |
538 | | |
539 | | /** |
540 | | Mid-interval [7.5;9.5] generated by Wolfram Mathematica: |
541 | | I0(x)=R(1/x)/sqrt(x)*Exp(x) |
542 | | Polynomial generated by Wolfram Mathematica: |
543 | | ```text |
544 | | <<FunctionApproximations` |
545 | | ClearAll["Global`*"] |
546 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
547 | | g[z_]:=f[1/z] |
548 | | {err,approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},13,13},WorkingPrecision->120] |
549 | | poly=Numerator[approx][[1]]; |
550 | | coeffs=CoefficientList[poly,z]; |
551 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
552 | | poly=Denominator[approx][[1]]; |
553 | | coeffs=CoefficientList[poly,z]; |
554 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
555 | | ``` |
556 | | **/ |
557 | | #[cold] |
558 | | #[inline(never)] |
559 | 0 | fn i0_7p5_to_9p5_hard(x: f64) -> f64 { |
560 | | static P: [DyadicFloat128; 14] = [ |
561 | | DyadicFloat128 { |
562 | | sign: DyadicSign::Pos, |
563 | | exponent: -129, |
564 | | mantissa: 0xcc422a04_45cde144_75a3800b_45c38460_u128, |
565 | | }, |
566 | | DyadicFloat128 { |
567 | | sign: DyadicSign::Neg, |
568 | | exponent: -124, |
569 | | mantissa: 0xada66144_fcccc1a3_036f76b2_cabd6281_u128, |
570 | | }, |
571 | | DyadicFloat128 { |
572 | | sign: DyadicSign::Pos, |
573 | | exponent: -120, |
574 | | mantissa: 0xeabdda02_fa201d98_10e58d1f_7eb62bd7_u128, |
575 | | }, |
576 | | DyadicFloat128 { |
577 | | sign: DyadicSign::Neg, |
578 | | exponent: -116, |
579 | | mantissa: 0xbbfd3297_6f88a7df_5924587b_d5bdcdb8_u128, |
580 | | }, |
581 | | DyadicFloat128 { |
582 | | sign: DyadicSign::Pos, |
583 | | exponent: -113, |
584 | | mantissa: 0xfca29453_efe393bf_1651627b_7d543875_u128, |
585 | | }, |
586 | | DyadicFloat128 { |
587 | | sign: DyadicSign::Neg, |
588 | | exponent: -110, |
589 | | mantissa: 0xee7c7220_bbbd248e_bb6adac6_f9a5ce95_u128, |
590 | | }, |
591 | | DyadicFloat128 { |
592 | | sign: DyadicSign::Pos, |
593 | | exponent: -107, |
594 | | mantissa: 0xc07455dd_830ba705_414408c6_06732a5a_u128, |
595 | | }, |
596 | | DyadicFloat128 { |
597 | | sign: DyadicSign::Neg, |
598 | | exponent: -105, |
599 | | mantissa: 0xe2247793_b50cd0f0_80e8981d_933f75da_u128, |
600 | | }, |
601 | | DyadicFloat128 { |
602 | | sign: DyadicSign::Pos, |
603 | | exponent: -103, |
604 | | mantissa: 0xe14a9831_82582a0b_dd27e8b6_4ed9aac2_u128, |
605 | | }, |
606 | | DyadicFloat128 { |
607 | | sign: DyadicSign::Neg, |
608 | | exponent: -101, |
609 | | mantissa: 0xa3b2ae2f_5b64f37e_c1538435_34f02faf_u128, |
610 | | }, |
611 | | DyadicFloat128 { |
612 | | sign: DyadicSign::Pos, |
613 | | exponent: -100, |
614 | | mantissa: 0xbab73503_5b7e38d9_bbe4a84b_9007c6e8_u128, |
615 | | }, |
616 | | DyadicFloat128 { |
617 | | sign: DyadicSign::Neg, |
618 | | exponent: -99, |
619 | | mantissa: 0xa68911fc_5d87bbe7_0d4fe854_5c681ac5_u128, |
620 | | }, |
621 | | DyadicFloat128 { |
622 | | sign: DyadicSign::Pos, |
623 | | exponent: -99, |
624 | | mantissa: 0x9e997222_55ef4045_fa9f311d_57d082a2_u128, |
625 | | }, |
626 | | DyadicFloat128 { |
627 | | sign: DyadicSign::Neg, |
628 | | exponent: -99, |
629 | | mantissa: 0xbe93656a_b0a4f32d_3ebbfdeb_b1cbb839_u128, |
630 | | }, |
631 | | ]; |
632 | | static Q: [DyadicFloat128; 14] = [ |
633 | | DyadicFloat128 { |
634 | | sign: DyadicSign::Pos, |
635 | | exponent: -127, |
636 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
637 | | }, |
638 | | DyadicFloat128 { |
639 | | sign: DyadicSign::Neg, |
640 | | exponent: -123, |
641 | | mantissa: 0xdaa34a7e_861dddff_a0642080_cd83dd65_u128, |
642 | | }, |
643 | | DyadicFloat128 { |
644 | | sign: DyadicSign::Pos, |
645 | | exponent: -118, |
646 | | mantissa: 0x93f05740_f4758772_bb9992f9_91e72795_u128, |
647 | | }, |
648 | | DyadicFloat128 { |
649 | | sign: DyadicSign::Neg, |
650 | | exponent: -115, |
651 | | mantissa: 0xeddcb810_054c9aab_fa7e4214_d59d18b0_u128, |
652 | | }, |
653 | | DyadicFloat128 { |
654 | | sign: DyadicSign::Pos, |
655 | | exponent: -111, |
656 | | mantissa: 0xa0180fcd_831ff6c0_ac2b8f02_37f3cfd1_u128, |
657 | | }, |
658 | | DyadicFloat128 { |
659 | | sign: DyadicSign::Neg, |
660 | | exponent: -108, |
661 | | mantissa: 0x97d25106_3b66907e_90b4f786_26daa0bb_u128, |
662 | | }, |
663 | | DyadicFloat128 { |
664 | | sign: DyadicSign::Pos, |
665 | | exponent: -106, |
666 | | mantissa: 0xf595ce38_aac16c11_001b874a_99603b45_u128, |
667 | | }, |
668 | | DyadicFloat128 { |
669 | | sign: DyadicSign::Neg, |
670 | | exponent: -103, |
671 | | mantissa: 0x912b3715_4aca68f6_5821c2ed_43d77111_u128, |
672 | | }, |
673 | | DyadicFloat128 { |
674 | | sign: DyadicSign::Pos, |
675 | | exponent: -101, |
676 | | mantissa: 0x90f97141_b896e2b6_38b87354_8945a43c_u128, |
677 | | }, |
678 | | DyadicFloat128 { |
679 | | sign: DyadicSign::Neg, |
680 | | exponent: -100, |
681 | | mantissa: 0xd3e5f967_89097d6b_3a3060fe_852ff580_u128, |
682 | | }, |
683 | | DyadicFloat128 { |
684 | | sign: DyadicSign::Pos, |
685 | | exponent: -99, |
686 | | mantissa: 0xf0d6de35_939da009_9ced21fd_48af7281_u128, |
687 | | }, |
688 | | DyadicFloat128 { |
689 | | sign: DyadicSign::Neg, |
690 | | exponent: -98, |
691 | | mantissa: 0xd2a0b183_6ac613b2_6745ce1d_8ed1c323_u128, |
692 | | }, |
693 | | DyadicFloat128 { |
694 | | sign: DyadicSign::Pos, |
695 | | exponent: -98, |
696 | | mantissa: 0xbb9c089a_b7e939a2_732b3fb5_2e66cd77_u128, |
697 | | }, |
698 | | DyadicFloat128 { |
699 | | sign: DyadicSign::Neg, |
700 | | exponent: -98, |
701 | | mantissa: 0xcb2107c3_736bef81_609718c0_ba82cd8e_u128, |
702 | | }, |
703 | | ]; |
704 | | |
705 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
706 | | |
707 | 0 | let mut p_num = P[13]; |
708 | 0 | for i in (0..13).rev() { |
709 | 0 | p_num = recip * p_num + P[i]; |
710 | 0 | } |
711 | 0 | let mut p_den = Q[13]; |
712 | 0 | for i in (0..13).rev() { |
713 | 0 | p_den = recip * p_den + Q[i]; |
714 | 0 | } |
715 | 0 | let z = p_num * p_den.reciprocal(); |
716 | | |
717 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
718 | 0 | let f_exp = rational128_exp(x); |
719 | 0 | (z * r_sqrt * f_exp).fast_as_f64() |
720 | 0 | } |
721 | | |
722 | | /** |
723 | | I0(x)=R(1/x)*Exp(x)/sqrt(x) |
724 | | Generated in Wolfram: |
725 | | ```text |
726 | | <<FunctionApproximations` |
727 | | ClearAll["Global`*"] |
728 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
729 | | g[z_]:=f[1/z] |
730 | | {err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},11,11},WorkingPrecision->120] |
731 | | poly=Numerator[approx]; |
732 | | coeffs=CoefficientList[poly,z]; |
733 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
734 | | poly=Denominator[approx]; |
735 | | coeffs=CoefficientList[poly,z]; |
736 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]] |
737 | | ``` |
738 | | **/ |
739 | | #[inline] |
740 | 0 | fn i0_asympt(x: f64) -> f64 { |
741 | 0 | let dx = x; |
742 | 0 | let recip = DoubleDouble::from_quick_recip(x); |
743 | | const P: [(u64, u64); 12] = [ |
744 | | (0xbc7ca19c5d824c54, 0x3fd9884533d43651), |
745 | | (0x3cca32eb734e010e, 0xc03b7ca35caf42eb), |
746 | | (0x3d03af8238d6f25e, 0x408b92cfcaa7070f), |
747 | | (0xbd7a8ff7fdebed70, 0xc0d0a3be432cce93), |
748 | | (0xbdababdb579bb076, 0x410a77dc51f1804d), |
749 | | (0x3dc5e4e3c972832a, 0xc13cb0be2f74839c), |
750 | | (0x3e01076f9b102e38, 0x41653b064cc61661), |
751 | | (0xbe2157e700d445f4, 0xc184e1b076927460), |
752 | | (0xbdfa4577156dde56, 0x41999e9653f9dc5f), |
753 | | (0xbe47aa238a739ffe, 0xc1a130f6ded40c00), |
754 | | (0xbe331b560b6fbf4a, 0x419317f11e674cae), |
755 | | (0xbe0765596077d1e3, 0xc16024404db59d3f), |
756 | | ]; |
757 | | |
758 | 0 | let x2 = DoubleDouble::quick_mult(recip, recip); |
759 | 0 | let x4 = DoubleDouble::quick_mult(x2, x2); |
760 | 0 | let x8 = DoubleDouble::quick_mult(x4, x4); |
761 | | |
762 | 0 | let e0 = DoubleDouble::mul_add( |
763 | 0 | recip, |
764 | 0 | DoubleDouble::from_bit_pair(P[1]), |
765 | 0 | DoubleDouble::from_bit_pair(P[0]), |
766 | | ); |
767 | 0 | let e1 = DoubleDouble::mul_add( |
768 | 0 | recip, |
769 | 0 | DoubleDouble::from_bit_pair(P[3]), |
770 | 0 | DoubleDouble::from_bit_pair(P[2]), |
771 | | ); |
772 | 0 | let e2 = DoubleDouble::mul_add( |
773 | 0 | recip, |
774 | 0 | DoubleDouble::from_bit_pair(P[5]), |
775 | 0 | DoubleDouble::from_bit_pair(P[4]), |
776 | | ); |
777 | 0 | let e3 = DoubleDouble::mul_add( |
778 | 0 | recip, |
779 | 0 | DoubleDouble::from_bit_pair(P[7]), |
780 | 0 | DoubleDouble::from_bit_pair(P[6]), |
781 | | ); |
782 | 0 | let e4 = DoubleDouble::mul_add( |
783 | 0 | recip, |
784 | 0 | DoubleDouble::from_bit_pair(P[9]), |
785 | 0 | DoubleDouble::from_bit_pair(P[8]), |
786 | | ); |
787 | 0 | let e5 = DoubleDouble::mul_add( |
788 | 0 | recip, |
789 | 0 | DoubleDouble::from_bit_pair(P[11]), |
790 | 0 | DoubleDouble::from_bit_pair(P[10]), |
791 | | ); |
792 | | |
793 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
794 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
795 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
796 | | |
797 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
798 | | |
799 | 0 | let p_num = DoubleDouble::mul_add(x8, f2, g0); |
800 | | |
801 | | const Q: [(u64, u64); 12] = [ |
802 | | (0x0000000000000000, 0x3ff0000000000000), |
803 | | (0xbcf687703e843d07, 0xc051418f1c4dd0b9), |
804 | | (0x3d468ab92cb87a0b, 0x40a15891d823e48d), |
805 | | (0x3d8bfc17e5183376, 0xc0e4fce40dd82796), |
806 | | (0xbdccbbcc2ecf8d4c, 0x4120beef869c61ec), |
807 | | (0xbdf42170b4d5d150, 0xc1523ad18834a7ed), |
808 | | (0xbe0eaa32f405afd4, 0x417b24ec57a8f480), |
809 | | (0x3e3ec900705e7252, 0xc19af2a91d23d62e), |
810 | | (0x3e3e220e274fa46b, 0x41b0cb905cc99ff5), |
811 | | (0xbe46c6c61dee11f6, 0xc1b7452e50518520), |
812 | | (0x3e3ed0fc063187bf, 0x41ac1772d1749896), |
813 | | (0xbe11c578f04f4be1, 0xc180feb5b2ca47cb), |
814 | | ]; |
815 | | |
816 | 0 | let e0 = DoubleDouble::mul_add_f64( |
817 | 0 | recip, |
818 | 0 | DoubleDouble::from_bit_pair(Q[1]), |
819 | 0 | f64::from_bits(0x3ff0000000000000), |
820 | | ); |
821 | 0 | let e1 = DoubleDouble::mul_add( |
822 | 0 | recip, |
823 | 0 | DoubleDouble::from_bit_pair(Q[3]), |
824 | 0 | DoubleDouble::from_bit_pair(Q[2]), |
825 | | ); |
826 | 0 | let e2 = DoubleDouble::mul_add( |
827 | 0 | recip, |
828 | 0 | DoubleDouble::from_bit_pair(Q[5]), |
829 | 0 | DoubleDouble::from_bit_pair(Q[4]), |
830 | | ); |
831 | 0 | let e3 = DoubleDouble::mul_add( |
832 | 0 | recip, |
833 | 0 | DoubleDouble::from_bit_pair(Q[7]), |
834 | 0 | DoubleDouble::from_bit_pair(Q[6]), |
835 | | ); |
836 | 0 | let e4 = DoubleDouble::mul_add( |
837 | 0 | recip, |
838 | 0 | DoubleDouble::from_bit_pair(Q[9]), |
839 | 0 | DoubleDouble::from_bit_pair(Q[8]), |
840 | | ); |
841 | 0 | let e5 = DoubleDouble::mul_add( |
842 | 0 | recip, |
843 | 0 | DoubleDouble::from_bit_pair(Q[11]), |
844 | 0 | DoubleDouble::from_bit_pair(Q[10]), |
845 | | ); |
846 | | |
847 | 0 | let f0 = DoubleDouble::mul_add(x2, e1, e0); |
848 | 0 | let f1 = DoubleDouble::mul_add(x2, e3, e2); |
849 | 0 | let f2 = DoubleDouble::mul_add(x2, e5, e4); |
850 | | |
851 | 0 | let g0 = DoubleDouble::mul_add(x4, f1, f0); |
852 | | |
853 | 0 | let p_den = DoubleDouble::mul_add(x8, f2, g0); |
854 | | |
855 | 0 | let z = DoubleDouble::div(p_num, p_den); |
856 | | |
857 | 0 | let mut e = i0_exp(dx * 0.5); |
858 | 0 | e = DoubleDouble::from_exact_add(e.hi, e.lo); |
859 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(dx); |
860 | 0 | let r = DoubleDouble::quick_mult(z * r_sqrt * e, e); |
861 | 0 | let err = f_fmla( |
862 | 0 | r.hi, |
863 | 0 | f64::from_bits(0x3c40000000000000), // 2^-59 |
864 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
865 | | ); |
866 | 0 | let up = r.hi + (r.lo + err); |
867 | 0 | let lb = r.hi + (r.lo - err); |
868 | 0 | if up != lb { |
869 | 0 | return i0_asympt_hard(x); |
870 | 0 | } |
871 | 0 | lb |
872 | 0 | } |
873 | | |
874 | | #[inline] |
875 | 0 | pub(crate) fn bessel_rsqrt_hard(x: f64, recip: DyadicFloat128) -> DyadicFloat128 { |
876 | 0 | let r = DyadicFloat128::new_from_f64(x.sqrt()) * recip; |
877 | 0 | let fx = DyadicFloat128::new_from_f64(x); |
878 | 0 | let rx = r * fx; |
879 | 0 | let drx = r * fx - rx; |
880 | | const M_ONE: DyadicFloat128 = DyadicFloat128 { |
881 | | sign: DyadicSign::Neg, |
882 | | exponent: -127, |
883 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
884 | | }; |
885 | 0 | let h = r * rx + M_ONE + r * drx; |
886 | 0 | let mut ddr = r; |
887 | 0 | ddr.exponent -= 1; // ddr * 0.5 |
888 | 0 | let dr = ddr * h; |
889 | 0 | r - dr |
890 | 0 | } |
891 | | |
892 | | /** |
893 | | I0(x)=R(1/x)*Exp(x)/sqrt(x) |
894 | | Generated in Wolfram: |
895 | | ```text |
896 | | <<FunctionApproximations` |
897 | | ClearAll["Global`*"] |
898 | | f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x] |
899 | | g[z_]:=f[1/z] |
900 | | {err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},15,15},WorkingPrecision->120] |
901 | | poly=Numerator[approx]; |
902 | | coeffs=CoefficientList[poly,z]; |
903 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
904 | | poly=Denominator[approx]; |
905 | | coeffs=CoefficientList[poly,z]; |
906 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
907 | | ``` |
908 | | **/ |
909 | | #[cold] |
910 | | #[inline(never)] |
911 | 0 | fn i0_asympt_hard(x: f64) -> f64 { |
912 | | static P: [DyadicFloat128; 16] = [ |
913 | | DyadicFloat128 { |
914 | | sign: DyadicSign::Pos, |
915 | | exponent: -129, |
916 | | mantissa: 0xcc42299e_a1b28468_7e5aad4a_70b749c4_u128, |
917 | | }, |
918 | | DyadicFloat128 { |
919 | | sign: DyadicSign::Neg, |
920 | | exponent: -122, |
921 | | mantissa: 0xabb4209d_ca11bdaa_186bef7f_390e6b77_u128, |
922 | | }, |
923 | | DyadicFloat128 { |
924 | | sign: DyadicSign::Pos, |
925 | | exponent: -116, |
926 | | mantissa: 0x8a2725e2_4749db25_625ad1f2_12a2a16c_u128, |
927 | | }, |
928 | | DyadicFloat128 { |
929 | | sign: DyadicSign::Neg, |
930 | | exponent: -111, |
931 | | mantissa: 0x8b4c2cd4_9e5d0c8b_c9be4d3e_781bb676_u128, |
932 | | }, |
933 | | DyadicFloat128 { |
934 | | sign: DyadicSign::Pos, |
935 | | exponent: -107, |
936 | | mantissa: 0xc33fad7c_40599f7d_713e3081_6b5ad791_u128, |
937 | | }, |
938 | | DyadicFloat128 { |
939 | | sign: DyadicSign::Neg, |
940 | | exponent: -103, |
941 | | mantissa: 0xc81da271_b623ba88_0be032b5_827d92fa_u128, |
942 | | }, |
943 | | DyadicFloat128 { |
944 | | sign: DyadicSign::Pos, |
945 | | exponent: -99, |
946 | | mantissa: 0x99ec4975_b6aa7cae_7692a287_ed8ae47c_u128, |
947 | | }, |
948 | | DyadicFloat128 { |
949 | | sign: DyadicSign::Neg, |
950 | | exponent: -96, |
951 | | mantissa: 0xb3aa4745_fc2dd441_2dbd3e3c_d4539687_u128, |
952 | | }, |
953 | | DyadicFloat128 { |
954 | | sign: DyadicSign::Pos, |
955 | | exponent: -93, |
956 | | mantissa: 0x9f14edc2_6882afca_29d2a067_dc459729_u128, |
957 | | }, |
958 | | DyadicFloat128 { |
959 | | sign: DyadicSign::Neg, |
960 | | exponent: -91, |
961 | | mantissa: 0xd35c4d01_78d8cec6_fc8ae0ee_834da837_u128, |
962 | | }, |
963 | | DyadicFloat128 { |
964 | | sign: DyadicSign::Pos, |
965 | | exponent: -89, |
966 | | mantissa: 0xcdc529c7_6e082342_faad3073_07a9b61f_u128, |
967 | | }, |
968 | | DyadicFloat128 { |
969 | | sign: DyadicSign::Neg, |
970 | | exponent: -87, |
971 | | mantissa: 0x8ccac88f_2598c8a6_423b1f42_63591cb9_u128, |
972 | | }, |
973 | | DyadicFloat128 { |
974 | | sign: DyadicSign::Pos, |
975 | | exponent: -87, |
976 | | mantissa: 0xfc044cfb_a20f0885_93d58660_17819ed5_u128, |
977 | | }, |
978 | | DyadicFloat128 { |
979 | | sign: DyadicSign::Neg, |
980 | | exponent: -86, |
981 | | mantissa: 0x813a700c_74d23f52_f81b179d_7ff0da9f_u128, |
982 | | }, |
983 | | DyadicFloat128 { |
984 | | sign: DyadicSign::Pos, |
985 | | exponent: -88, |
986 | | mantissa: 0xe6c43da4_297216bf_bdd987cb_636906cf_u128, |
987 | | }, |
988 | | DyadicFloat128 { |
989 | | sign: DyadicSign::Neg, |
990 | | exponent: -91, |
991 | | mantissa: 0xa4998323_649c3cf2_64477869_3d2c6afd_u128, |
992 | | }, |
993 | | ]; |
994 | | static Q: [DyadicFloat128; 16] = [ |
995 | | DyadicFloat128 { |
996 | | sign: DyadicSign::Pos, |
997 | | exponent: -127, |
998 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
999 | | }, |
1000 | | DyadicFloat128 { |
1001 | | sign: DyadicSign::Neg, |
1002 | | exponent: -121, |
1003 | | mantissa: 0xd772d5fd_a7077638_6e007274_d83b013c_u128, |
1004 | | }, |
1005 | | DyadicFloat128 { |
1006 | | sign: DyadicSign::Pos, |
1007 | | exponent: -115, |
1008 | | mantissa: 0xad914ef0_451ced2e_515657ef_fc7eeb53_u128, |
1009 | | }, |
1010 | | DyadicFloat128 { |
1011 | | sign: DyadicSign::Neg, |
1012 | | exponent: -110, |
1013 | | mantissa: 0xaf41180c_dffe96e5_f192fa40_0b1bff1e_u128, |
1014 | | }, |
1015 | | DyadicFloat128 { |
1016 | | sign: DyadicSign::Pos, |
1017 | | exponent: -106, |
1018 | | mantissa: 0xf60dc728_241f71fd_5b93e653_ccbedace_u128, |
1019 | | }, |
1020 | | DyadicFloat128 { |
1021 | | sign: DyadicSign::Neg, |
1022 | | exponent: -102, |
1023 | | mantissa: 0xfcaefef9_39cf96e7_3cb75a98_da5e9761_u128, |
1024 | | }, |
1025 | | DyadicFloat128 { |
1026 | | sign: DyadicSign::Pos, |
1027 | | exponent: -98, |
1028 | | mantissa: 0xc2d2c837_5789587a_13ef38c6_a24c3413_u128, |
1029 | | }, |
1030 | | DyadicFloat128 { |
1031 | | sign: DyadicSign::Neg, |
1032 | | exponent: -95, |
1033 | | mantissa: 0xe41725c3_51d14486_a650044e_e8588f7b_u128, |
1034 | | }, |
1035 | | DyadicFloat128 { |
1036 | | sign: DyadicSign::Pos, |
1037 | | exponent: -92, |
1038 | | mantissa: 0xcabeed9b_5e2e888d_81d32b11_d289a624_u128, |
1039 | | }, |
1040 | | DyadicFloat128 { |
1041 | | sign: DyadicSign::Neg, |
1042 | | exponent: -89, |
1043 | | mantissa: 0x8764876d_11ad6607_8a8d5382_3ffe82d9_u128, |
1044 | | }, |
1045 | | DyadicFloat128 { |
1046 | | sign: DyadicSign::Pos, |
1047 | | exponent: -87, |
1048 | | mantissa: 0x84c9f9e5_6a5f5034_ad2c8512_16cb1ba1_u128, |
1049 | | }, |
1050 | | DyadicFloat128 { |
1051 | | sign: DyadicSign::Neg, |
1052 | | exponent: -86, |
1053 | | mantissa: 0xb7c1d143_a15d8aab_03a7fa3e_b7d07a36_u128, |
1054 | | }, |
1055 | | DyadicFloat128 { |
1056 | | sign: DyadicSign::Pos, |
1057 | | exponent: -85, |
1058 | | mantissa: 0xa78f8257_4658040f_7a1ad39c_91ea9483_u128, |
1059 | | }, |
1060 | | DyadicFloat128 { |
1061 | | sign: DyadicSign::Neg, |
1062 | | exponent: -85, |
1063 | | mantissa: 0xb231e0ca_b729a404_44c38f52_be208507_u128, |
1064 | | }, |
1065 | | DyadicFloat128 { |
1066 | | sign: DyadicSign::Pos, |
1067 | | exponent: -86, |
1068 | | mantissa: 0xae317981_42349081_8bc68b28_f69b8e49_u128, |
1069 | | }, |
1070 | | DyadicFloat128 { |
1071 | | sign: DyadicSign::Neg, |
1072 | | exponent: -89, |
1073 | | mantissa: 0xb451abd3_5cd79c6d_7e578164_32f16da1_u128, |
1074 | | }, |
1075 | | ]; |
1076 | | |
1077 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
1078 | | |
1079 | 0 | let mut p_num = P[15]; |
1080 | 0 | for i in (0..15).rev() { |
1081 | 0 | p_num = recip * p_num + P[i]; |
1082 | 0 | } |
1083 | | |
1084 | 0 | let mut p_den = Q[15]; |
1085 | 0 | for i in (0..15).rev() { |
1086 | 0 | p_den = recip * p_den + Q[i]; |
1087 | 0 | } |
1088 | | |
1089 | 0 | let z = p_num * p_den.reciprocal(); |
1090 | | |
1091 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
1092 | 0 | let f_exp = rational128_exp(x); |
1093 | 0 | (z * r_sqrt * f_exp).fast_as_f64() |
1094 | 0 | } |
1095 | | |
1096 | | #[cfg(test)] |
1097 | | mod tests { |
1098 | | use super::*; |
1099 | | |
1100 | | #[test] |
1101 | | fn test_i0() { |
1102 | | assert_eq!(f_i0(2.2204460492503131e-24f64), 1.0); |
1103 | | assert_eq!(f_i0(f64::EPSILON), 1.0); |
1104 | | assert_eq!(f_i0(9.500000000005492,), 1753.4809905364318); |
1105 | | assert!(f_i0(f64::NAN).is_nan()); |
1106 | | assert_eq!(f_i0(f64::INFINITY), f64::INFINITY); |
1107 | | assert_eq!(f_i0(f64::NEG_INFINITY), f64::INFINITY); |
1108 | | assert_eq!(f_i0(7.500000000788034), 268.1613117118702); |
1109 | | assert_eq!(f_i0(715.), f64::INFINITY); |
1110 | | assert_eq!(f_i0(12.), 18948.925349296307); |
1111 | | assert_eq!(f_i0(16.), 893446.227920105); |
1112 | | assert_eq!(f_i0(18.432), 9463892.87209841); |
1113 | | assert_eq!(f_i0(26.432), 23507752424.350075); |
1114 | | assert_eq!(f_i0(0.2), 1.010025027795146); |
1115 | | assert_eq!(f_i0(7.5), 268.16131151518937); |
1116 | | assert_eq!(f_i0(-1.5), 1.646723189772891); |
1117 | | } |
1118 | | } |