/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/bessel/j0.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bessel::alpha0::{ |
30 | | bessel_0_asympt_alpha, bessel_0_asympt_alpha_fast, bessel_0_asympt_alpha_hard, |
31 | | }; |
32 | | use crate::bessel::beta0::{ |
33 | | bessel_0_asympt_beta, bessel_0_asympt_beta_fast, bessel_0_asympt_beta_hard, |
34 | | }; |
35 | | use crate::bessel::i0::bessel_rsqrt_hard; |
36 | | use crate::bessel::j0_coeffs_remez::J0_COEFFS_REMEZ; |
37 | | use crate::bessel::j0_coeffs_taylor::J0_COEFFS_TAYLOR; |
38 | | use crate::bessel::j0f_coeffs::{J0_ZEROS, J0_ZEROS_VALUE}; |
39 | | use crate::common::f_fmla; |
40 | | use crate::double_double::DoubleDouble; |
41 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
42 | | use crate::polyeval::{f_polyeval9, f_polyeval10, f_polyeval12, f_polyeval19}; |
43 | | use crate::rounding::CpuCeil; |
44 | | use crate::sin_helper::{cos_dd_small, cos_dd_small_fast, cos_f128_small}; |
45 | | use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128}; |
46 | | |
47 | | /// Bessel of the first kind of order 0 |
48 | 0 | pub fn f_j0(x: f64) -> f64 { |
49 | 0 | let ux = x.to_bits().wrapping_shl(1); |
50 | | |
51 | 0 | if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 { |
52 | | // |x| <= f64::EPSILON, |x| == inf, |x| == NaN |
53 | 0 | if ux <= 0x77723ef88126da90u64 { |
54 | | // |x| <= 0.00000000000000000000532 |
55 | 0 | return 1.; |
56 | 0 | } |
57 | 0 | if ux <= 0x7960000000000000u64 { |
58 | | // |x| <= f64::EPSILON |
59 | | // J0(x) ~ 1-x^2/4+O[x]^4 |
60 | 0 | let half_x = 0.5 * x; // exact. |
61 | 0 | return f_fmla(half_x, -half_x, 1.); |
62 | 0 | } |
63 | 0 | if x.is_infinite() { |
64 | 0 | return 0.; |
65 | 0 | } |
66 | 0 | return x + f64::NAN; // x == NaN |
67 | 0 | } |
68 | | |
69 | 0 | let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
70 | | |
71 | 0 | let ax = f64::from_bits(x_abs); |
72 | | |
73 | 0 | if x_abs <= 0x4052b33333333333u64 { |
74 | | // |x| <= 74.8 |
75 | 0 | if x_abs <= 0x3ff199999999999au64 { |
76 | | // |x| <= 1.1 |
77 | 0 | return j0_maclaurin_series_fast(ax); |
78 | 0 | } |
79 | 0 | return j0_small_argument_fast(ax); |
80 | 0 | } |
81 | | |
82 | 0 | j0_asympt_fast(ax) |
83 | 0 | } |
84 | | |
85 | | /** |
86 | | Generated by SageMath: |
87 | | ```python |
88 | | mp.prec = 180 |
89 | | def print_expansion_at_0(): |
90 | | print(f"const J0_MACLAURIN_SERIES: [(u64, u64); 12] = [") |
91 | | from mpmath import mp, j0, taylor |
92 | | poly = taylor(lambda val: j0(val), 0, 24) |
93 | | real_i = 0 |
94 | | for i in range(0, 24, 2): |
95 | | print_double_double("", DD(poly[i])) |
96 | | real_i = real_i + 1 |
97 | | print("];") |
98 | | print(poly) |
99 | | |
100 | | print_expansion_at_0() |
101 | | ``` |
102 | | **/ |
103 | | #[inline] |
104 | 0 | pub(crate) fn j0_maclaurin_series_fast(x: f64) -> f64 { |
105 | | const C: [u64; 12] = [ |
106 | | 0x3ff0000000000000, |
107 | | 0xbfd0000000000000, |
108 | | 0x3f90000000000000, |
109 | | 0xbf3c71c71c71c71c, |
110 | | 0x3edc71c71c71c71c, |
111 | | 0xbe723456789abcdf, |
112 | | 0x3e002e85c0898b71, |
113 | | 0xbd8522a43f65486a, |
114 | | 0x3d0522a43f65486a, |
115 | | 0xbc80b313289be0b9, |
116 | | 0x3bf5601885e63e5d, |
117 | | 0xbb669ca9cf3b7f54, |
118 | | ]; |
119 | | |
120 | 0 | let dx2 = DoubleDouble::from_exact_mult(x, x); |
121 | | |
122 | 0 | let p = f_polyeval10( |
123 | 0 | dx2.hi, |
124 | 0 | f64::from_bits(C[2]), |
125 | 0 | f64::from_bits(C[3]), |
126 | 0 | f64::from_bits(C[4]), |
127 | 0 | f64::from_bits(C[5]), |
128 | 0 | f64::from_bits(C[6]), |
129 | 0 | f64::from_bits(C[7]), |
130 | 0 | f64::from_bits(C[8]), |
131 | 0 | f64::from_bits(C[9]), |
132 | 0 | f64::from_bits(C[10]), |
133 | 0 | f64::from_bits(C[11]), |
134 | | ); |
135 | 0 | let mut z = DoubleDouble::mul_f64_add_f64(dx2, p, f64::from_bits(C[1])); |
136 | 0 | z = DoubleDouble::mul_add_f64(dx2, z, f64::from_bits(C[0])); |
137 | | |
138 | | // squaring error (2^-56) + poly error 2^-75 |
139 | 0 | let err = f_fmla( |
140 | 0 | dx2.hi, |
141 | 0 | f64::from_bits(0x3c70000000000000), // 2^-56 |
142 | 0 | f64::from_bits(0x3b40000000000000), // 2^-75 |
143 | | ); |
144 | 0 | let ub = z.hi + (z.lo + err); |
145 | 0 | let lb = z.hi + (z.lo - err); |
146 | | |
147 | 0 | if ub == lb { |
148 | 0 | return z.to_f64(); |
149 | 0 | } |
150 | 0 | j0_maclaurin_series(x) |
151 | 0 | } |
152 | | |
153 | | /** |
154 | | Generated by SageMath: |
155 | | ```python |
156 | | mp.prec = 180 |
157 | | def print_expansion_at_0(): |
158 | | print(f"const J0_MACLAURIN_SERIES: [(u64, u64); 12] = [") |
159 | | from mpmath import mp, j0, taylor |
160 | | poly = taylor(lambda val: j0(val), 0, 24) |
161 | | real_i = 0 |
162 | | for i in range(0, 24, 2): |
163 | | print_double_double("", DD(poly[i])) |
164 | | real_i = real_i + 1 |
165 | | print("];") |
166 | | print(poly) |
167 | | |
168 | | print_expansion_at_0() |
169 | | ``` |
170 | | **/ |
171 | | #[cold] |
172 | 0 | pub(crate) fn j0_maclaurin_series(x: f64) -> f64 { |
173 | | const C: [(u64, u64); 12] = [ |
174 | | (0x0000000000000000, 0x3ff0000000000000), |
175 | | (0x0000000000000000, 0xbfd0000000000000), |
176 | | (0x0000000000000000, 0x3f90000000000000), |
177 | | (0xbbdc71c71c71c71c, 0xbf3c71c71c71c71c), |
178 | | (0x3b7c71c71c71c71c, 0x3edc71c71c71c71c), |
179 | | (0xbab23456789abcdf, 0xbe723456789abcdf), |
180 | | (0xba8b6edec0692e65, 0x3e002e85c0898b71), |
181 | | (0x3a2604db055bd075, 0xbd8522a43f65486a), |
182 | | (0xb9a604db055bd075, 0x3d0522a43f65486a), |
183 | | (0x3928824198c6f6e1, 0xbc80b313289be0b9), |
184 | | (0xb869b0b430eb27b8, 0x3bf5601885e63e5d), |
185 | | (0x380ee6b4638f3a25, 0xbb669ca9cf3b7f54), |
186 | | ]; |
187 | | |
188 | 0 | let dx2 = DoubleDouble::from_exact_mult(x, x); |
189 | | |
190 | 0 | let p = f_polyeval12( |
191 | 0 | dx2, |
192 | 0 | DoubleDouble::from_bit_pair(C[0]), |
193 | 0 | DoubleDouble::from_bit_pair(C[1]), |
194 | 0 | DoubleDouble::from_bit_pair(C[2]), |
195 | 0 | DoubleDouble::from_bit_pair(C[3]), |
196 | 0 | DoubleDouble::from_bit_pair(C[4]), |
197 | 0 | DoubleDouble::from_bit_pair(C[5]), |
198 | 0 | DoubleDouble::from_bit_pair(C[6]), |
199 | 0 | DoubleDouble::from_bit_pair(C[7]), |
200 | 0 | DoubleDouble::from_bit_pair(C[8]), |
201 | 0 | DoubleDouble::from_bit_pair(C[9]), |
202 | 0 | DoubleDouble::from_bit_pair(C[10]), |
203 | 0 | DoubleDouble::from_bit_pair(C[11]), |
204 | | ); |
205 | 0 | let r = DoubleDouble::from_exact_add(p.hi, p.lo); |
206 | | const ERR: f64 = f64::from_bits(0x39d0000000000000); // 2^-98 |
207 | 0 | let ub = r.hi + (r.lo + ERR); |
208 | 0 | let lb = r.hi + (r.lo - ERR); |
209 | 0 | if ub == lb { |
210 | 0 | return r.to_f64(); |
211 | 0 | } |
212 | 0 | j0_maclaurin_series_hard(x) |
213 | 0 | } |
214 | | |
215 | | /** |
216 | | Generated by SageMath: |
217 | | |
218 | | ```python |
219 | | mp.prec = 180 |
220 | | def print_expansion_at_0(): |
221 | | print(f"const P: [DyadicFloat128; 12] = [") |
222 | | from mpmath import mp, j0, taylor |
223 | | poly = taylor(lambda val: j0(val), 0, 24) |
224 | | # print(poly) |
225 | | real_i = 0 |
226 | | for i in range(0, 24, 2): |
227 | | print_dyadic(DD(poly[i])) |
228 | | real_i = real_i + 1 |
229 | | print("];") |
230 | | print(poly) |
231 | | |
232 | | print_expansion_at_0() |
233 | | ``` |
234 | | **/ |
235 | | #[cold] |
236 | | #[inline(never)] |
237 | 0 | pub(crate) fn j0_maclaurin_series_hard(x: f64) -> f64 { |
238 | | static P: [DyadicFloat128; 12] = [ |
239 | | DyadicFloat128 { |
240 | | sign: DyadicSign::Pos, |
241 | | exponent: -127, |
242 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
243 | | }, |
244 | | DyadicFloat128 { |
245 | | sign: DyadicSign::Neg, |
246 | | exponent: -129, |
247 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
248 | | }, |
249 | | DyadicFloat128 { |
250 | | sign: DyadicSign::Pos, |
251 | | exponent: -133, |
252 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
253 | | }, |
254 | | DyadicFloat128 { |
255 | | sign: DyadicSign::Neg, |
256 | | exponent: -139, |
257 | | mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128, |
258 | | }, |
259 | | DyadicFloat128 { |
260 | | sign: DyadicSign::Pos, |
261 | | exponent: -145, |
262 | | mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128, |
263 | | }, |
264 | | DyadicFloat128 { |
265 | | sign: DyadicSign::Neg, |
266 | | exponent: -151, |
267 | | mantissa: 0x91a2b3c4_d5e6f809_1a2b3c4d_5e6f8092_u128, |
268 | | }, |
269 | | DyadicFloat128 { |
270 | | sign: DyadicSign::Pos, |
271 | | exponent: -158, |
272 | | mantissa: 0x81742e04_4c5b8724_8909fcb6_8cd4e410_u128, |
273 | | }, |
274 | | DyadicFloat128 { |
275 | | sign: DyadicSign::Neg, |
276 | | exponent: -166, |
277 | | mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128, |
278 | | }, |
279 | | DyadicFloat128 { |
280 | | sign: DyadicSign::Pos, |
281 | | exponent: -174, |
282 | | mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128, |
283 | | }, |
284 | | DyadicFloat128 { |
285 | | sign: DyadicSign::Neg, |
286 | | exponent: -182, |
287 | | mantissa: 0x85989944_df05c4ef_b7cce721_23e1b391_u128, |
288 | | }, |
289 | | DyadicFloat128 { |
290 | | sign: DyadicSign::Pos, |
291 | | exponent: -191, |
292 | | mantissa: 0xab00c42f_31f2e799_3d2f3c53_6120e5d8_u128, |
293 | | }, |
294 | | DyadicFloat128 { |
295 | | sign: DyadicSign::Neg, |
296 | | exponent: -200, |
297 | | mantissa: 0xb4e54e79_dbfa9c23_29738e18_bb602809_u128, |
298 | | }, |
299 | | ]; |
300 | 0 | let dx = DyadicFloat128::new_from_f64(x); |
301 | 0 | let x2 = dx * dx; |
302 | | |
303 | 0 | let mut p = P[11]; |
304 | 0 | for i in (0..11).rev() { |
305 | 0 | p = x2 * p + P[i]; |
306 | 0 | } |
307 | | |
308 | 0 | p.fast_as_f64() |
309 | 0 | } |
310 | | |
311 | | /// This method on small range searches for nearest zero or extremum. |
312 | | /// Then picks stored series expansion at the point end evaluates the poly at the point. |
313 | | #[inline] |
314 | 0 | pub(crate) fn j0_small_argument_fast(x: f64) -> f64 { |
315 | | // let avg_step = 74.6145 / 47.0; |
316 | | // let inv_step = 1.0 / avg_step; |
317 | | |
318 | | const INV_STEP: f64 = 0.6299043751549631; |
319 | | |
320 | 0 | let fx = x * INV_STEP; |
321 | | const J0_ZEROS_COUNT: f64 = (J0_ZEROS.len() - 1) as f64; |
322 | 0 | let idx0 = unsafe { fx.min(J0_ZEROS_COUNT).to_int_unchecked::<usize>() }; |
323 | 0 | let idx1 = unsafe { |
324 | 0 | fx.cpu_ceil() |
325 | 0 | .min(J0_ZEROS_COUNT) |
326 | 0 | .to_int_unchecked::<usize>() |
327 | | }; |
328 | | |
329 | 0 | let found_zero0 = DoubleDouble::from_bit_pair(J0_ZEROS[idx0]); |
330 | 0 | let found_zero1 = DoubleDouble::from_bit_pair(J0_ZEROS[idx1]); |
331 | | |
332 | 0 | let dist0 = (found_zero0.hi - x).abs(); |
333 | 0 | let dist1 = (found_zero1.hi - x).abs(); |
334 | | |
335 | 0 | let (found_zero, idx, dist) = if dist0 < dist1 { |
336 | 0 | (found_zero0, idx0, dist0) |
337 | | } else { |
338 | 0 | (found_zero1, idx1, dist1) |
339 | | }; |
340 | | |
341 | 0 | if idx == 0 { |
342 | 0 | return j0_maclaurin_series_fast(x); |
343 | 0 | } |
344 | | |
345 | 0 | let is_too_close_too_zero = dist.abs() < 1e-3; |
346 | | |
347 | 0 | let c = if is_too_close_too_zero { |
348 | 0 | &J0_COEFFS_TAYLOR[idx - 1] |
349 | | } else { |
350 | 0 | &J0_COEFFS_REMEZ[idx - 1] |
351 | | }; |
352 | | |
353 | 0 | let r = DoubleDouble::full_add_f64(-found_zero, x.abs()); |
354 | | |
355 | | // We hit exact zero, value, better to return it directly |
356 | 0 | if dist == 0. { |
357 | 0 | return f64::from_bits(J0_ZEROS_VALUE[idx]); |
358 | 0 | } |
359 | 0 | let p = f_polyeval19( |
360 | 0 | r.hi, |
361 | 0 | f64::from_bits(c[5].1), |
362 | 0 | f64::from_bits(c[6].1), |
363 | 0 | f64::from_bits(c[7].1), |
364 | 0 | f64::from_bits(c[8].1), |
365 | 0 | f64::from_bits(c[9].1), |
366 | 0 | f64::from_bits(c[10].1), |
367 | 0 | f64::from_bits(c[11].1), |
368 | 0 | f64::from_bits(c[12].1), |
369 | 0 | f64::from_bits(c[13].1), |
370 | 0 | f64::from_bits(c[14].1), |
371 | 0 | f64::from_bits(c[15].1), |
372 | 0 | f64::from_bits(c[16].1), |
373 | 0 | f64::from_bits(c[17].1), |
374 | 0 | f64::from_bits(c[18].1), |
375 | 0 | f64::from_bits(c[19].1), |
376 | 0 | f64::from_bits(c[20].1), |
377 | 0 | f64::from_bits(c[21].1), |
378 | 0 | f64::from_bits(c[22].1), |
379 | 0 | f64::from_bits(c[23].1), |
380 | | ); |
381 | | |
382 | 0 | let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4])); |
383 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3])); |
384 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2])); |
385 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1])); |
386 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0])); |
387 | 0 | let err = f_fmla( |
388 | 0 | z.hi, |
389 | 0 | f64::from_bits(0x3c70000000000000), // 2^-56 |
390 | 0 | f64::from_bits(0x3bf0000000000000), // 2^-64 |
391 | | ); |
392 | 0 | let ub = z.hi + (z.lo + err); |
393 | 0 | let lb = z.hi + (z.lo - err); |
394 | | |
395 | 0 | if ub == lb { |
396 | 0 | return z.to_f64(); |
397 | 0 | } |
398 | | |
399 | 0 | j0_small_argument_dd(r, c) |
400 | 0 | } |
401 | | |
402 | | #[cold] |
403 | 0 | fn j0_small_argument_dd(r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 { |
404 | 0 | let c = &c0[15..]; |
405 | | |
406 | 0 | let p0 = f_polyeval9( |
407 | 0 | r.to_f64(), |
408 | 0 | f64::from_bits(c[0].1), |
409 | 0 | f64::from_bits(c[1].1), |
410 | 0 | f64::from_bits(c[2].1), |
411 | 0 | f64::from_bits(c[3].1), |
412 | 0 | f64::from_bits(c[4].1), |
413 | 0 | f64::from_bits(c[5].1), |
414 | 0 | f64::from_bits(c[6].1), |
415 | 0 | f64::from_bits(c[7].1), |
416 | 0 | f64::from_bits(c[8].1), |
417 | | ); |
418 | | |
419 | 0 | let c = c0; |
420 | | |
421 | 0 | let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14])); |
422 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13])); |
423 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12])); |
424 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11])); |
425 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10])); |
426 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9])); |
427 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8])); |
428 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7])); |
429 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6])); |
430 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5])); |
431 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4])); |
432 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3])); |
433 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2])); |
434 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1])); |
435 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0])); |
436 | | |
437 | 0 | let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo); |
438 | 0 | let err = f_fmla( |
439 | 0 | p.hi, |
440 | 0 | f64::from_bits(0x3c10000000000000), // 2^-62 |
441 | 0 | f64::from_bits(0x3a90000000000000), // 2^-86 |
442 | | ); |
443 | 0 | let ub = p.hi + (p.lo + err); |
444 | 0 | let lb = p.hi + (p.lo - err); |
445 | 0 | if ub != lb { |
446 | 0 | return j0_small_argument_hard(r, c); |
447 | 0 | } |
448 | 0 | p.to_f64() |
449 | 0 | } |
450 | | |
451 | | #[cold] |
452 | | #[inline(never)] |
453 | 0 | fn j0_small_argument_hard(r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 { |
454 | 0 | let mut p = DoubleDouble::from_bit_pair(c[23]); |
455 | 0 | for i in (0..23).rev() { |
456 | 0 | p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i])); |
457 | 0 | p = DoubleDouble::from_exact_add(p.hi, p.lo); |
458 | 0 | } |
459 | 0 | p.to_f64() |
460 | 0 | } |
461 | | |
462 | | /* |
463 | | Evaluates: |
464 | | J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x)) |
465 | | */ |
466 | | #[inline] |
467 | 0 | pub(crate) fn j0_asympt_fast(x: f64) -> f64 { |
468 | 0 | let x = x.abs(); |
469 | | |
470 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
471 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
472 | | f64::from_bits(0x3fe9884533d43651), |
473 | | ); |
474 | | |
475 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
476 | | f64::from_bits(0xbc81a62633145c07), |
477 | | f64::from_bits(0xbfe921fb54442d18), |
478 | | ); |
479 | | |
480 | 0 | let recip = if x.to_bits() > 0x7fd000000000000u64 { |
481 | 0 | DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_div_fma(4.0, x), 0.25) |
482 | | } else { |
483 | 0 | DoubleDouble::from_recip(x) |
484 | | }; |
485 | | |
486 | 0 | let alpha = bessel_0_asympt_alpha_fast(recip); |
487 | 0 | let beta = bessel_0_asympt_beta_fast(recip); |
488 | | |
489 | 0 | let AngleReduced { angle } = rem2pi_any(x); |
490 | | |
491 | | // Without full subtraction cancellation happens sometimes |
492 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
493 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
494 | | |
495 | 0 | let m_cos = cos_dd_small_fast(r0); |
496 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_cos); |
497 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(x); |
498 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
499 | 0 | let p = DoubleDouble::quick_mult(scale, z0); |
500 | | |
501 | 0 | let err = f_fmla( |
502 | 0 | p.hi, |
503 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
504 | 0 | f64::from_bits(0x3a60000000000000), // 2^-89 |
505 | | ); |
506 | 0 | let ub = p.hi + (p.lo + err); |
507 | 0 | let lb = p.hi + (p.lo - err); |
508 | | |
509 | 0 | if ub == lb { |
510 | 0 | return p.to_f64(); |
511 | 0 | } |
512 | 0 | j0_asympt(x, recip, r_sqrt, angle) |
513 | 0 | } |
514 | | |
515 | | /* |
516 | | Evaluates: |
517 | | J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x)) |
518 | | */ |
519 | 0 | pub(crate) fn j0_asympt( |
520 | 0 | x: f64, |
521 | 0 | recip: DoubleDouble, |
522 | 0 | r_sqrt: DoubleDouble, |
523 | 0 | angle: DoubleDouble, |
524 | 0 | ) -> f64 { |
525 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
526 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
527 | | f64::from_bits(0x3fe9884533d43651), |
528 | | ); |
529 | | |
530 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
531 | | f64::from_bits(0xbc81a62633145c07), |
532 | | f64::from_bits(0xbfe921fb54442d18), |
533 | | ); |
534 | | |
535 | 0 | let alpha = bessel_0_asympt_alpha(recip); |
536 | 0 | let beta = bessel_0_asympt_beta(recip); |
537 | | |
538 | | // Without full subtraction cancellation happens sometimes |
539 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
540 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
541 | | |
542 | 0 | let m_cos = cos_dd_small(r0); |
543 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_cos); |
544 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
545 | 0 | let r = DoubleDouble::quick_mult(scale, z0); |
546 | | |
547 | 0 | let p = DoubleDouble::from_exact_add(r.hi, r.lo); |
548 | 0 | let err = f_fmla( |
549 | 0 | p.hi, |
550 | 0 | f64::from_bits(0x3bd0000000000000), // 2^-66 |
551 | 0 | f64::from_bits(0x39e0000000000000), // 2^-97 |
552 | | ); |
553 | 0 | let ub = p.hi + (p.lo + err); |
554 | 0 | let lb = p.hi + (p.lo - err); |
555 | | |
556 | 0 | if ub == lb { |
557 | 0 | return p.to_f64(); |
558 | 0 | } |
559 | 0 | j0_asympt_hard(x) |
560 | 0 | } |
561 | | |
562 | | /* |
563 | | Evaluates: |
564 | | J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x)) |
565 | | */ |
566 | | #[cold] |
567 | | #[inline(never)] |
568 | 0 | pub(crate) fn j0_asympt_hard(x: f64) -> f64 { |
569 | | const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 { |
570 | | sign: DyadicSign::Pos, |
571 | | exponent: -128, |
572 | | mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128, |
573 | | }; |
574 | | |
575 | | const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 { |
576 | | sign: DyadicSign::Neg, |
577 | | exponent: -128, |
578 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
579 | | }; |
580 | | |
581 | 0 | let x_dyadic = DyadicFloat128::new_from_f64(x); |
582 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
583 | | |
584 | 0 | let alpha = bessel_0_asympt_alpha_hard(recip); |
585 | 0 | let beta = bessel_0_asympt_beta_hard(recip); |
586 | | |
587 | 0 | let angle = rem2pi_f128(x_dyadic); |
588 | | |
589 | 0 | let x0pi34 = MPI_OVER_4 - alpha; |
590 | 0 | let r0 = angle + x0pi34; |
591 | | |
592 | 0 | let m_sin = cos_f128_small(r0); |
593 | | |
594 | 0 | let z0 = beta * m_sin; |
595 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
596 | 0 | let scale = SQRT_2_OVER_PI * r_sqrt; |
597 | 0 | let p = scale * z0; |
598 | 0 | p.fast_as_f64() |
599 | 0 | } |
600 | | |
601 | | #[cfg(test)] |
602 | | mod tests { |
603 | | use super::*; |
604 | | |
605 | | #[test] |
606 | | fn test_j0() { |
607 | | assert_eq!(f_j0(f64::EPSILON), 1.0); |
608 | | assert_eq!(f_j0(-0.000000000000000000000532), 1.0); |
609 | | assert_eq!(f_j0(0.0000000000000000000532), 1.0); |
610 | | assert_eq!(f_j0(-2.000976555054876), 0.22332760641907712); |
611 | | assert_eq!(f_j0(-2.3369499004222215E+304), -3.3630754230844632e-155); |
612 | | assert_eq!( |
613 | | f_j0(f64::from_bits(0xd71a31ffe2ff7e9f)), |
614 | | f64::from_bits(0xb2e58532f95056ff) |
615 | | ); |
616 | | assert_eq!(f_j0(6.1795701510782757E+307), 6.075192922402001e-155); |
617 | | assert_eq!(f_j0(6.1795701510782757E+301), 4.118334155030934e-152); |
618 | | assert_eq!(f_j0(6.1795701510782757E+157), 9.5371668900364e-80); |
619 | | assert_eq!(f_j0(79.), -0.08501719554953485); |
620 | | // Without FMA 2.703816901253004e-16 |
621 | | #[cfg(any( |
622 | | all(target_arch = "x86_64", target_feature = "fma"), |
623 | | target_arch = "aarch64" |
624 | | ))] |
625 | | assert_eq!(f_j0(93.463718781944774171190), 2.7038169012530046e-16); |
626 | | assert_eq!(f_j0(99.746819858680596470279979), -8.419106281522749e-17); |
627 | | assert_eq!(f_j0(f64::INFINITY), 0.); |
628 | | assert_eq!(f_j0(f64::NEG_INFINITY), 0.); |
629 | | assert!(f_j0(f64::NAN).is_nan()); |
630 | | } |
631 | | } |