/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/bessel/j1.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #![allow(clippy::excessive_precision)] |
31 | | |
32 | | use crate::bessel::alpha1::{ |
33 | | bessel_1_asympt_alpha, bessel_1_asympt_alpha_fast, bessel_1_asympt_alpha_hard, |
34 | | }; |
35 | | use crate::bessel::beta1::{ |
36 | | bessel_1_asympt_beta, bessel_1_asympt_beta_fast, bessel_1_asympt_beta_hard, |
37 | | }; |
38 | | use crate::bessel::i0::bessel_rsqrt_hard; |
39 | | use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE}; |
40 | | use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR; |
41 | | use crate::common::f_fmla; |
42 | | use crate::double_double::DoubleDouble; |
43 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
44 | | use crate::polyeval::{f_polyeval8, f_polyeval9, f_polyeval12, f_polyeval19}; |
45 | | use crate::rounding::CpuCeil; |
46 | | use crate::sin_helper::{sin_dd_small, sin_dd_small_fast, sin_f128_small}; |
47 | | use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128}; |
48 | | |
49 | | /// Bessel of the first kind of order 1 |
50 | | /// |
51 | | /// Note about accuracy: |
52 | | /// - Close to zero Bessel have tiny values such that testing against MPFR must be done exactly |
53 | | /// in the same precision, since any nearest representable number have ULP > 0.5, |
54 | | /// for example `J1(0.000000000000000000000000000000000000023509886)` in single precision |
55 | | /// have 0.7 ULP for any number with extended precision that would be represented in f32 |
56 | | /// Same applies to J1(4.4501477170144018E-309) in double precision and some others subnormal numbers |
57 | 0 | pub fn f_j1(x: f64) -> f64 { |
58 | 0 | let ux = x.to_bits().wrapping_shl(1); |
59 | | |
60 | 0 | if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 { |
61 | | // |x| <= f64::EPSILON, |x| == inf, x == NaN |
62 | 0 | if ux <= 0x72338c9356bb0314u64 { |
63 | | // |x| <= 0.000000000000000000000000000000001241 |
64 | | // J1(x) ~ x/2+O[x]^3 |
65 | 0 | return x * 0.5; |
66 | 0 | } |
67 | 0 | if ux <= 0x7960000000000000u64 { |
68 | | // |x| <= f64::EPSILON |
69 | | // J1(x) ~ x/2-x^3/16+O[x]^5 |
70 | 0 | let quad_part_x = x * 0.125; // exact. x / 8 |
71 | 0 | return f_fmla(quad_part_x, -quad_part_x, 0.5) * x; |
72 | 0 | } |
73 | 0 | if x.is_infinite() { |
74 | 0 | return 0.; |
75 | 0 | } |
76 | 0 | return x + f64::NAN; // x == NaN |
77 | 0 | } |
78 | | |
79 | 0 | let ax: u64 = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
80 | | |
81 | 0 | if ax < 0x4052a6784230fcf8u64 { |
82 | | // |x| < 74.60109 |
83 | 0 | if ax < 0x3feccccccccccccd { |
84 | | // |x| < 0.9 |
85 | 0 | return j1_maclaurin_series_fast(x); |
86 | 0 | } |
87 | 0 | return j1_small_argument_fast(x); |
88 | 0 | } |
89 | | |
90 | 0 | j1_asympt_fast(x) |
91 | 0 | } |
92 | | |
93 | | /* |
94 | | Evaluates: |
95 | | J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x)) |
96 | | discarding 1*PI/2 using identities gives: |
97 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
98 | | |
99 | | to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is: |
100 | | |
101 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x)) |
102 | | */ |
103 | | #[inline] |
104 | 0 | fn j1_asympt_fast(x: f64) -> f64 { |
105 | 0 | let origin_x = x; |
106 | | static SGN: [f64; 2] = [1., -1.]; |
107 | 0 | let sign_scale = SGN[x.is_sign_negative() as usize]; |
108 | 0 | let x = x.abs(); |
109 | | |
110 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
111 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
112 | | f64::from_bits(0x3fe9884533d43651), |
113 | | ); |
114 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
115 | | f64::from_bits(0xbc81a62633145c07), |
116 | | f64::from_bits(0xbfe921fb54442d18), |
117 | | ); |
118 | | |
119 | 0 | let recip = if x.to_bits() > 0x7fd000000000000u64 { |
120 | 0 | DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25) |
121 | | } else { |
122 | 0 | DoubleDouble::from_recip(x) |
123 | | }; |
124 | | |
125 | 0 | let alpha = bessel_1_asympt_alpha_fast(recip); |
126 | 0 | let beta = bessel_1_asympt_beta_fast(recip); |
127 | | |
128 | 0 | let AngleReduced { angle } = rem2pi_any(x); |
129 | | |
130 | | // Without full subtraction cancellation happens sometimes |
131 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
132 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
133 | | |
134 | 0 | let m_sin = sin_dd_small_fast(r0); |
135 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_sin); |
136 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(x); |
137 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
138 | 0 | let p = DoubleDouble::quick_mult(scale, z0); |
139 | | |
140 | 0 | let err = f_fmla( |
141 | 0 | p.hi, |
142 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
143 | 0 | f64::from_bits(0x3a60000000000000), // 2^-89 |
144 | | ); |
145 | 0 | let ub = p.hi + (p.lo + err); |
146 | 0 | let lb = p.hi + (p.lo - err); |
147 | | |
148 | 0 | if ub == lb { |
149 | 0 | return p.to_f64() * sign_scale; |
150 | 0 | } |
151 | | |
152 | 0 | j1_asympt(origin_x, recip, r_sqrt, angle) |
153 | 0 | } |
154 | | |
155 | | /* |
156 | | Evaluates: |
157 | | J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x)) |
158 | | discarding 1*PI/2 using identities gives: |
159 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
160 | | |
161 | | to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is: |
162 | | |
163 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x)) |
164 | | */ |
165 | 0 | fn j1_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 { |
166 | 0 | let origin_x = x; |
167 | | static SGN: [f64; 2] = [1., -1.]; |
168 | 0 | let sign_scale = SGN[x.is_sign_negative() as usize]; |
169 | | |
170 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
171 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
172 | | f64::from_bits(0x3fe9884533d43651), |
173 | | ); |
174 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
175 | | f64::from_bits(0xbc81a62633145c07), |
176 | | f64::from_bits(0xbfe921fb54442d18), |
177 | | ); |
178 | | |
179 | 0 | let alpha = bessel_1_asympt_alpha(recip); |
180 | 0 | let beta = bessel_1_asympt_beta(recip); |
181 | | |
182 | | // Without full subtraction cancellation happens sometimes |
183 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
184 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
185 | | |
186 | 0 | let m_sin = sin_dd_small(r0); |
187 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_sin); |
188 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
189 | 0 | let r = DoubleDouble::quick_mult(scale, z0); |
190 | | |
191 | 0 | let p = DoubleDouble::from_exact_add(r.hi, r.lo); |
192 | | |
193 | 0 | let err = f_fmla( |
194 | 0 | p.hi, |
195 | 0 | f64::from_bits(0x3bc0000000000000), // 2^-67 |
196 | 0 | f64::from_bits(0x39c0000000000000), // 2^-99 |
197 | | ); |
198 | | |
199 | 0 | let ub = p.hi + (p.lo + err); |
200 | 0 | let lb = p.hi + (p.lo - err); |
201 | | |
202 | 0 | if ub == lb { |
203 | 0 | return p.to_f64() * sign_scale; |
204 | 0 | } |
205 | | |
206 | 0 | j1_asympt_hard(origin_x) |
207 | 0 | } |
208 | | |
209 | | /** |
210 | | Generated in Sollya: |
211 | | ```text |
212 | | pretty = proc(u) { |
213 | | return ~(floor(u*1000)/1000); |
214 | | }; |
215 | | |
216 | | bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib"); |
217 | | |
218 | | f = bessel_j1(x)/x; |
219 | | d = [0, 0.921]; |
220 | | w = 1; |
221 | | pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating); |
222 | | |
223 | | w = 1; |
224 | | or_f = bessel_j1(x); |
225 | | pf1 = pf * x; |
226 | | err_p = -log2(dirtyinfnorm(pf1*w-or_f, d)); |
227 | | print ("relative error:", pretty(err_p)); |
228 | | |
229 | | for i from 0 to degree(pf) by 2 do { |
230 | | print("'", coeff(pf, i), "',"); |
231 | | }; |
232 | | ``` |
233 | | See ./notes/bessel_sollya/bessel_j1_at_zero_fast.sollya |
234 | | **/ |
235 | | #[inline] |
236 | 0 | pub(crate) fn j1_maclaurin_series_fast(x: f64) -> f64 { |
237 | | const C0: DoubleDouble = DoubleDouble::from_bit_pair((0x3b30e9e087200000, 0x3fe0000000000000)); |
238 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
239 | 0 | let p = f_polyeval12( |
240 | 0 | x2.hi, |
241 | 0 | f64::from_bits(0xbfb0000000000000), |
242 | 0 | f64::from_bits(0x3f65555555555555), |
243 | 0 | f64::from_bits(0xbf0c71c71c71c45e), |
244 | 0 | f64::from_bits(0x3ea6c16c16b82b02), |
245 | 0 | f64::from_bits(0xbe3845c87ec0cbef), |
246 | 0 | f64::from_bits(0x3dc27e0313e8534c), |
247 | 0 | f64::from_bits(0xbd4443dd2d0305d0), |
248 | 0 | f64::from_bits(0xbd0985a435fe9aa1), |
249 | 0 | f64::from_bits(0x3d10c82d92c46d30), |
250 | 0 | f64::from_bits(0xbd0aa3684321f219), |
251 | 0 | f64::from_bits(0x3cf8351f29ac345a), |
252 | 0 | f64::from_bits(0xbcd333fe6cd52c9f), |
253 | | ); |
254 | 0 | let mut z = DoubleDouble::mul_f64_add(x2, p, C0); |
255 | 0 | z = DoubleDouble::quick_mult_f64(z, x); |
256 | | |
257 | | // squaring error (2^-56) + poly error 2^-75 |
258 | 0 | let err = f_fmla( |
259 | 0 | x2.hi, |
260 | 0 | f64::from_bits(0x3c70000000000000), // 2^-56 |
261 | 0 | f64::from_bits(0x3b40000000000000), // 2^-75 |
262 | | ); |
263 | 0 | let ub = z.hi + (z.lo + err); |
264 | 0 | let lb = z.hi + (z.lo - err); |
265 | | |
266 | 0 | if ub == lb { |
267 | 0 | return z.to_f64(); |
268 | 0 | } |
269 | 0 | j1_maclaurin_series(x) |
270 | 0 | } |
271 | | |
272 | | /** |
273 | | Generated in Sollya: |
274 | | ```text |
275 | | pretty = proc(u) { |
276 | | return ~(floor(u*1000)/1000); |
277 | | }; |
278 | | |
279 | | bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib"); |
280 | | |
281 | | f = bessel_j1(x)/x; |
282 | | d = [0, 0.921]; |
283 | | w = 1; |
284 | | pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating); |
285 | | |
286 | | w = 1; |
287 | | or_f = bessel_j1(x); |
288 | | pf1 = pf * x; |
289 | | err_p = -log2(dirtyinfnorm(pf1*w-or_f, d)); |
290 | | print ("relative error:", pretty(err_p)); |
291 | | |
292 | | for i from 0 to degree(pf) by 2 do { |
293 | | print("'", coeff(pf, i), "',"); |
294 | | }; |
295 | | ``` |
296 | | See ./notes/bessel_sollya/bessel_j1_at_zero.sollya |
297 | | **/ |
298 | 0 | pub(crate) fn j1_maclaurin_series(x: f64) -> f64 { |
299 | 0 | let origin_x = x; |
300 | | static SGN: [f64; 2] = [1., -1.]; |
301 | 0 | let sign_scale = SGN[x.is_sign_negative() as usize]; |
302 | 0 | let x = x.abs(); |
303 | | |
304 | | const CL: [(u64, u64); 5] = [ |
305 | | (0xb930000000000000, 0x3fe0000000000000), |
306 | | (0x39c8e80000000000, 0xbfb0000000000000), |
307 | | (0x3c05555554f3add7, 0x3f65555555555555), |
308 | | (0xbbac71c4eb0f8c94, 0xbf0c71c71c71c71c), |
309 | | (0xbb3f56b7a43206d4, 0x3ea6c16c16c16c17), |
310 | | ]; |
311 | | |
312 | 0 | let dx2 = DoubleDouble::from_exact_mult(x, x); |
313 | | |
314 | 0 | let p = f_polyeval8( |
315 | 0 | dx2.hi, |
316 | 0 | f64::from_bits(0xbe3845c8a0ce5129), |
317 | 0 | f64::from_bits(0x3dc27e4fb7789ea2), |
318 | 0 | f64::from_bits(0xbd4522a43f633af1), |
319 | 0 | f64::from_bits(0x3cc2c97589d53f97), |
320 | 0 | f64::from_bits(0xbc3ab8151dca7912), |
321 | 0 | f64::from_bits(0x3baf08732286d1d4), |
322 | 0 | f64::from_bits(0xbb10ac65637413f4), |
323 | 0 | f64::from_bits(0xbae4d8336e4f779c), |
324 | | ); |
325 | | |
326 | 0 | let mut p_e = DoubleDouble::mul_f64_add(dx2, p, DoubleDouble::from_bit_pair(CL[4])); |
327 | 0 | p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[3])); |
328 | 0 | p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[2])); |
329 | 0 | p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[1])); |
330 | 0 | p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[0])); |
331 | | |
332 | 0 | let p = DoubleDouble::quick_mult_f64(p_e, x); |
333 | | |
334 | 0 | let err = f_fmla( |
335 | 0 | p.hi, |
336 | 0 | f64::from_bits(0x3bd0000000000000), // 2^-66 |
337 | 0 | f64::from_bits(0x3a00000000000000), // 2^-95 |
338 | | ); |
339 | 0 | let ub = p.hi + (p.lo + err); |
340 | 0 | let lb = p.hi + (p.lo - err); |
341 | 0 | if ub != lb { |
342 | 0 | return j1_maclaurin_series_hard(origin_x); |
343 | 0 | } |
344 | | |
345 | 0 | p.to_f64() * sign_scale |
346 | 0 | } |
347 | | |
348 | | /** |
349 | | Taylor expansion at 0 |
350 | | |
351 | | Generated by SageMath: |
352 | | ```python |
353 | | def print_expansion_at_0(): |
354 | | print(f"static C: [DyadicFloat128; 13] = ") |
355 | | from mpmath import mp, j1, taylor, expm1 |
356 | | poly = taylor(lambda val: j1(val), 0, 26) |
357 | | real_i = 0 |
358 | | print("[") |
359 | | for i in range(1, len(poly), 2): |
360 | | print_dyadic(poly[i]) |
361 | | real_i = real_i + 1 |
362 | | print("],") |
363 | | |
364 | | print("];") |
365 | | |
366 | | mp.prec = 180 |
367 | | |
368 | | print_expansion_at_0() |
369 | | ``` |
370 | | **/ |
371 | | #[cold] |
372 | | #[inline(never)] |
373 | 0 | fn j1_maclaurin_series_hard(x: f64) -> f64 { |
374 | | static SGN: [f64; 2] = [1., -1.]; |
375 | 0 | let sign_scale = SGN[x.is_sign_negative() as usize]; |
376 | 0 | let x = x.abs(); |
377 | | static C: [DyadicFloat128; 13] = [ |
378 | | DyadicFloat128 { |
379 | | sign: DyadicSign::Pos, |
380 | | exponent: -128, |
381 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
382 | | }, |
383 | | DyadicFloat128 { |
384 | | sign: DyadicSign::Neg, |
385 | | exponent: -131, |
386 | | mantissa: 0x80000000_00000000_00000000_00000000_u128, |
387 | | }, |
388 | | DyadicFloat128 { |
389 | | sign: DyadicSign::Pos, |
390 | | exponent: -136, |
391 | | mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128, |
392 | | }, |
393 | | DyadicFloat128 { |
394 | | sign: DyadicSign::Neg, |
395 | | exponent: -142, |
396 | | mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128, |
397 | | }, |
398 | | DyadicFloat128 { |
399 | | sign: DyadicSign::Pos, |
400 | | exponent: -148, |
401 | | mantissa: 0xb60b60b6_0b60b60b_60b60b60_b60b60b6_u128, |
402 | | }, |
403 | | DyadicFloat128 { |
404 | | sign: DyadicSign::Neg, |
405 | | exponent: -155, |
406 | | mantissa: 0xc22e4506_72894ab6_cd8efb11_d33f5618_u128, |
407 | | }, |
408 | | DyadicFloat128 { |
409 | | sign: DyadicSign::Pos, |
410 | | exponent: -162, |
411 | | mantissa: 0x93f27dbb_c4fae397_780b69f5_333c725b_u128, |
412 | | }, |
413 | | DyadicFloat128 { |
414 | | sign: DyadicSign::Neg, |
415 | | exponent: -170, |
416 | | mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128, |
417 | | }, |
418 | | DyadicFloat128 { |
419 | | sign: DyadicSign::Pos, |
420 | | exponent: -178, |
421 | | mantissa: 0x964bac6d_7ae67d8d_aec68405_485dea03_u128, |
422 | | }, |
423 | | DyadicFloat128 { |
424 | | sign: DyadicSign::Neg, |
425 | | exponent: -187, |
426 | | mantissa: 0xd5c0f53a_fe6fa17f_8c7b0b68_39691f4e_u128, |
427 | | }, |
428 | | DyadicFloat128 { |
429 | | sign: DyadicSign::Pos, |
430 | | exponent: -196, |
431 | | mantissa: 0xf8bb4be7_8e7896b0_58fee362_01a4370c_u128, |
432 | | }, |
433 | | DyadicFloat128 { |
434 | | sign: DyadicSign::Neg, |
435 | | exponent: -205, |
436 | | mantissa: 0xf131bdf7_cff8d02e_e1ef6820_f9d58ab6_u128, |
437 | | }, |
438 | | DyadicFloat128 { |
439 | | sign: DyadicSign::Pos, |
440 | | exponent: -214, |
441 | | mantissa: 0xc5e72c48_0d1aec75_3caa2e0d_edd008ca_u128, |
442 | | }, |
443 | | ]; |
444 | | |
445 | 0 | let rx = DyadicFloat128::new_from_f64(x); |
446 | 0 | let dx = rx * rx; |
447 | | |
448 | 0 | let mut p = C[12]; |
449 | 0 | for i in (0..12).rev() { |
450 | 0 | p = dx * p + C[i]; |
451 | 0 | } |
452 | | |
453 | 0 | (p * rx).fast_as_f64() * sign_scale |
454 | 0 | } |
455 | | |
456 | | /// This method on small range searches for nearest zero or extremum. |
457 | | /// Then picks stored series expansion at the point end evaluates the poly at the point. |
458 | | #[inline] |
459 | 0 | pub(crate) fn j1_small_argument_fast(x: f64) -> f64 { |
460 | | static SIGN: [f64; 2] = [1., -1.]; |
461 | 0 | let sign_scale = SIGN[x.is_sign_negative() as usize]; |
462 | 0 | let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff); |
463 | | |
464 | | // let avg_step = 74.60109 / 47.0; |
465 | | // let inv_step = 1.0 / avg_step; |
466 | | |
467 | | const INV_STEP: f64 = 0.6300176043004198; |
468 | | |
469 | 0 | let fx = x_abs * INV_STEP; |
470 | | const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64; |
471 | 0 | let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() }; |
472 | 0 | let idx1 = unsafe { |
473 | 0 | fx.cpu_ceil() |
474 | 0 | .min(J1_ZEROS_COUNT) |
475 | 0 | .to_int_unchecked::<usize>() |
476 | | }; |
477 | | |
478 | 0 | let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]); |
479 | 0 | let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]); |
480 | | |
481 | 0 | let dist0 = (found_zero0.hi - x_abs).abs(); |
482 | 0 | let dist1 = (found_zero1.hi - x_abs).abs(); |
483 | | |
484 | 0 | let (found_zero, idx, dist) = if dist0 < dist1 { |
485 | 0 | (found_zero0, idx0, dist0) |
486 | | } else { |
487 | 0 | (found_zero1, idx1, dist1) |
488 | | }; |
489 | | |
490 | 0 | if idx == 0 { |
491 | 0 | return j1_maclaurin_series_fast(x); |
492 | 0 | } |
493 | | |
494 | 0 | let r = DoubleDouble::full_add_f64(-found_zero, x_abs); |
495 | | |
496 | | // We hit exact zero, value, better to return it directly |
497 | 0 | if dist == 0. { |
498 | 0 | return f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale; |
499 | 0 | } |
500 | | |
501 | 0 | let is_zero_too_close = dist.abs() < 1e-3; |
502 | | |
503 | 0 | let c = if is_zero_too_close { |
504 | 0 | &J1_COEFFS_TAYLOR[idx - 1] |
505 | | } else { |
506 | 0 | &J1_COEFFS[idx - 1] |
507 | | }; |
508 | | |
509 | 0 | let p = f_polyeval19( |
510 | 0 | r.hi, |
511 | 0 | f64::from_bits(c[5].1), |
512 | 0 | f64::from_bits(c[6].1), |
513 | 0 | f64::from_bits(c[7].1), |
514 | 0 | f64::from_bits(c[8].1), |
515 | 0 | f64::from_bits(c[9].1), |
516 | 0 | f64::from_bits(c[10].1), |
517 | 0 | f64::from_bits(c[11].1), |
518 | 0 | f64::from_bits(c[12].1), |
519 | 0 | f64::from_bits(c[13].1), |
520 | 0 | f64::from_bits(c[14].1), |
521 | 0 | f64::from_bits(c[15].1), |
522 | 0 | f64::from_bits(c[16].1), |
523 | 0 | f64::from_bits(c[17].1), |
524 | 0 | f64::from_bits(c[18].1), |
525 | 0 | f64::from_bits(c[19].1), |
526 | 0 | f64::from_bits(c[20].1), |
527 | 0 | f64::from_bits(c[21].1), |
528 | 0 | f64::from_bits(c[22].1), |
529 | 0 | f64::from_bits(c[23].1), |
530 | | ); |
531 | | |
532 | 0 | let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4])); |
533 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3])); |
534 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2])); |
535 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1])); |
536 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0])); |
537 | 0 | let err = f_fmla( |
538 | 0 | z.hi, |
539 | 0 | f64::from_bits(0x3c70000000000000), // 2^-56 |
540 | 0 | f64::from_bits(0x3bf0000000000000), // 2^-64 |
541 | | ); |
542 | 0 | let ub = z.hi + (z.lo + err); |
543 | 0 | let lb = z.hi + (z.lo - err); |
544 | | |
545 | 0 | if ub == lb { |
546 | 0 | return z.to_f64() * sign_scale; |
547 | 0 | } |
548 | | |
549 | 0 | j1_small_argument_dd(sign_scale, r, c) |
550 | 0 | } |
551 | | |
552 | 0 | fn j1_small_argument_dd(sign_scale: f64, r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 { |
553 | 0 | let c = &c0[15..]; |
554 | | |
555 | 0 | let p0 = f_polyeval9( |
556 | 0 | r.to_f64(), |
557 | 0 | f64::from_bits(c[0].1), |
558 | 0 | f64::from_bits(c[1].1), |
559 | 0 | f64::from_bits(c[2].1), |
560 | 0 | f64::from_bits(c[3].1), |
561 | 0 | f64::from_bits(c[4].1), |
562 | 0 | f64::from_bits(c[5].1), |
563 | 0 | f64::from_bits(c[6].1), |
564 | 0 | f64::from_bits(c[7].1), |
565 | 0 | f64::from_bits(c[8].1), |
566 | | ); |
567 | | |
568 | 0 | let c = c0; |
569 | | |
570 | 0 | let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14])); |
571 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13])); |
572 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12])); |
573 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11])); |
574 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10])); |
575 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9])); |
576 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8])); |
577 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7])); |
578 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6])); |
579 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5])); |
580 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4])); |
581 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3])); |
582 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2])); |
583 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1])); |
584 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0])); |
585 | | |
586 | 0 | let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo); |
587 | 0 | let err = f_fmla( |
588 | 0 | p.hi, |
589 | 0 | f64::from_bits(0x3c10000000000000), // 2^-62 |
590 | 0 | f64::from_bits(0x3a00000000000000), // 2^-95 |
591 | | ); |
592 | 0 | let ub = p.hi + (p.lo + err); |
593 | 0 | let lb = p.hi + (p.lo - err); |
594 | 0 | if ub != lb { |
595 | 0 | return j1_small_argument_path_hard(sign_scale, r, c); |
596 | 0 | } |
597 | 0 | p.to_f64() * sign_scale |
598 | 0 | } |
599 | | |
600 | | #[cold] |
601 | | #[inline(never)] |
602 | 0 | fn j1_small_argument_path_hard(sign_scale: f64, r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 { |
603 | 0 | let mut p = DoubleDouble::from_bit_pair(c[23]); |
604 | 0 | for i in (0..23).rev() { |
605 | 0 | p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i])); |
606 | 0 | p = DoubleDouble::from_exact_add(p.hi, p.lo); |
607 | 0 | } |
608 | 0 | p.to_f64() * sign_scale |
609 | 0 | } |
610 | | |
611 | | /* |
612 | | Evaluates: |
613 | | J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x)) |
614 | | discarding 1*PI/2 using identities gives: |
615 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
616 | | |
617 | | to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is: |
618 | | |
619 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x)) |
620 | | |
621 | | This method is required for situations where x*x or 1/(x*x) will overflow |
622 | | */ |
623 | | #[cold] |
624 | | #[inline(never)] |
625 | 0 | fn j1_asympt_hard(x: f64) -> f64 { |
626 | | static SGN: [f64; 2] = [1., -1.]; |
627 | 0 | let sign_scale = SGN[x.is_sign_negative() as usize]; |
628 | 0 | let x = x.abs(); |
629 | | |
630 | | const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 { |
631 | | sign: DyadicSign::Pos, |
632 | | exponent: -128, |
633 | | mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128, |
634 | | }; |
635 | | |
636 | | const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 { |
637 | | sign: DyadicSign::Neg, |
638 | | exponent: -128, |
639 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
640 | | }; |
641 | | |
642 | 0 | let x_dyadic = DyadicFloat128::new_from_f64(x); |
643 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
644 | | |
645 | 0 | let alpha = bessel_1_asympt_alpha_hard(recip); |
646 | 0 | let beta = bessel_1_asympt_beta_hard(recip); |
647 | | |
648 | 0 | let angle = rem2pi_f128(x_dyadic); |
649 | | |
650 | 0 | let x0pi34 = MPI_OVER_4 - alpha; |
651 | 0 | let r0 = angle + x0pi34; |
652 | | |
653 | 0 | let m_sin = sin_f128_small(r0); |
654 | | |
655 | 0 | let z0 = beta * m_sin; |
656 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
657 | 0 | let scale = SQRT_2_OVER_PI * r_sqrt; |
658 | 0 | let p = scale * z0; |
659 | 0 | p.fast_as_f64() * sign_scale |
660 | 0 | } |
661 | | |
662 | | #[cfg(test)] |
663 | | mod tests { |
664 | | use super::*; |
665 | | |
666 | | #[test] |
667 | | fn test_j1() { |
668 | | assert_eq!(f_j1(0.000000000000000000000000000000001241), 6.205e-34); |
669 | | assert_eq!(f_j1(0.0000000000000000000000000000004321), 2.1605e-31); |
670 | | assert_eq!(f_j1(0.00000000000000000004321), 2.1605e-20); |
671 | | assert_eq!(f_j1(73.81695991658546), -0.06531447184607607); |
672 | | assert_eq!(f_j1(0.01), 0.004999937500260416); |
673 | | assert_eq!(f_j1(0.9), 0.4059495460788057); |
674 | | assert_eq!( |
675 | | f_j1(162605674999778540000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), |
676 | | 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008686943178258183 |
677 | | ); |
678 | | assert_eq!(f_j1(3.831705970207517), -1.8501090915423025e-15); |
679 | | assert_eq!(f_j1(-3.831705970207517), 1.8501090915423025e-15); |
680 | | assert_eq!(f_j1(-6.1795701510782757E+307), 8.130935041593236e-155); |
681 | | assert_eq!( |
682 | | f_j1(0.000000000000000000000000000000000000008827127), |
683 | | 0.0000000000000000000000000000000000000044135635 |
684 | | ); |
685 | | assert_eq!( |
686 | | f_j1(-0.000000000000000000000000000000000000008827127), |
687 | | -0.0000000000000000000000000000000000000044135635 |
688 | | ); |
689 | | assert_eq!(f_j1(5.4), -0.3453447907795863); |
690 | | assert_eq!( |
691 | | f_j1(77.743162408196766932633181568235159), |
692 | | 0.09049267898021947 |
693 | | ); |
694 | | assert_eq!( |
695 | | f_j1(84.027189586293545175976760219782591), |
696 | | 0.0870430264022591 |
697 | | ); |
698 | | assert_eq!(f_j1(f64::NEG_INFINITY), 0.0); |
699 | | assert_eq!(f_j1(f64::INFINITY), 0.0); |
700 | | assert!(f_j1(f64::NAN).is_nan()); |
701 | | } |
702 | | } |