/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.28/src/bessel/jincpif.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | #![allow(clippy::too_many_arguments)] |
30 | | use crate::bessel::j0f::j1f_rsqrt; |
31 | | use crate::bessel::j1_coeffs::{J1_ZEROS, J1_ZEROS_VALUE}; |
32 | | use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta}; |
33 | | use crate::bessel::j1f_coeffs::J1F_COEFFS; |
34 | | use crate::bessel::trigo_bessel::sin_small; |
35 | | use crate::common::f_fmla; |
36 | | use crate::double_double::DoubleDouble; |
37 | | use crate::rounding::CpuCeil; |
38 | | use crate::rounding::CpuRound; |
39 | | |
40 | | pub(crate) trait JincpifBackend { |
41 | | fn fma(&self, x: f64, y: f64, z: f64) -> f64; |
42 | | fn round(&self, x: f64) -> f64; |
43 | | fn ceil(&self, x: f64) -> f64; |
44 | | fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64; |
45 | | fn polyeval14( |
46 | | &self, |
47 | | x: f64, |
48 | | a0: f64, |
49 | | a1: f64, |
50 | | a2: f64, |
51 | | a3: f64, |
52 | | a4: f64, |
53 | | a5: f64, |
54 | | a6: f64, |
55 | | a7: f64, |
56 | | a8: f64, |
57 | | a9: f64, |
58 | | a10: f64, |
59 | | a11: f64, |
60 | | a12: f64, |
61 | | a13: f64, |
62 | | ) -> f64; |
63 | | } |
64 | | |
65 | | struct GenJincpifBackend {} |
66 | | |
67 | | impl JincpifBackend for GenJincpifBackend { |
68 | | #[inline(always)] |
69 | 0 | fn fma(&self, x: f64, y: f64, z: f64) -> f64 { |
70 | 0 | f_fmla(x, y, z) |
71 | 0 | } |
72 | | |
73 | | #[inline(always)] |
74 | 0 | fn round(&self, x: f64) -> f64 { |
75 | 0 | x.cpu_round() |
76 | 0 | } |
77 | | |
78 | | #[inline(always)] |
79 | 0 | fn ceil(&self, x: f64) -> f64 { |
80 | 0 | x.cpu_ceil() |
81 | 0 | } |
82 | | |
83 | | #[inline(always)] |
84 | 0 | fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 { |
85 | | use crate::polyeval::f_polyeval6; |
86 | 0 | f_polyeval6(x, a0, a1, a2, a3, a4, a5) |
87 | 0 | } |
88 | | |
89 | | #[inline(always)] |
90 | 0 | fn polyeval14( |
91 | 0 | &self, |
92 | 0 | x: f64, |
93 | 0 | a0: f64, |
94 | 0 | a1: f64, |
95 | 0 | a2: f64, |
96 | 0 | a3: f64, |
97 | 0 | a4: f64, |
98 | 0 | a5: f64, |
99 | 0 | a6: f64, |
100 | 0 | a7: f64, |
101 | 0 | a8: f64, |
102 | 0 | a9: f64, |
103 | 0 | a10: f64, |
104 | 0 | a11: f64, |
105 | 0 | a12: f64, |
106 | 0 | a13: f64, |
107 | 0 | ) -> f64 { |
108 | | use crate::polyeval::f_polyeval14; |
109 | 0 | f_polyeval14( |
110 | 0 | x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, |
111 | | ) |
112 | 0 | } |
113 | | } |
114 | | |
115 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
116 | | struct FmaJincpifBackend {} |
117 | | |
118 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
119 | | impl JincpifBackend for FmaJincpifBackend { |
120 | | #[inline(always)] |
121 | 0 | fn fma(&self, x: f64, y: f64, z: f64) -> f64 { |
122 | 0 | f64::mul_add(x, y, z) |
123 | 0 | } |
124 | | |
125 | | #[inline(always)] |
126 | 0 | fn round(&self, x: f64) -> f64 { |
127 | 0 | x.round() |
128 | 0 | } |
129 | | |
130 | | #[inline(always)] |
131 | 0 | fn ceil(&self, x: f64) -> f64 { |
132 | 0 | x.ceil() |
133 | 0 | } |
134 | | |
135 | | #[inline(always)] |
136 | 0 | fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 { |
137 | | use crate::polyeval::d_polyeval6; |
138 | 0 | d_polyeval6(x, a0, a1, a2, a3, a4, a5) |
139 | 0 | } |
140 | | |
141 | | #[inline(always)] |
142 | 0 | fn polyeval14( |
143 | 0 | &self, |
144 | 0 | x: f64, |
145 | 0 | a0: f64, |
146 | 0 | a1: f64, |
147 | 0 | a2: f64, |
148 | 0 | a3: f64, |
149 | 0 | a4: f64, |
150 | 0 | a5: f64, |
151 | 0 | a6: f64, |
152 | 0 | a7: f64, |
153 | 0 | a8: f64, |
154 | 0 | a9: f64, |
155 | 0 | a10: f64, |
156 | 0 | a11: f64, |
157 | 0 | a12: f64, |
158 | 0 | a13: f64, |
159 | 0 | ) -> f64 { |
160 | | use crate::polyeval::d_polyeval14; |
161 | 0 | d_polyeval14( |
162 | 0 | x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, |
163 | | ) |
164 | 0 | } |
165 | | } |
166 | | |
167 | | #[inline(always)] |
168 | 0 | fn jincpif_gen<B: JincpifBackend>(x: f32, backend: B) -> f32 { |
169 | 0 | let ux = x.to_bits().wrapping_shl(1); |
170 | 0 | if ux >= 0xffu32 << 24 || ux <= 0x6800_0000u32 { |
171 | | // |x| <= f32::EPSILON, |x| == inf, |x| == NaN |
172 | 0 | if ux <= 0x6800_0000u32 { |
173 | | // |x| == 0 |
174 | 0 | return 1.; |
175 | 0 | } |
176 | 0 | if x.is_infinite() { |
177 | 0 | return 0.; |
178 | 0 | } |
179 | 0 | return x + f32::NAN; // x == NaN |
180 | 0 | } |
181 | | |
182 | 0 | let ax = x.to_bits() & 0x7fff_ffff; |
183 | | |
184 | 0 | if ax < 0x429533c2u32 { |
185 | | // |x| < 74.60109 |
186 | 0 | if ax <= 0x3e800000u32 { |
187 | | // |x| < 0.25 |
188 | 0 | return jincf_near_zero(f32::from_bits(ax), &backend); |
189 | 0 | } |
190 | 0 | let scaled_pix = f32::from_bits(ax) * std::f32::consts::PI; // just test boundaries |
191 | 0 | if scaled_pix < 74.60109 { |
192 | 0 | return jincpif_small_argument(f32::from_bits(ax), &backend); |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | 0 | jincpif_asympt(f32::from_bits(ax), &backend) as f32 |
197 | 0 | } Unexecuted instantiation: pxfm::bessel::jincpif::jincpif_gen::<pxfm::bessel::jincpif::FmaJincpifBackend> Unexecuted instantiation: pxfm::bessel::jincpif::jincpif_gen::<pxfm::bessel::jincpif::GenJincpifBackend> |
198 | | |
199 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
200 | | #[target_feature(enable = "avx", enable = "fma")] |
201 | 0 | unsafe fn jincpif_fma_impl(x: f32) -> f32 { |
202 | 0 | jincpif_gen(x, FmaJincpifBackend {}) |
203 | 0 | } |
204 | | |
205 | | /// Normalized jinc 2*J1(PI\*x)/(pi\*x) |
206 | | /// |
207 | 0 | pub fn f_jincpif(x: f32) -> f32 { |
208 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
209 | | { |
210 | | jincpif_gen(x, GenJincpifBackend {}) |
211 | | } |
212 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
213 | | { |
214 | | use std::sync::OnceLock; |
215 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
216 | 0 | let q = EXECUTOR.get_or_init(|| { |
217 | 0 | if std::arch::is_x86_feature_detected!("avx") |
218 | 0 | && std::arch::is_x86_feature_detected!("fma") |
219 | | { |
220 | 0 | jincpif_fma_impl |
221 | | } else { |
222 | 0 | fn def_jincpif(x: f32) -> f32 { |
223 | 0 | jincpif_gen(x, GenJincpifBackend {}) |
224 | 0 | } |
225 | 0 | def_jincpif |
226 | | } |
227 | 0 | }); |
228 | 0 | unsafe { q(x) } |
229 | | } |
230 | 0 | } |
231 | | |
232 | | #[inline(always)] |
233 | 0 | fn jincf_near_zero<B: JincpifBackend>(x: f32, backend: &B) -> f32 { |
234 | 0 | let dx = x as f64; |
235 | | // Generated in Wolfram Mathematica: |
236 | | // <<FunctionApproximations` |
237 | | // ClearAll["Global`*"] |
238 | | // f[x_]:=2*BesselJ[1,x*Pi]/(x*Pi) |
239 | | // {err,approx}=MiniMaxApproximation[f[z],{z,{2^-23,0.3},5,5},WorkingPrecision->60] |
240 | | // poly=Numerator[approx][[1]]; |
241 | | // coeffs=CoefficientList[poly,z]; |
242 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
243 | | // poly=Denominator[approx][[1]]; |
244 | | // coeffs=CoefficientList[poly,z]; |
245 | | // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
246 | 0 | let p_num = backend.polyeval6( |
247 | 0 | dx, |
248 | 0 | f64::from_bits(0x3ff0000000000002), |
249 | 0 | f64::from_bits(0xbfe46cd1822a5aa0), |
250 | 0 | f64::from_bits(0xbfee583c923dc6f4), |
251 | 0 | f64::from_bits(0x3fe3834f47496519), |
252 | 0 | f64::from_bits(0x3fc8118468756e6f), |
253 | 0 | f64::from_bits(0xbfbfaff09f13df88), |
254 | | ); |
255 | 0 | let p_den = backend.polyeval6( |
256 | 0 | dx, |
257 | 0 | f64::from_bits(0x3ff0000000000000), |
258 | 0 | f64::from_bits(0xbfe46cd1822a4cb0), |
259 | 0 | f64::from_bits(0x3fd2447a026f477a), |
260 | 0 | f64::from_bits(0xbfc6bdf2192404e5), |
261 | 0 | f64::from_bits(0x3fa0cf182218e448), |
262 | 0 | f64::from_bits(0xbf939ab46c3f7a7d), |
263 | | ); |
264 | 0 | (p_num / p_den) as f32 |
265 | 0 | } Unexecuted instantiation: pxfm::bessel::jincpif::jincf_near_zero::<pxfm::bessel::jincpif::FmaJincpifBackend> Unexecuted instantiation: pxfm::bessel::jincpif::jincf_near_zero::<pxfm::bessel::jincpif::GenJincpifBackend> |
266 | | |
267 | | /// This method on small range searches for nearest zero or extremum. |
268 | | /// Then picks stored series expansion at the point end evaluates the poly at the point. |
269 | | #[inline(always)] |
270 | 0 | fn jincpif_small_argument<B: JincpifBackend>(ox: f32, backend: &B) -> f32 { |
271 | | const PI: f64 = f64::from_bits(0x400921fb54442d18); |
272 | 0 | let x = ox as f64 * PI; |
273 | 0 | let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff); |
274 | | |
275 | | // let avg_step = 74.60109 / 47.0; |
276 | | // let inv_step = 1.0 / avg_step; |
277 | | |
278 | | const INV_STEP: f64 = 0.6300176043004198; |
279 | | |
280 | 0 | let inv_scale = x; |
281 | | |
282 | 0 | let fx = x_abs * INV_STEP; |
283 | | const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64; |
284 | 0 | let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() }; |
285 | 0 | let idx1 = unsafe { |
286 | 0 | backend |
287 | 0 | .ceil(fx) |
288 | 0 | .min(J1_ZEROS_COUNT) |
289 | 0 | .to_int_unchecked::<usize>() |
290 | | }; |
291 | | |
292 | 0 | let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]); |
293 | 0 | let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]); |
294 | | |
295 | 0 | let dist0 = (found_zero0.hi - x_abs).abs(); |
296 | 0 | let dist1 = (found_zero1.hi - x_abs).abs(); |
297 | | |
298 | 0 | let (found_zero, idx, dist) = if dist0 < dist1 { |
299 | 0 | (found_zero0, idx0, dist0) |
300 | | } else { |
301 | 0 | (found_zero1, idx1, dist1) |
302 | | }; |
303 | | |
304 | 0 | if idx == 0 { |
305 | 0 | return jincf_near_zero(ox, backend); |
306 | 0 | } |
307 | | |
308 | | // We hit exact zero, value, better to return it directly |
309 | 0 | if dist == 0. { |
310 | 0 | return (f64::from_bits(J1_ZEROS_VALUE[idx]) / inv_scale * 2.) as f32; |
311 | 0 | } |
312 | | |
313 | 0 | let c = &J1F_COEFFS[idx - 1]; |
314 | | |
315 | 0 | let r = (x_abs - found_zero.hi) - found_zero.lo; |
316 | | |
317 | 0 | let p = backend.polyeval14( |
318 | 0 | r, |
319 | 0 | f64::from_bits(c[0]), |
320 | 0 | f64::from_bits(c[1]), |
321 | 0 | f64::from_bits(c[2]), |
322 | 0 | f64::from_bits(c[3]), |
323 | 0 | f64::from_bits(c[4]), |
324 | 0 | f64::from_bits(c[5]), |
325 | 0 | f64::from_bits(c[6]), |
326 | 0 | f64::from_bits(c[7]), |
327 | 0 | f64::from_bits(c[8]), |
328 | 0 | f64::from_bits(c[9]), |
329 | 0 | f64::from_bits(c[10]), |
330 | 0 | f64::from_bits(c[11]), |
331 | 0 | f64::from_bits(c[12]), |
332 | 0 | f64::from_bits(c[13]), |
333 | | ); |
334 | | |
335 | 0 | (p / inv_scale * 2.) as f32 |
336 | 0 | } Unexecuted instantiation: pxfm::bessel::jincpif::jincpif_small_argument::<pxfm::bessel::jincpif::FmaJincpifBackend> Unexecuted instantiation: pxfm::bessel::jincpif::jincpif_small_argument::<pxfm::bessel::jincpif::GenJincpifBackend> |
337 | | |
338 | | /* |
339 | | Evaluates: |
340 | | J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x)) |
341 | | discarding 1*PI/2 using identities gives: |
342 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
343 | | |
344 | | to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is: |
345 | | |
346 | | J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x)) |
347 | | */ |
348 | | #[inline(always)] |
349 | 0 | pub(crate) fn jincpif_asympt<B: JincpifBackend>(x: f32, backend: &B) -> f64 { |
350 | | const PI: f64 = f64::from_bits(0x400921fb54442d18); |
351 | | |
352 | 0 | let dox = x as f64; |
353 | 0 | let dx = dox * PI; |
354 | | |
355 | 0 | let alpha = j1f_asympt_alpha(dx); |
356 | 0 | let beta = j1f_asympt_beta(dx); |
357 | | |
358 | | // argument reduction assuming x here value is already multiple of PI. |
359 | | // k = round((x*Pi) / (pi*2)) |
360 | 0 | let kd = backend.round(dox * 0.5); |
361 | | // y = (x * Pi) - k * 2 |
362 | 0 | let angle = backend.fma(kd, -2., dox) * PI; |
363 | | |
364 | | // 2^(3/2)/(Pi^2) |
365 | | // reduce argument 2*sqrt(2/(PI*(x*PI))) = 2*sqrt(2)/PI |
366 | | // adding additional pi from division then 2*sqrt(2)/PI^2 |
367 | | const Z2_3_2_OVER_PI_SQR: f64 = f64::from_bits(0x3fd25751e5614413); |
368 | | const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18); |
369 | | |
370 | 0 | let x0pi34 = MPI_OVER_4 - alpha; |
371 | 0 | let r0 = angle + x0pi34; |
372 | | |
373 | 0 | let m_sin = sin_small(r0); |
374 | | |
375 | 0 | let z0 = beta * m_sin; |
376 | 0 | let scale = Z2_3_2_OVER_PI_SQR * j1f_rsqrt(dox); |
377 | | |
378 | 0 | let j1pix = scale * z0; |
379 | 0 | j1pix / dox |
380 | 0 | } Unexecuted instantiation: pxfm::bessel::jincpif::jincpif_asympt::<pxfm::bessel::jincpif::FmaJincpifBackend> Unexecuted instantiation: pxfm::bessel::jincpif::jincpif_asympt::<pxfm::bessel::jincpif::GenJincpifBackend> |
381 | | |
382 | | #[cfg(test)] |
383 | | mod tests { |
384 | | use super::*; |
385 | | |
386 | | #[test] |
387 | | fn test_jincpif() { |
388 | | assert_eq!(f_jincpif(-102.59484), 0.00024380769); |
389 | | assert_eq!(f_jincpif(102.59484), 0.00024380769); |
390 | | assert_eq!(f_jincpif(100.08199), -0.00014386141); |
391 | | assert_eq!(f_jincpif(0.27715185), 0.9081822); |
392 | | assert_eq!(f_jincpif(0.007638072), 0.99992806); |
393 | | assert_eq!(f_jincpif(-f32::EPSILON), 1.0); |
394 | | assert_eq!(f_jincpif(f32::EPSILON), 1.0); |
395 | | assert_eq!( |
396 | | f_jincpif(0.000000000000000000000000000000000000008827127), |
397 | | 1.0 |
398 | | ); |
399 | | assert_eq!(f_jincpif(5.4), -0.010821743); |
400 | | assert_eq!( |
401 | | f_jincpif(77.743162408196766932633181568235159), |
402 | | -0.00041799102 |
403 | | ); |
404 | | assert_eq!( |
405 | | f_jincpif(-77.743162408196766932633181568235159), |
406 | | -0.00041799102 |
407 | | ); |
408 | | assert_eq!( |
409 | | f_jincpif(84.027189586293545175976760219782591), |
410 | | -0.00023927793 |
411 | | ); |
412 | | assert_eq!(f_jincpif(f32::INFINITY), 0.); |
413 | | assert_eq!(f_jincpif(f32::NEG_INFINITY), 0.); |
414 | | assert!(f_jincpif(f32::NAN).is_nan()); |
415 | | assert_eq!(f_jincpif(-1.7014118e38), -0.0); |
416 | | } |
417 | | } |