/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.28/src/bessel/y1.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bessel::alpha1::{ |
30 | | bessel_1_asympt_alpha, bessel_1_asympt_alpha_fast, bessel_1_asympt_alpha_hard, |
31 | | }; |
32 | | use crate::bessel::beta1::{ |
33 | | bessel_1_asympt_beta, bessel_1_asympt_beta_fast, bessel_1_asympt_beta_hard, |
34 | | }; |
35 | | use crate::bessel::i0::bessel_rsqrt_hard; |
36 | | use crate::bessel::y1_coeffs::Y1_COEFFS_REMEZ; |
37 | | use crate::bessel::y1_coeffs_taylor::Y1_COEFFS; |
38 | | use crate::bessel::y1f_coeffs::{Y1_ZEROS, Y1_ZEROS_VALUES}; |
39 | | use crate::common::f_fmla; |
40 | | use crate::double_double::DoubleDouble; |
41 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
42 | | use crate::logs::log_dd_fast; |
43 | | use crate::polyeval::{f_polyeval12, f_polyeval13, f_polyeval15, f_polyeval22, f_polyeval24}; |
44 | | use crate::rounding::CpuCeil; |
45 | | use crate::sin_helper::{cos_dd_small, cos_dd_small_fast, cos_f128_small}; |
46 | | use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128}; |
47 | | |
48 | | /// Bessel of the second kind of order 1 ( Y1 ) |
49 | 0 | pub fn f_y1(x: f64) -> f64 { |
50 | 0 | let ix = x.to_bits(); |
51 | | |
52 | 0 | if ix >= 0x7ffu64 << 52 || ix == 0 { |
53 | | // |x| == NaN, x == inf, |x| == 0, x < 0 |
54 | 0 | if ix.wrapping_shl(1) == 0 { |
55 | | // |x| == 0 |
56 | 0 | return f64::NEG_INFINITY; |
57 | 0 | } |
58 | | |
59 | 0 | if x.is_infinite() { |
60 | 0 | if x.is_sign_negative() { |
61 | 0 | return f64::NAN; |
62 | 0 | } |
63 | 0 | return 0.; |
64 | 0 | } |
65 | | |
66 | 0 | return x + f64::NAN; // x == NaN |
67 | 0 | } |
68 | | |
69 | 0 | let xb = x.to_bits(); |
70 | | |
71 | 0 | if xb <= 0x4049c00000000000u64 { |
72 | | // x <= 51.5 |
73 | 0 | if xb <= 0x4000000000000000u64 { |
74 | | // x <= 2 |
75 | 0 | if xb <= 0x3ff75c28f5c28f5cu64 { |
76 | | // x <= 1.46 |
77 | 0 | return y1_near_zero_fast(x); |
78 | 0 | } |
79 | | // transient zone from 1.46 to 2 have bad behavior for log poly already, |
80 | | // and not yet good to be easily covered, thus it use its own poly |
81 | 0 | return y1_transient_zone_fast(x); |
82 | 0 | } |
83 | | |
84 | 0 | return y1_small_argument_fast(x); |
85 | 0 | } |
86 | | |
87 | 0 | y1_asympt_fast(x) |
88 | 0 | } |
89 | | |
90 | | /** |
91 | | Generated by SageMath: |
92 | | Evaluates: |
93 | | y2 = -J1(x)*log(x) + 1/x * (1 - sum((-1)^m*(H(m)+H(m-1))/(2^m*m!*(m-1)!)*x^(2*m)) |
94 | | Y1(x) = 2/pi*(-y2(x)+(euler_gamma - log(2))*J1(x)) |
95 | | expressed as: |
96 | | Y1(x)=log(x)*W1(x) - Z1(x) - 2/(pi*x) |
97 | | ```python |
98 | | from sage.all import * |
99 | | |
100 | | R = LaurentSeriesRing(RealField(300), 'x', default_prec=300) |
101 | | x = R.gen() |
102 | | N = 16 # Number of terms (adjust as needed) |
103 | | gamma = RealField(300)(euler_gamma) |
104 | | d2 = RealField(300)(2) |
105 | | pi = RealField(300).pi() |
106 | | log2 = RealField(300)(2).log() |
107 | | |
108 | | def j_series(n, x): |
109 | | return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)]) |
110 | | |
111 | | J1_series = j_series(1, x) |
112 | | |
113 | | def harmony(m): |
114 | | return sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) |
115 | | |
116 | | def z_series(x): |
117 | | return sum([(-1)**m * (x)**(ZZ(2)*ZZ(m)) / (ZZ(2)**(2*m) * ZZ(m).factorial() * (ZZ(m) - ZZ(1)).factorial()) * (harmony(m) + harmony(m - 1)) for m in range(1, N)]) |
118 | | |
119 | | W1 = d2/pi * J1_series |
120 | | Z1 = -(d2/(x*pi) * z_series(x) + d2/pi * gamma * J1_series(x) - d2/pi * log2 * J1_series(x)) |
121 | | |
122 | | def y1_full(x): |
123 | | return d2/pi * (J1_series(x) * x.log() - 1/x * ( 1 - z_series(x)) + (gamma - log2) * J1_series(x)) |
124 | | |
125 | | # see the series |
126 | | print(W0) |
127 | | print(Z0) |
128 | | ``` |
129 | | See ./notes/bessel_y1_taylor.ipynb for generation |
130 | | **/ |
131 | | #[inline] |
132 | 0 | fn y1_near_zero_fast(x: f64) -> f64 { |
133 | | const W: [(u64, u64); 15] = [ |
134 | | (0xbc76b01ec5417056, 0x3fd45f306dc9c883), |
135 | | (0x3c46b01ec5417056, 0xbfa45f306dc9c883), |
136 | | (0xbbfe40290701eb1e, 0x3f5b2995e7b7b604), |
137 | | (0xbba67fe4a5feb897, 0xbf021bb945252402), |
138 | | (0xbb0334914cdd2ba9, 0x3e9cf9286ea1d337), |
139 | | (0x3aab4f3c6d42c1f4, 0xbe2ee7a29824147f), |
140 | | (0xba407fb57ef4dc2c, 0x3db78be9987d036d), |
141 | | (0x39d2921e91b07dd0, 0xbd3ae90af76a4d0f), |
142 | | (0x395a28c8620dc90e, 0x3cb7eb97f85e7d62), |
143 | | (0xb8cf83f52abe45c5, 0xbc31028e3376648a), |
144 | | (0xb8441050c68ca435, 0x3ba3cb1e7d0c17e7), |
145 | | (0xb7ab072548a1aa43, 0xbb133191ed9f1eef), |
146 | | (0xb6f05192c2d9b6ee, 0x3a7f7f4b5e8ef7b0), |
147 | | (0x367ad65afe306d57, 0xb9e626e36cb3515d), |
148 | | (0xb5ea1c4136f8f230, 0x394b01153dce6810), |
149 | | ]; |
150 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
151 | 0 | let w0 = f_polyeval12( |
152 | 0 | x2.hi, |
153 | 0 | f64::from_bits(W[3].1), |
154 | 0 | f64::from_bits(W[4].1), |
155 | 0 | f64::from_bits(W[5].1), |
156 | 0 | f64::from_bits(W[6].1), |
157 | 0 | f64::from_bits(W[7].1), |
158 | 0 | f64::from_bits(W[8].1), |
159 | 0 | f64::from_bits(W[9].1), |
160 | 0 | f64::from_bits(W[10].1), |
161 | 0 | f64::from_bits(W[11].1), |
162 | 0 | f64::from_bits(W[12].1), |
163 | 0 | f64::from_bits(W[13].1), |
164 | 0 | f64::from_bits(W[14].1), |
165 | | ); |
166 | | |
167 | 0 | let mut w = DoubleDouble::mul_f64_add(x2, w0, DoubleDouble::from_bit_pair(W[2])); |
168 | 0 | w = DoubleDouble::mul_add(x2, w, DoubleDouble::from_bit_pair(W[1])); |
169 | 0 | w = DoubleDouble::mul_add(x2, w, DoubleDouble::from_bit_pair(W[0])); |
170 | 0 | w = DoubleDouble::quick_mult_f64(w, x); |
171 | | |
172 | | const Z: [(u64, u64); 15] = [ |
173 | | (0x3c61d7eb2e54cda1, 0x3fc91866143cbc8a), |
174 | | (0xbc2f9f7a0ce54a40, 0xbfabd3975c75b4a7), |
175 | | (0xbbf7659313f45e8c, 0x3f6835b97894be5b), |
176 | | (0x3b9cbcd40f1be7b9, 0xbf12c7dbffcde97d), |
177 | | (0xbb495d78778645b4, 0x3eb0a780ac776eac), |
178 | | (0xbae15be86455c1ab, 0xbe432e5a4ddeea30), |
179 | | (0xba5ad966c12f1e3c, 0x3dcf0ce34d2066a6), |
180 | | (0x39e9717155dc7521, 0xbd52a4e1aea45c18), |
181 | | (0x394f447fe5de1290, 0x3cd1474ade9154ac), |
182 | | (0xb8e1699d9009a7fc, 0xbc4978ba84f218c0), |
183 | | (0xb8505502096ead17, 0x3bbe9598c016378b), |
184 | | (0x37942b6c36b2c5f1, 0xbb2e7e5fcfc4b7b1), |
185 | | (0x37210853b78bd08a, 0x3a99a6c1266c116d), |
186 | | (0xb686c9639c9d976e, 0xba02738998fe7337), |
187 | | (0xb603b739ee04b9fe, 0x3966f58cd41b6d08), |
188 | | ]; |
189 | 0 | let z0 = f_polyeval12( |
190 | 0 | x2.hi, |
191 | 0 | f64::from_bits(Z[3].1), |
192 | 0 | f64::from_bits(Z[4].1), |
193 | 0 | f64::from_bits(Z[5].1), |
194 | 0 | f64::from_bits(Z[6].1), |
195 | 0 | f64::from_bits(Z[7].1), |
196 | 0 | f64::from_bits(Z[8].1), |
197 | 0 | f64::from_bits(Z[9].1), |
198 | 0 | f64::from_bits(Z[10].1), |
199 | 0 | f64::from_bits(Z[11].1), |
200 | 0 | f64::from_bits(Z[12].1), |
201 | 0 | f64::from_bits(Z[13].1), |
202 | 0 | f64::from_bits(Z[14].1), |
203 | | ); |
204 | | |
205 | 0 | let mut z = DoubleDouble::mul_f64_add(x2, z0, DoubleDouble::from_bit_pair(Z[2])); |
206 | 0 | z = DoubleDouble::mul_add(x2, z, DoubleDouble::from_bit_pair(Z[1])); |
207 | 0 | z = DoubleDouble::mul_add(x2, z, DoubleDouble::from_bit_pair(Z[0])); |
208 | 0 | z = DoubleDouble::quick_mult_f64(z, x); |
209 | | |
210 | 0 | let w_log = log_dd_fast(x); |
211 | | |
212 | | const MINUS_TWO_OVER_PI: DoubleDouble = |
213 | | DoubleDouble::from_bit_pair((0x3c86b01ec5417056, 0xbfe45f306dc9c883)); |
214 | | |
215 | | let m_two_over_pi_div_x: DoubleDouble; |
216 | | #[cfg(any( |
217 | | all( |
218 | | any(target_arch = "x86", target_arch = "x86_64"), |
219 | | target_feature = "fma" |
220 | | ), |
221 | | target_arch = "aarch64" |
222 | | ))] |
223 | | { |
224 | | m_two_over_pi_div_x = DoubleDouble::div_dd_f64(MINUS_TWO_OVER_PI, x) |
225 | | } |
226 | | #[cfg(not(any( |
227 | | all( |
228 | | any(target_arch = "x86", target_arch = "x86_64"), |
229 | | target_feature = "fma" |
230 | | ), |
231 | | target_arch = "aarch64" |
232 | | )))] |
233 | | { |
234 | | use crate::double_double::two_product_compatible; |
235 | 0 | m_two_over_pi_div_x = if two_product_compatible(x) { |
236 | 0 | DoubleDouble::div_dd_f64(MINUS_TWO_OVER_PI, x) |
237 | | } else { |
238 | 0 | DoubleDouble::div_safe_dd_f64(MINUS_TWO_OVER_PI, x) |
239 | | }; |
240 | | } |
241 | 0 | if m_two_over_pi_div_x.hi.is_infinite() { |
242 | 0 | return f64::NEG_INFINITY; |
243 | 0 | } |
244 | | |
245 | 0 | let zvp = DoubleDouble::mul_add(w, w_log, -z); |
246 | 0 | let p = DoubleDouble::add(m_two_over_pi_div_x, zvp); |
247 | 0 | let err = f_fmla( |
248 | 0 | p.hi, |
249 | 0 | f64::from_bits(0x3c30000000000000), // 2^-60 |
250 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
251 | | ); |
252 | 0 | let ub = p.hi + (p.lo + err); |
253 | 0 | let lb = p.hi + (p.lo - err); |
254 | 0 | if ub == lb { |
255 | 0 | return p.to_f64(); |
256 | 0 | } |
257 | 0 | y1_near_zero(x, w_log) |
258 | 0 | } |
259 | | |
260 | | /** |
261 | | Generated by SageMath: |
262 | | Evaluates: |
263 | | y2 = -J1(x)*log(x) + 1/x * (1 - sum((-1)^m*(H(m)+H(m-1))/(2^m*m!*(m-1)!)*x^(2*m)) |
264 | | Y1(x) = 2/pi*(-y2(x)+(euler_gamma - log(2))*J1(x)) |
265 | | expressed as: |
266 | | Y1(x)=log(x)*W1(x) - Z1(x) - 2/(pi*x) |
267 | | ```python |
268 | | from sage.all import * |
269 | | |
270 | | R = LaurentSeriesRing(RealField(300), 'x', default_prec=300) |
271 | | x = R.gen() |
272 | | N = 16 # Number of terms (adjust as needed) |
273 | | gamma = RealField(300)(euler_gamma) |
274 | | d2 = RealField(300)(2) |
275 | | pi = RealField(300).pi() |
276 | | log2 = RealField(300)(2).log() |
277 | | |
278 | | def j_series(n, x): |
279 | | return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)]) |
280 | | |
281 | | J1_series = j_series(1, x) |
282 | | |
283 | | def harmony(m): |
284 | | return sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) |
285 | | |
286 | | def z_series(x): |
287 | | return sum([(-1)**m * (x)**(ZZ(2)*ZZ(m)) / (ZZ(2)**(2*m) * ZZ(m).factorial() * (ZZ(m) - ZZ(1)).factorial()) * (harmony(m) + harmony(m - 1)) for m in range(1, N)]) |
288 | | |
289 | | W1 = d2/pi * J1_series |
290 | | Z1 = -(d2/(x*pi) * z_series(x) + d2/pi * gamma * J1_series(x) - d2/pi * log2 * J1_series(x)) |
291 | | |
292 | | def y1_full(x): |
293 | | return d2/pi * (J1_series(x) * x.log() - 1/x * ( 1 - z_series(x)) + (gamma - log2) * J1_series(x)) |
294 | | |
295 | | # see the series |
296 | | print(W0) |
297 | | print(Z0) |
298 | | ``` |
299 | | See ./notes/bessel_y1_taylor.ipynb for generation |
300 | | **/ |
301 | | #[cold] |
302 | | #[inline(never)] |
303 | 0 | fn y1_near_zero(x: f64, w_log: DoubleDouble) -> f64 { |
304 | | const W: [(u64, u64); 15] = [ |
305 | | (0xbc76b01ec5417056, 0x3fd45f306dc9c883), |
306 | | (0x3c46b01ec5417056, 0xbfa45f306dc9c883), |
307 | | (0xbbfe40290701eb1e, 0x3f5b2995e7b7b604), |
308 | | (0xbba67fe4a5feb897, 0xbf021bb945252402), |
309 | | (0xbb0334914cdd2ba9, 0x3e9cf9286ea1d337), |
310 | | (0x3aab4f3c6d42c1f4, 0xbe2ee7a29824147f), |
311 | | (0xba407fb57ef4dc2c, 0x3db78be9987d036d), |
312 | | (0x39d2921e91b07dd0, 0xbd3ae90af76a4d0f), |
313 | | (0x395a28c8620dc90e, 0x3cb7eb97f85e7d62), |
314 | | (0xb8cf83f52abe45c5, 0xbc31028e3376648a), |
315 | | (0xb8441050c68ca435, 0x3ba3cb1e7d0c17e7), |
316 | | (0xb7ab072548a1aa43, 0xbb133191ed9f1eef), |
317 | | (0xb6f05192c2d9b6ee, 0x3a7f7f4b5e8ef7b0), |
318 | | (0x367ad65afe306d57, 0xb9e626e36cb3515d), |
319 | | (0xb5ea1c4136f8f230, 0x394b01153dce6810), |
320 | | ]; |
321 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
322 | 0 | let mut w = f_polyeval15( |
323 | 0 | x2, |
324 | 0 | DoubleDouble::from_bit_pair(W[0]), |
325 | 0 | DoubleDouble::from_bit_pair(W[1]), |
326 | 0 | DoubleDouble::from_bit_pair(W[2]), |
327 | 0 | DoubleDouble::from_bit_pair(W[3]), |
328 | 0 | DoubleDouble::from_bit_pair(W[4]), |
329 | 0 | DoubleDouble::from_bit_pair(W[5]), |
330 | 0 | DoubleDouble::from_bit_pair(W[6]), |
331 | 0 | DoubleDouble::from_bit_pair(W[7]), |
332 | 0 | DoubleDouble::from_bit_pair(W[8]), |
333 | 0 | DoubleDouble::from_bit_pair(W[9]), |
334 | 0 | DoubleDouble::from_bit_pair(W[10]), |
335 | 0 | DoubleDouble::from_bit_pair(W[11]), |
336 | 0 | DoubleDouble::from_bit_pair(W[12]), |
337 | 0 | DoubleDouble::from_bit_pair(W[13]), |
338 | 0 | DoubleDouble::from_bit_pair(W[14]), |
339 | | ); |
340 | 0 | w = DoubleDouble::quick_mult_f64(w, x); |
341 | | |
342 | | const Z: [(u64, u64); 15] = [ |
343 | | (0x3c61d7eb2e54cda1, 0x3fc91866143cbc8a), |
344 | | (0xbc2f9f7a0ce54a40, 0xbfabd3975c75b4a7), |
345 | | (0xbbf7659313f45e8c, 0x3f6835b97894be5b), |
346 | | (0x3b9cbcd40f1be7b9, 0xbf12c7dbffcde97d), |
347 | | (0xbb495d78778645b4, 0x3eb0a780ac776eac), |
348 | | (0xbae15be86455c1ab, 0xbe432e5a4ddeea30), |
349 | | (0xba5ad966c12f1e3c, 0x3dcf0ce34d2066a6), |
350 | | (0x39e9717155dc7521, 0xbd52a4e1aea45c18), |
351 | | (0x394f447fe5de1290, 0x3cd1474ade9154ac), |
352 | | (0xb8e1699d9009a7fc, 0xbc4978ba84f218c0), |
353 | | (0xb8505502096ead17, 0x3bbe9598c016378b), |
354 | | (0x37942b6c36b2c5f1, 0xbb2e7e5fcfc4b7b1), |
355 | | (0x37210853b78bd08a, 0x3a99a6c1266c116d), |
356 | | (0xb686c9639c9d976e, 0xba02738998fe7337), |
357 | | (0xb603b739ee04b9fe, 0x3966f58cd41b6d08), |
358 | | ]; |
359 | 0 | let mut z = f_polyeval15( |
360 | 0 | x2, |
361 | 0 | DoubleDouble::from_bit_pair(Z[0]), |
362 | 0 | DoubleDouble::from_bit_pair(Z[1]), |
363 | 0 | DoubleDouble::from_bit_pair(Z[2]), |
364 | 0 | DoubleDouble::from_bit_pair(Z[3]), |
365 | 0 | DoubleDouble::from_bit_pair(Z[4]), |
366 | 0 | DoubleDouble::from_bit_pair(Z[5]), |
367 | 0 | DoubleDouble::from_bit_pair(Z[6]), |
368 | 0 | DoubleDouble::from_bit_pair(Z[7]), |
369 | 0 | DoubleDouble::from_bit_pair(Z[8]), |
370 | 0 | DoubleDouble::from_bit_pair(Z[9]), |
371 | 0 | DoubleDouble::from_bit_pair(Z[10]), |
372 | 0 | DoubleDouble::from_bit_pair(Z[11]), |
373 | 0 | DoubleDouble::from_bit_pair(Z[12]), |
374 | 0 | DoubleDouble::from_bit_pair(Z[13]), |
375 | 0 | DoubleDouble::from_bit_pair(Z[14]), |
376 | | ); |
377 | 0 | z = DoubleDouble::quick_mult_f64(z, x); |
378 | | |
379 | | const MINUS_TWO_OVER_PI: DoubleDouble = |
380 | | DoubleDouble::from_bit_pair((0x3c86b01ec5417056, 0xbfe45f306dc9c883)); |
381 | | |
382 | | let m_two_over_pi_div_x: DoubleDouble; |
383 | | #[cfg(any( |
384 | | all( |
385 | | any(target_arch = "x86", target_arch = "x86_64"), |
386 | | target_feature = "fma" |
387 | | ), |
388 | | target_arch = "aarch64" |
389 | | ))] |
390 | | { |
391 | | m_two_over_pi_div_x = DoubleDouble::div_dd_f64(MINUS_TWO_OVER_PI, x) |
392 | | } |
393 | | #[cfg(not(any( |
394 | | all( |
395 | | any(target_arch = "x86", target_arch = "x86_64"), |
396 | | target_feature = "fma" |
397 | | ), |
398 | | target_arch = "aarch64" |
399 | | )))] |
400 | | { |
401 | | use crate::double_double::two_product_compatible; |
402 | 0 | m_two_over_pi_div_x = if two_product_compatible(x) { |
403 | 0 | DoubleDouble::div_dd_f64(MINUS_TWO_OVER_PI, x) |
404 | | } else { |
405 | 0 | DoubleDouble::div_safe_dd_f64(MINUS_TWO_OVER_PI, x) |
406 | | }; |
407 | | } |
408 | 0 | if m_two_over_pi_div_x.hi.is_infinite() { |
409 | 0 | return f64::NEG_INFINITY; |
410 | 0 | } |
411 | | |
412 | 0 | let zvp = DoubleDouble::mul_add(w, w_log, -z); |
413 | 0 | DoubleDouble::full_dd_add(m_two_over_pi_div_x, zvp).to_f64() |
414 | 0 | } |
415 | | |
416 | | #[inline] |
417 | 0 | fn y1_transient_zone_fast(x: f64) -> f64 { |
418 | | /* |
419 | | <<FunctionApproximations` |
420 | | ClearAll["Global`*"] |
421 | | f[x_]:= BesselY[1,x + 2.1971413260310170351490335626990] |
422 | | {approx,error} =MiniMaxApproximation[f[x],{x,{1.42 - 2.1971413260310170351490335626990, 2- 2.1971413260310170351490335626990 },27,0},WorkingPrecision->120] |
423 | | poly=error[[1]]; |
424 | | coeffs=CoefficientList[poly,x]; |
425 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
426 | | */ |
427 | | const C: [(u64, u64); 28] = [ |
428 | | (0x38d23216c8eac2ee, 0x3c631de77e55e9b0), |
429 | | (0x3c77f3a6718f8e03, 0x3fe0aa48442f0150), |
430 | | (0xbc52b77d09be8679, 0xbfbe56f82217b33a), |
431 | | (0x3c237af39fc9d759, 0xbfa0d2af4e922afe), |
432 | | (0xbc08241e95389bc3, 0xbf73a6dec2f9f739), |
433 | | (0x3c18eac6a35acd63, 0x3f7e671c82a1c956), |
434 | | (0x3be0a0f3c8908083, 0xbf65429d5dbc3bb0), |
435 | | (0x3be217fa58861600, 0x3f517abad71c26c0), |
436 | | (0x3bebc327d6c65514, 0xbf40b28b4ef33a56), |
437 | | (0xbbcb597d6bdd5992, 0x3f2ef0d150ec9934), |
438 | | (0x3bb1a480de696e07, 0xbf1c0758a86844be), |
439 | | (0xbb9c6ea6352ab84e, 0x3f0b7c88b58d9ef3), |
440 | | (0x3b7dbb78dfd868a9, 0xbee97ef9519bdcfd), |
441 | | (0xbb8d4519030f499d, 0x3f04815f08e7ad5e), |
442 | | (0x3bbbd70e1480e260, 0x3f10f348483b57bc), |
443 | | (0x3b7117cf4d2b6f3c, 0x3f231914389bb1bb), |
444 | | (0x3b8ca48beaf6a58d, 0x3f30b29e838345b4), |
445 | | (0x3bccfac65ce17cf9, 0x3f39a69e98f61897), |
446 | | (0xbbeae7e3065b09c9, 0x3f40b9511666fcf0), |
447 | | (0xbbe5cbddf691e7e6, 0x3f428cd8388e634b), |
448 | | (0xbbd91372412d1e1b, 0x3f414ba048d9e1d5), |
449 | | (0xbbb0781a70c6f715, 0x3f3acdcf66f1de95), |
450 | | (0xbba3ae83fd425494, 0x3f30f44ae6620bba), |
451 | | (0x3bc001d75da77b74, 0x3f21154a0a1f2161), |
452 | | (0xbb91c9afb1a1b874, 0x3f0a687b664cbac6), |
453 | | (0x3b8e0a06b9444963, 0x3eed7c3cbb4ba5d8), |
454 | | (0x3b4f0e9dfc915934, 0x3ec53ca23fdd0999), |
455 | | (0xbb0409258f8ffca8, 0x3e8de620acb51b2d), |
456 | | ]; |
457 | | |
458 | | // this poly relative to first zero |
459 | | const ZERO: DoubleDouble = |
460 | | DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243)); |
461 | | |
462 | 0 | let mut r = DoubleDouble::full_add_f64(-ZERO, x); |
463 | 0 | r = DoubleDouble::from_exact_add(r.hi, r.lo); |
464 | | |
465 | 0 | let p0 = f_polyeval24( |
466 | 0 | r.to_f64(), |
467 | 0 | f64::from_bits(C[4].1), |
468 | 0 | f64::from_bits(C[5].1), |
469 | 0 | f64::from_bits(C[6].1), |
470 | 0 | f64::from_bits(C[7].1), |
471 | 0 | f64::from_bits(C[8].1), |
472 | 0 | f64::from_bits(C[9].1), |
473 | 0 | f64::from_bits(C[10].1), |
474 | 0 | f64::from_bits(C[11].1), |
475 | 0 | f64::from_bits(C[12].1), |
476 | 0 | f64::from_bits(C[13].1), |
477 | 0 | f64::from_bits(C[14].1), |
478 | 0 | f64::from_bits(C[15].1), |
479 | 0 | f64::from_bits(C[16].1), |
480 | 0 | f64::from_bits(C[17].1), |
481 | 0 | f64::from_bits(C[18].1), |
482 | 0 | f64::from_bits(C[19].1), |
483 | 0 | f64::from_bits(C[20].1), |
484 | 0 | f64::from_bits(C[21].1), |
485 | 0 | f64::from_bits(C[22].1), |
486 | 0 | f64::from_bits(C[23].1), |
487 | 0 | f64::from_bits(C[24].1), |
488 | 0 | f64::from_bits(C[25].1), |
489 | 0 | f64::from_bits(C[26].1), |
490 | 0 | f64::from_bits(C[27].1), |
491 | | ); |
492 | | |
493 | 0 | let mut p = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(C[3])); |
494 | 0 | p = DoubleDouble::mul_add(p, r, DoubleDouble::from_bit_pair(C[2])); |
495 | 0 | p = DoubleDouble::mul_add(p, r, DoubleDouble::from_bit_pair(C[1])); |
496 | 0 | p = DoubleDouble::mul_add(p, r, DoubleDouble::from_bit_pair(C[0])); |
497 | | |
498 | 0 | let err = f_fmla( |
499 | 0 | p.hi, |
500 | 0 | f64::from_bits(0x3c50000000000000), // 2^-58 |
501 | 0 | f64::from_bits(0x3b90000000000000), // 2^-70 |
502 | | ); |
503 | 0 | let ub = p.hi + (p.lo + err); |
504 | 0 | let lb = p.hi + (p.lo - err); |
505 | 0 | if ub == lb { |
506 | 0 | return p.to_f64(); |
507 | 0 | } |
508 | 0 | y1_transient_zone(x) |
509 | 0 | } |
510 | | |
511 | 0 | fn y1_transient_zone(x: f64) -> f64 { |
512 | | /* |
513 | | <<FunctionApproximations` |
514 | | ClearAll["Global`*"] |
515 | | f[x_]:= BesselY[1,x + 2.1971413260310170351490335626990] |
516 | | {approx,error} =MiniMaxApproximation[f[x],{x,{1.42 - 2.1971413260310170351490335626990, 2- 2.1971413260310170351490335626990 },27,0},WorkingPrecision->120] |
517 | | poly=error[[1]]; |
518 | | coeffs=CoefficientList[poly,x]; |
519 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
520 | | */ |
521 | | const C: [(u64, u64); 28] = [ |
522 | | (0x38d23216c8eac2ee, 0x3c631de77e55e9b0), |
523 | | (0x3c77f3a6718f8e03, 0x3fe0aa48442f0150), |
524 | | (0xbc52b77d09be8679, 0xbfbe56f82217b33a), |
525 | | (0x3c237af39fc9d759, 0xbfa0d2af4e922afe), |
526 | | (0xbc08241e95389bc3, 0xbf73a6dec2f9f739), |
527 | | (0x3c18eac6a35acd63, 0x3f7e671c82a1c956), |
528 | | (0x3be0a0f3c8908083, 0xbf65429d5dbc3bb0), |
529 | | (0x3be217fa58861600, 0x3f517abad71c26c0), |
530 | | (0x3bebc327d6c65514, 0xbf40b28b4ef33a56), |
531 | | (0xbbcb597d6bdd5992, 0x3f2ef0d150ec9934), |
532 | | (0x3bb1a480de696e07, 0xbf1c0758a86844be), |
533 | | (0xbb9c6ea6352ab84e, 0x3f0b7c88b58d9ef3), |
534 | | (0x3b7dbb78dfd868a9, 0xbee97ef9519bdcfd), |
535 | | (0xbb8d4519030f499d, 0x3f04815f08e7ad5e), |
536 | | (0x3bbbd70e1480e260, 0x3f10f348483b57bc), |
537 | | (0x3b7117cf4d2b6f3c, 0x3f231914389bb1bb), |
538 | | (0x3b8ca48beaf6a58d, 0x3f30b29e838345b4), |
539 | | (0x3bccfac65ce17cf9, 0x3f39a69e98f61897), |
540 | | (0xbbeae7e3065b09c9, 0x3f40b9511666fcf0), |
541 | | (0xbbe5cbddf691e7e6, 0x3f428cd8388e634b), |
542 | | (0xbbd91372412d1e1b, 0x3f414ba048d9e1d5), |
543 | | (0xbbb0781a70c6f715, 0x3f3acdcf66f1de95), |
544 | | (0xbba3ae83fd425494, 0x3f30f44ae6620bba), |
545 | | (0x3bc001d75da77b74, 0x3f21154a0a1f2161), |
546 | | (0xbb91c9afb1a1b874, 0x3f0a687b664cbac6), |
547 | | (0x3b8e0a06b9444963, 0x3eed7c3cbb4ba5d8), |
548 | | (0x3b4f0e9dfc915934, 0x3ec53ca23fdd0999), |
549 | | (0xbb0409258f8ffca8, 0x3e8de620acb51b2d), |
550 | | ]; |
551 | | |
552 | | // this poly relative to first zero |
553 | | const ZERO: DoubleDouble = |
554 | | DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243)); |
555 | | |
556 | 0 | let r = DoubleDouble::full_add_f64(-ZERO, x); |
557 | | |
558 | 0 | let p0 = f_polyeval13( |
559 | 0 | r.to_f64(), |
560 | 0 | f64::from_bits(C[15].1), |
561 | 0 | f64::from_bits(C[16].1), |
562 | 0 | f64::from_bits(C[17].1), |
563 | 0 | f64::from_bits(C[18].1), |
564 | 0 | f64::from_bits(C[19].1), |
565 | 0 | f64::from_bits(C[20].1), |
566 | 0 | f64::from_bits(C[21].1), |
567 | 0 | f64::from_bits(C[22].1), |
568 | 0 | f64::from_bits(C[23].1), |
569 | 0 | f64::from_bits(C[24].1), |
570 | 0 | f64::from_bits(C[25].1), |
571 | 0 | f64::from_bits(C[26].1), |
572 | 0 | f64::from_bits(C[27].1), |
573 | | ); |
574 | | |
575 | 0 | let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(C[14])); |
576 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[13])); |
577 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[12])); |
578 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[11])); |
579 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[10])); |
580 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[9])); |
581 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[8])); |
582 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[7])); |
583 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[6])); |
584 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[5])); |
585 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[4])); |
586 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[3])); |
587 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[2])); |
588 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[1])); |
589 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[0])); |
590 | | |
591 | 0 | let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo); |
592 | 0 | let err = f_fmla( |
593 | 0 | p.hi, |
594 | 0 | f64::from_bits(0x3c20000000000000), // 2^-61 |
595 | 0 | f64::from_bits(0x3a90000000000000), // 2^-86 |
596 | | ); |
597 | 0 | let ub = p.hi + (p.lo + err); |
598 | 0 | let lb = p.hi + (p.lo - err); |
599 | 0 | if ub == lb { |
600 | 0 | return p.to_f64(); |
601 | 0 | } |
602 | 0 | y1_transient_hard(x) |
603 | 0 | } |
604 | | |
605 | | #[cold] |
606 | | #[inline(never)] |
607 | 0 | fn y1_transient_hard(x: f64) -> f64 { |
608 | | /* |
609 | | <<FunctionApproximations` |
610 | | ClearAll["Global`*"] |
611 | | f[x_]:= BesselY[1,x + 2.1971413260310170351490335626990] |
612 | | {approx,error} =MiniMaxApproximation[f[x],{x,{1.42 - 2.1971413260310170351490335626990, 2- 2.1971413260310170351490335626990 },27,0},WorkingPrecision->120] |
613 | | poly=error[[1]]; |
614 | | coeffs=CoefficientList[poly,x]; |
615 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
616 | | */ |
617 | | pub(crate) static C: [DyadicFloat128; 28] = [ |
618 | | DyadicFloat128 { |
619 | | sign: DyadicSign::Pos, |
620 | | exponent: -184, |
621 | | mantissa: 0x98ef3bf2_af4d8048_c85b23ab_0bb72488_u128, |
622 | | }, |
623 | | DyadicFloat128 { |
624 | | sign: DyadicSign::Pos, |
625 | | exponent: -128, |
626 | | mantissa: 0x85524221_780a817f_3a6718f8_e03513ec_u128, |
627 | | }, |
628 | | DyadicFloat128 { |
629 | | sign: DyadicSign::Neg, |
630 | | exponent: -131, |
631 | | mantissa: 0xf2b7c110_bd99d256_efa137d0_cf1f7988_u128, |
632 | | }, |
633 | | DyadicFloat128 { |
634 | | sign: DyadicSign::Neg, |
635 | | exponent: -132, |
636 | | mantissa: 0x86957a74_9157ef64_286301b1_453792e2_u128, |
637 | | }, |
638 | | DyadicFloat128 { |
639 | | sign: DyadicSign::Neg, |
640 | | exponent: -135, |
641 | | mantissa: 0x9d36f617_cfb9c982_41e95389_bc32e8c2_u128, |
642 | | }, |
643 | | DyadicFloat128 { |
644 | | sign: DyadicSign::Pos, |
645 | | exponent: -135, |
646 | | mantissa: 0xf338e415_0e4ab31d_58d46b59_ac6792eb_u128, |
647 | | }, |
648 | | DyadicFloat128 { |
649 | | sign: DyadicSign::Neg, |
650 | | exponent: -136, |
651 | | mantissa: 0xaa14eaed_e1dd7f7a_f861bb7b_fbe6acb5_u128, |
652 | | }, |
653 | | DyadicFloat128 { |
654 | | sign: DyadicSign::Pos, |
655 | | exponent: -137, |
656 | | mantissa: 0x8bd5d6b8_e1360121_7fa58861_5ff9b614_u128, |
657 | | }, |
658 | | DyadicFloat128 { |
659 | | sign: DyadicSign::Neg, |
660 | | exponent: -138, |
661 | | mantissa: 0x85945a77_99d2ac87_9b052735_5d7f5f59_u128, |
662 | | }, |
663 | | DyadicFloat128 { |
664 | | sign: DyadicSign::Pos, |
665 | | exponent: -140, |
666 | | mantissa: 0xf7868a87_64c99c94_d0528454_cdccd44c_u128, |
667 | | }, |
668 | | DyadicFloat128 { |
669 | | sign: DyadicSign::Neg, |
670 | | exponent: -141, |
671 | | mantissa: 0xe03ac543_4225edcb_6fe432d2_3f11a8b0_u128, |
672 | | }, |
673 | | DyadicFloat128 { |
674 | | sign: DyadicSign::Pos, |
675 | | exponent: -142, |
676 | | mantissa: 0xdbe445ac_6cf79639_159cad54_7b1eda7c_u128, |
677 | | }, |
678 | | DyadicFloat128 { |
679 | | sign: DyadicSign::Neg, |
680 | | exponent: -144, |
681 | | mantissa: 0xcbf7ca8c_dee7e624_48720279_75757a17_u128, |
682 | | }, |
683 | | DyadicFloat128 { |
684 | | sign: DyadicSign::Pos, |
685 | | exponent: -142, |
686 | | mantissa: 0xa40af847_3d6aef15_d737e785_b3163322_u128, |
687 | | }, |
688 | | DyadicFloat128 { |
689 | | sign: DyadicSign::Pos, |
690 | | exponent: -141, |
691 | | mantissa: 0x879a4241_dabde37a_e1c2901c_4c06b1dc_u128, |
692 | | }, |
693 | | DyadicFloat128 { |
694 | | sign: DyadicSign::Pos, |
695 | | exponent: -140, |
696 | | mantissa: 0x98c8a1c4_dd8dd811_17cf4d2b_6f3c3910_u128, |
697 | | }, |
698 | | DyadicFloat128 { |
699 | | sign: DyadicSign::Pos, |
700 | | exponent: -139, |
701 | | mantissa: 0x8594f41c_1a2da01c_a48beaf6_a58cef9f_u128, |
702 | | }, |
703 | | DyadicFloat128 { |
704 | | sign: DyadicSign::Pos, |
705 | | exponent: -139, |
706 | | mantissa: 0xcd34f4c7_b0c4b9cf_ac65ce17_cf940c9c_u128, |
707 | | }, |
708 | | DyadicFloat128 { |
709 | | sign: DyadicSign::Pos, |
710 | | exponent: -138, |
711 | | mantissa: 0x85ca88b3_37e77ca3_039f349e_c6ddb06d_u128, |
712 | | }, |
713 | | DyadicFloat128 { |
714 | | sign: DyadicSign::Pos, |
715 | | exponent: -138, |
716 | | mantissa: 0x9466c1c4_731a5546_84412dc3_033a8ee7_u128, |
717 | | }, |
718 | | DyadicFloat128 { |
719 | | sign: DyadicSign::Pos, |
720 | | exponent: -138, |
721 | | mantissa: 0x8a5d0246_cf0ea66e_c8dbed2e_1e50b14f_u128, |
722 | | }, |
723 | | DyadicFloat128 { |
724 | | sign: DyadicSign::Pos, |
725 | | exponent: -139, |
726 | | mantissa: 0xd66e7b37_8ef4a77c_3f2c79c8_4757a40d_u128, |
727 | | }, |
728 | | DyadicFloat128 { |
729 | | sign: DyadicSign::Pos, |
730 | | exponent: -139, |
731 | | mantissa: 0x87a25733_105dcfb1_45f00af6_adb11b5f_u128, |
732 | | }, |
733 | | DyadicFloat128 { |
734 | | sign: DyadicSign::Pos, |
735 | | exponent: -140, |
736 | | mantissa: 0x88aa5050_f90b0a00_3aebb4ef_6e7e9ce1_u128, |
737 | | }, |
738 | | DyadicFloat128 { |
739 | | sign: DyadicSign::Pos, |
740 | | exponent: -142, |
741 | | mantissa: 0xd343db32_65d62ee3_6504e5e4_78c55965_u128, |
742 | | }, |
743 | | DyadicFloat128 { |
744 | | sign: DyadicSign::Pos, |
745 | | exponent: -144, |
746 | | mantissa: 0xebe1e5da_5d2ec3c1_40d72889_2c57e483_u128, |
747 | | }, |
748 | | DyadicFloat128 { |
749 | | sign: DyadicSign::Pos, |
750 | | exponent: -146, |
751 | | mantissa: 0xa9e511fe_e84cc8f8_74efe48a_c9a09ebc_u128, |
752 | | }, |
753 | | DyadicFloat128 { |
754 | | sign: DyadicSign::Pos, |
755 | | exponent: -150, |
756 | | mantissa: 0xef310565_a8d9675f_b6d38380_1abf92a6_u128, |
757 | | }, |
758 | | ]; |
759 | | const ZERO: DyadicFloat128 = DyadicFloat128 { |
760 | | sign: DyadicSign::Pos, |
761 | | exponent: -126, |
762 | | mantissa: 0x8c9df6a6_ff921721_70d796f3_2017e155_u128, |
763 | | }; |
764 | 0 | let r = DyadicFloat128::new_from_f64(x) - ZERO; |
765 | | |
766 | 0 | let mut p = C[27]; |
767 | 0 | for i in (0..27).rev() { |
768 | 0 | p = r * p + C[i]; |
769 | 0 | } |
770 | 0 | p.fast_as_f64() |
771 | 0 | } |
772 | | |
773 | | /// This method on small range searches for nearest zero or extremum. |
774 | | /// Then picks stored series expansion at the point end evaluates the poly at the point. |
775 | | #[inline] |
776 | 0 | pub(crate) fn y1_small_argument_fast(x: f64) -> f64 { |
777 | | // let avg_step = 51.03 / 33.0; |
778 | | // let inv_step = 1.0 / avg_step; |
779 | | // |
780 | | // println!("inv_step {}", inv_step); |
781 | | |
782 | | const INV_STEP: f64 = 0.6466784244562023; |
783 | | |
784 | 0 | let fx = x * INV_STEP; |
785 | | const Y1_ZEROS_COUNT: f64 = (Y1_ZEROS.len() - 1) as f64; |
786 | 0 | let idx0 = unsafe { fx.min(Y1_ZEROS_COUNT).to_int_unchecked::<usize>() }; |
787 | 0 | let idx1 = unsafe { |
788 | 0 | fx.cpu_ceil() |
789 | 0 | .min(Y1_ZEROS_COUNT) |
790 | 0 | .to_int_unchecked::<usize>() |
791 | | }; |
792 | | |
793 | 0 | let found_zero0 = DoubleDouble::from_bit_pair(Y1_ZEROS[idx0]); |
794 | 0 | let found_zero1 = DoubleDouble::from_bit_pair(Y1_ZEROS[idx1]); |
795 | | |
796 | 0 | let dist0 = (found_zero0.hi - x).abs(); |
797 | 0 | let dist1 = (found_zero1.hi - x).abs(); |
798 | | |
799 | 0 | let (found_zero, idx, dist) = if dist0 < dist1 { |
800 | 0 | (found_zero0, idx0, dist0) |
801 | | } else { |
802 | 0 | (found_zero1, idx1, dist1) |
803 | | }; |
804 | | |
805 | 0 | if idx == 0 { |
806 | 0 | return y1_near_zero_fast(x); |
807 | 0 | } |
808 | | |
809 | 0 | let close_to_zero = dist.abs() < 1e-3; |
810 | | |
811 | 0 | let c = if close_to_zero { |
812 | 0 | &Y1_COEFFS[idx - 1] |
813 | | } else { |
814 | 0 | &Y1_COEFFS_REMEZ[idx - 1] |
815 | | }; |
816 | | |
817 | 0 | let r = DoubleDouble::full_add_f64(-found_zero, x); |
818 | | |
819 | | // We hit exact zero, value, better to return it directly |
820 | 0 | if dist == 0. { |
821 | 0 | return f64::from_bits(Y1_ZEROS_VALUES[idx]); |
822 | 0 | } |
823 | | |
824 | 0 | let p = f_polyeval22( |
825 | 0 | r.hi, |
826 | 0 | f64::from_bits(c[6].1), |
827 | 0 | f64::from_bits(c[7].1), |
828 | 0 | f64::from_bits(c[8].1), |
829 | 0 | f64::from_bits(c[9].1), |
830 | 0 | f64::from_bits(c[10].1), |
831 | 0 | f64::from_bits(c[11].1), |
832 | 0 | f64::from_bits(c[12].1), |
833 | 0 | f64::from_bits(c[13].1), |
834 | 0 | f64::from_bits(c[14].1), |
835 | 0 | f64::from_bits(c[15].1), |
836 | 0 | f64::from_bits(c[16].1), |
837 | 0 | f64::from_bits(c[17].1), |
838 | 0 | f64::from_bits(c[18].1), |
839 | 0 | f64::from_bits(c[19].1), |
840 | 0 | f64::from_bits(c[20].1), |
841 | 0 | f64::from_bits(c[21].1), |
842 | 0 | f64::from_bits(c[22].1), |
843 | 0 | f64::from_bits(c[23].1), |
844 | 0 | f64::from_bits(c[24].1), |
845 | 0 | f64::from_bits(c[25].1), |
846 | 0 | f64::from_bits(c[26].1), |
847 | 0 | f64::from_bits(c[27].1), |
848 | | ); |
849 | | |
850 | 0 | let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[5])); |
851 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[4])); |
852 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3])); |
853 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2])); |
854 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1])); |
855 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0])); |
856 | 0 | let p = z; |
857 | 0 | let err = f_fmla( |
858 | 0 | p.hi, |
859 | 0 | f64::from_bits(0x3c60000000000000), // 2^-57 |
860 | 0 | f64::from_bits(0x3c20000000000000), // 2^-61 |
861 | | ); |
862 | 0 | let ub = p.hi + (p.lo + err); |
863 | 0 | let lb = p.hi + (p.lo - err); |
864 | 0 | if ub == lb { |
865 | 0 | return p.to_f64(); |
866 | 0 | } |
867 | 0 | y0_small_argument_moderate(r, c) |
868 | 0 | } |
869 | | |
870 | 0 | fn y0_small_argument_moderate(r: DoubleDouble, c0: &[(u64, u64); 28]) -> f64 { |
871 | 0 | let c = &c0[15..]; |
872 | | |
873 | 0 | let p0 = f_polyeval13( |
874 | 0 | r.to_f64(), |
875 | 0 | f64::from_bits(c[0].1), |
876 | 0 | f64::from_bits(c[1].1), |
877 | 0 | f64::from_bits(c[2].1), |
878 | 0 | f64::from_bits(c[3].1), |
879 | 0 | f64::from_bits(c[4].1), |
880 | 0 | f64::from_bits(c[5].1), |
881 | 0 | f64::from_bits(c[6].1), |
882 | 0 | f64::from_bits(c[7].1), |
883 | 0 | f64::from_bits(c[8].1), |
884 | 0 | f64::from_bits(c[9].1), |
885 | 0 | f64::from_bits(c[10].1), |
886 | 0 | f64::from_bits(c[11].1), |
887 | 0 | f64::from_bits(c[12].1), |
888 | | ); |
889 | | |
890 | 0 | let c = c0; |
891 | | |
892 | 0 | let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14])); |
893 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13])); |
894 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12])); |
895 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11])); |
896 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10])); |
897 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9])); |
898 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8])); |
899 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7])); |
900 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6])); |
901 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5])); |
902 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4])); |
903 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3])); |
904 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2])); |
905 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1])); |
906 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0])); |
907 | | |
908 | 0 | let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo); |
909 | 0 | let err = f_fmla( |
910 | 0 | p.hi, |
911 | 0 | f64::from_bits(0x3c30000000000000), // 2^-60 |
912 | 0 | f64::from_bits(0x3bf0000000000000), // 2^-64 |
913 | | ); |
914 | 0 | let ub = p.hi + (p.lo + err); |
915 | 0 | let lb = p.hi + (p.lo - err); |
916 | 0 | if ub == lb { |
917 | 0 | return p.to_f64(); |
918 | 0 | } |
919 | 0 | y1_small_argument_hard(r, c) |
920 | 0 | } |
921 | | |
922 | | #[cold] |
923 | | #[inline(never)] |
924 | 0 | fn y1_small_argument_hard(r: DoubleDouble, c: &[(u64, u64); 28]) -> f64 { |
925 | | // if we're too close to zero taylor will converge faster and more accurate, |
926 | | // since remez, minimax and other almost cannot polynomial optimize near zero |
927 | 0 | let mut p = DoubleDouble::from_bit_pair(c[27]); |
928 | 0 | for i in (0..27).rev() { |
929 | 0 | p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i])); |
930 | 0 | p = DoubleDouble::from_exact_add(p.hi, p.lo); |
931 | 0 | } |
932 | 0 | p.to_f64() |
933 | 0 | } |
934 | | |
935 | | /* |
936 | | Evaluates: |
937 | | Y1 = sqrt(2/(PI*x)) * beta(x) * sin(x - 3*PI/4 - alpha(x)) |
938 | | |
939 | | Discarding 1/2*PI gives: |
940 | | Y1 = sqrt(2/(PI*x)) * beta(x) * (-cos(x - PI/4 - alpha(x))) |
941 | | */ |
942 | | #[inline] |
943 | 0 | pub(crate) fn y1_asympt_fast(x: f64) -> f64 { |
944 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
945 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
946 | | f64::from_bits(0x3fe9884533d43651), |
947 | | ); |
948 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
949 | | f64::from_bits(0xbc81a62633145c07), |
950 | | f64::from_bits(0xbfe921fb54442d18), |
951 | | ); |
952 | | |
953 | 0 | let recip = if x.to_bits() > 0x7fd000000000000u64 { |
954 | 0 | DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_div_fma(4.0, x), 0.25) |
955 | | } else { |
956 | 0 | DoubleDouble::from_recip(x) |
957 | | }; |
958 | | |
959 | 0 | let alpha = bessel_1_asympt_alpha_fast(recip); |
960 | 0 | let beta = bessel_1_asympt_beta_fast(recip); |
961 | | |
962 | 0 | let AngleReduced { angle } = rem2pi_any(x); |
963 | | |
964 | | // Without full subtraction cancellation happens sometimes |
965 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
966 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
967 | | |
968 | 0 | let m_cos = -cos_dd_small_fast(r0); |
969 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_cos); |
970 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(x); |
971 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
972 | 0 | let r = DoubleDouble::quick_mult(scale, z0); |
973 | 0 | let p = DoubleDouble::from_exact_add(r.hi, r.lo); |
974 | 0 | let err = f_fmla( |
975 | 0 | p.hi, |
976 | 0 | f64::from_bits(0x3c40000000000000), // 2^-60 |
977 | 0 | f64::from_bits(0x3bf0000000000000), // 2^-87 |
978 | | ); |
979 | 0 | let ub = p.hi + (p.lo + err); |
980 | 0 | let lb = p.hi + (p.lo - err); |
981 | 0 | if ub == lb { |
982 | 0 | return p.to_f64(); |
983 | 0 | } |
984 | 0 | y1_asympt(x, recip, r_sqrt, angle) |
985 | 0 | } |
986 | | |
987 | | /* |
988 | | Evaluates: |
989 | | Y1 = sqrt(2/(PI*x)) * beta(x) * sin(x - 3*PI/4 - alpha(x)) |
990 | | |
991 | | Discarding 1/2*PI gives: |
992 | | Y1 = sqrt(2/(PI*x)) * beta(x) * (-cos(x - PI/4 - alpha(x))) |
993 | | */ |
994 | 0 | fn y1_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 { |
995 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
996 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
997 | | f64::from_bits(0x3fe9884533d43651), |
998 | | ); |
999 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
1000 | | f64::from_bits(0xbc81a62633145c07), |
1001 | | f64::from_bits(0xbfe921fb54442d18), |
1002 | | ); |
1003 | | |
1004 | 0 | let alpha = bessel_1_asympt_alpha(recip); |
1005 | 0 | let beta = bessel_1_asympt_beta(recip); |
1006 | | |
1007 | | // Without full subtraction cancellation happens sometimes |
1008 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
1009 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
1010 | | |
1011 | 0 | let m_cos = -cos_dd_small(r0); |
1012 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_cos); |
1013 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
1014 | 0 | let r = DoubleDouble::quick_mult(scale, z0); |
1015 | 0 | let p = DoubleDouble::from_exact_add(r.hi, r.lo); |
1016 | 0 | let err = f_fmla( |
1017 | 0 | p.hi, |
1018 | 0 | f64::from_bits(0x3c30000000000000), // 2^-60 |
1019 | 0 | f64::from_bits(0x3a80000000000000), // 2^-87 |
1020 | | ); |
1021 | 0 | let ub = p.hi + (p.lo + err); |
1022 | 0 | let lb = p.hi + (p.lo - err); |
1023 | 0 | if ub == lb { |
1024 | 0 | return p.to_f64(); |
1025 | 0 | } |
1026 | 0 | y1_asympt_hard(x) |
1027 | 0 | } |
1028 | | |
1029 | | /* |
1030 | | Evaluates: |
1031 | | Y1 = sqrt(2/(PI*x)) * beta(x) * sin(x - 3*PI/4 - alpha(x)) |
1032 | | |
1033 | | Discarding 1/2*PI gives: |
1034 | | Y1 = sqrt(2/(PI*x)) * beta(x) * (-cos(x - PI/4 - alpha(x))) |
1035 | | */ |
1036 | | #[cold] |
1037 | | #[inline(never)] |
1038 | 0 | fn y1_asympt_hard(x: f64) -> f64 { |
1039 | | const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 { |
1040 | | sign: DyadicSign::Pos, |
1041 | | exponent: -128, |
1042 | | mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128, |
1043 | | }; |
1044 | | |
1045 | | const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 { |
1046 | | sign: DyadicSign::Neg, |
1047 | | exponent: -128, |
1048 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
1049 | | }; |
1050 | | |
1051 | 0 | let x_dyadic = DyadicFloat128::new_from_f64(x); |
1052 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
1053 | | |
1054 | 0 | let alpha = bessel_1_asympt_alpha_hard(recip); |
1055 | 0 | let beta = bessel_1_asympt_beta_hard(recip); |
1056 | | |
1057 | 0 | let angle = rem2pi_f128(x_dyadic); |
1058 | | |
1059 | 0 | let x0pi34 = MPI_OVER_4 - alpha; |
1060 | 0 | let r0 = angle + x0pi34; |
1061 | | |
1062 | 0 | let m_cos = cos_f128_small(r0).negated(); |
1063 | | |
1064 | 0 | let z0 = beta * m_cos; |
1065 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
1066 | 0 | let scale = SQRT_2_OVER_PI * r_sqrt; |
1067 | 0 | let p = scale * z0; |
1068 | 0 | p.fast_as_f64() |
1069 | 0 | } |
1070 | | |
1071 | | #[cfg(test)] |
1072 | | mod tests { |
1073 | | use super::*; |
1074 | | |
1075 | | #[test] |
1076 | | fn test_y1() { |
1077 | | // ULP should be less than 0.500001, but it was 0.5089558379720955, on 2.1957931471395398 result -0.0007023285780874727, using f_y1 and MPFR -0.0007023285780874729 |
1078 | | assert_eq!(f_y1(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007291282546733975), |
1079 | | -873124540555277200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.); |
1080 | | assert_eq!(f_y1(2.1957931471395398), -0.0007023285780874727); |
1081 | | assert_eq!( |
1082 | | f_y1(f64::from_bits(0x571a31ffe2ff7e9fu64)), |
1083 | | f64::from_bits(0x32e58532f95056ffu64) |
1084 | | ); |
1085 | | assert_eq!( |
1086 | | f_y1(f64::from_bits(0x400193bed4dff243)), |
1087 | | 0.00000000000000002513306678922122 |
1088 | | ); |
1089 | | assert_eq!( |
1090 | | f_y1(f64::from_bits(0x3ffc513c569fe78e)), |
1091 | | -0.24189760895998239 |
1092 | | ); |
1093 | | assert_eq!( |
1094 | | f_y1(f64::from_bits(0x4192391e4c8faa60)), |
1095 | | -0.000000000000000002572292246748134 |
1096 | | ); |
1097 | | assert_eq!( |
1098 | | f_y1(f64::from_bits(0x403e9e480605283c)), |
1099 | | -0.00000000000000001524456280251315 |
1100 | | ); |
1101 | | assert_eq!( |
1102 | | f_y1(f64::from_bits(0x40277f9138d43206)), |
1103 | | 0.000000000000000006849807120770496 |
1104 | | ); |
1105 | | assert_eq!(f_y1(f64::INFINITY), 0.); |
1106 | | assert!(f_y1(f64::NEG_INFINITY).is_nan()); |
1107 | | assert!(f_y1(f64::NAN).is_nan()); |
1108 | | } |
1109 | | |
1110 | | #[test] |
1111 | | fn test_y1_edge_cases() { |
1112 | | assert_eq!(f_y1(2.1904620854463985), -0.0034837351616785234); |
1113 | | assert_eq!(f_y1(2.197142201034536), 4.5568985277260593e-7); |
1114 | | assert_eq!(f_y1(1.4000000000000004), -0.4791469742327998); |
1115 | | assert_eq!(f_y1(2.0002288794493848), -0.1069033735586767); |
1116 | | } |
1117 | | } |