/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/bessel/y0.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bessel::alpha0::{ |
30 | | bessel_0_asympt_alpha, bessel_0_asympt_alpha_fast, bessel_0_asympt_alpha_hard, |
31 | | }; |
32 | | use crate::bessel::beta0::{ |
33 | | bessel_0_asympt_beta, bessel_0_asympt_beta_fast, bessel_0_asympt_beta_hard, |
34 | | }; |
35 | | use crate::bessel::i0::bessel_rsqrt_hard; |
36 | | use crate::bessel::j0::j0_maclaurin_series; |
37 | | use crate::bessel::y0_coeffs::Y0_COEFFS; |
38 | | use crate::bessel::y0_coeffs_taylor::Y0_COEFFS_TAYLOR; |
39 | | use crate::bessel::y0f_coeffs::{Y0_ZEROS, Y0_ZEROS_VALUES}; |
40 | | use crate::common::f_fmla; |
41 | | use crate::double_double::DoubleDouble; |
42 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
43 | | use crate::logs::log_dd_fast; |
44 | | use crate::polyeval::{f_polyeval12, f_polyeval13, f_polyeval15, f_polyeval22, f_polyeval24}; |
45 | | use crate::rounding::CpuCeil; |
46 | | use crate::sin_helper::{sin_dd_small, sin_dd_small_fast, sin_f128_small}; |
47 | | use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128}; |
48 | | |
49 | | /// Bessel of the second kind of order 0 (Y0) |
50 | 0 | pub fn f_y0(x: f64) -> f64 { |
51 | 0 | let ix = x.to_bits(); |
52 | | |
53 | 0 | if ix >= 0x7ffu64 << 52 || ix == 0 { |
54 | | // |x| == NaN, x == inf, |x| == 0, x < 0 |
55 | 0 | if ix.wrapping_shl(1) == 0 { |
56 | | // |x| == 0 |
57 | 0 | return f64::NEG_INFINITY; |
58 | 0 | } |
59 | | |
60 | 0 | if x.is_infinite() { |
61 | 0 | if x.is_sign_negative() { |
62 | 0 | return f64::NAN; |
63 | 0 | } |
64 | 0 | return 0.; |
65 | 0 | } |
66 | 0 | return x + f64::NAN; // x == NaN |
67 | 0 | } |
68 | | |
69 | 0 | let xb = x.to_bits(); |
70 | | |
71 | 0 | if xb <= 0x4052d9999999999au64 { |
72 | | // x <= 75.4 |
73 | 0 | if xb <= 0x4000000000000000u64 { |
74 | | // x <= 2 |
75 | 0 | if xb <= 0x3ff599999999999au64 { |
76 | | // x < 1.35 |
77 | 0 | return y0_near_zero_fast(x); |
78 | 0 | } |
79 | | // transient zone from 1.46 to 2 have bad behavior for log poly already, |
80 | | // and not yet good to be easily covered, thus it use its own poly |
81 | 0 | return y0_transient_area_fast(x); |
82 | 0 | } |
83 | 0 | return y0_small_argument_fast(x); |
84 | 0 | } |
85 | | |
86 | 0 | y0_asympt_fast(x) |
87 | 0 | } |
88 | | |
89 | | /** |
90 | | Generated by SageMath: |
91 | | Evaluates: |
92 | | Y0(x) = 2/pi*(euler_gamma + log(x/2))*J0(x) - sum((-1)^m*(x/2)^(2*m)/(m!)^2*sum(1+1/2 + ... 1/m)) |
93 | | expressed as: |
94 | | Y0(x)=log(x)*W0(x) - Z0(x) |
95 | | ```python |
96 | | from sage.all import * |
97 | | |
98 | | R = LaurentSeriesRing(RealField(300), 'x',default_prec=300) |
99 | | x = R.gen() |
100 | | N = 10 # Number of terms (adjust as needed) |
101 | | gamma = RealField(300)(euler_gamma) |
102 | | d2 = RealField(300)(2) |
103 | | pi = RealField(300).pi() |
104 | | |
105 | | # Define J0(x) Taylor expansion at x = 0 |
106 | | def j_series(n, x): |
107 | | return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)]) |
108 | | |
109 | | J0_series = j_series(0, x) |
110 | | |
111 | | def z_series(x): |
112 | | return sum([(-1)**m * (x/2)**(ZZ(2)*ZZ(m)) / ZZ(m).factorial()**ZZ(2) * sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) for m in range(1, N)]) |
113 | | |
114 | | W0 = (d2/pi) * J0_series |
115 | | Z0 = -gamma * (d2/pi) * J0_series + RealField(300)(2).log() * (d2/pi) * J0_series + (d2/pi) * z_series(x) |
116 | | |
117 | | # see the series |
118 | | print(W0) |
119 | | print(Z0) |
120 | | ``` |
121 | | **/ |
122 | | #[inline] |
123 | 0 | fn y0_near_zero_fast(x: f64) -> f64 { |
124 | | const W: [(u64, u64); 15] = [ |
125 | | (0xbc86b01ec5417056, 0x3fe45f306dc9c883), |
126 | | (0x3c66b01ec5417056, 0xbfc45f306dc9c883), |
127 | | (0xbc26b01ec5417056, 0x3f845f306dc9c883), |
128 | | (0xbbd67fe4a5feb897, 0xbf321bb945252402), |
129 | | (0x3b767fe4a5feb897, 0x3ed21bb945252402), |
130 | | (0xbaf5c2495706f745, 0xbe672db9f21b0f5f), |
131 | | (0x3a90c8209874dfad, 0x3df49a6c656d62ff), |
132 | | (0x3a12921e91b07dd0, 0xbd7ae90af76a4d0f), |
133 | | (0xb992921e91b07dd0, 0x3cfae90af76a4d0f), |
134 | | (0x39089b0d8a9228ca, 0xbc754331c053fdad), |
135 | | (0x3878d321ddfd3c6e, 0x3beb3749ebf0a0dd), |
136 | | (0x37e77548130d809b, 0xbb5cca5ae46eae67), |
137 | | (0xb73a848e7ca1c943, 0x3ac9976d3cd4293f), |
138 | | (0xb6c884706195a054, 0xba336206ff1ce731), |
139 | | (0xb6387a7d2389630d, 0x39995103e9f1818f), |
140 | | ]; |
141 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
142 | | |
143 | 0 | let w0 = f_polyeval12( |
144 | 0 | x2.hi, |
145 | 0 | f64::from_bits(W[3].1), |
146 | 0 | f64::from_bits(W[4].1), |
147 | 0 | f64::from_bits(W[5].1), |
148 | 0 | f64::from_bits(W[6].1), |
149 | 0 | f64::from_bits(W[7].1), |
150 | 0 | f64::from_bits(W[8].1), |
151 | 0 | f64::from_bits(W[9].1), |
152 | 0 | f64::from_bits(W[10].1), |
153 | 0 | f64::from_bits(W[11].1), |
154 | 0 | f64::from_bits(W[12].1), |
155 | 0 | f64::from_bits(W[13].1), |
156 | 0 | f64::from_bits(W[14].1), |
157 | | ); |
158 | | |
159 | 0 | let mut w = DoubleDouble::mul_f64_add(x2, w0, DoubleDouble::from_bit_pair(W[2])); |
160 | 0 | w = DoubleDouble::mul_add(x2, w, DoubleDouble::from_bit_pair(W[1])); |
161 | 0 | w = DoubleDouble::mul_add(x2, w, DoubleDouble::from_bit_pair(W[0])); |
162 | | |
163 | | const Z: [(u64, u64); 15] = [ |
164 | | (0xbc5ddfd831a70821, 0x3fb2e4d699cbd01f), |
165 | | (0xbc6d93e63489aea6, 0xbfc6bbcb41034286), |
166 | | (0xbc1b88525c2e130b, 0x3f9075b1bbf41364), |
167 | | (0x3be097334e26e578, 0xbf41a6206b7b973d), |
168 | | (0x3b51c64a34c78cda, 0x3ee3e99794203bbd), |
169 | | (0xbb1c407b0f5b2805, 0xbe7bce4a600d3ea4), |
170 | | (0xbaa57d1e1e88c9ca, 0x3e0a6ee796b871b6), |
171 | | (0x3a3b6e7030a77899, 0xbd92393d82c6b2e4), |
172 | | (0x397fcfedacb03781, 0x3d131085da82054c), |
173 | | (0xb8e45f51f6118e46, 0xbc8f4ed4b492ebcc), |
174 | | (0xb89bd46046c3c8de, 0x3c04b7ac8a1b15d0), |
175 | | (0x37d1a206fb205e32, 0xbb769201941d0d49), |
176 | | (0x3782f38acbf23993, 0x3ae4987e587ab039), |
177 | | (0x36b691bdabf5672b, 0xba4ff1953e0a7c5b), |
178 | | (0x3636e1c8cd260e18, 0x39b55031dc5e1967), |
179 | | ]; |
180 | 0 | let z0 = f_polyeval12( |
181 | 0 | x2.hi, |
182 | 0 | f64::from_bits(Z[3].1), |
183 | 0 | f64::from_bits(Z[4].1), |
184 | 0 | f64::from_bits(Z[5].1), |
185 | 0 | f64::from_bits(Z[6].1), |
186 | 0 | f64::from_bits(Z[7].1), |
187 | 0 | f64::from_bits(Z[8].1), |
188 | 0 | f64::from_bits(Z[9].1), |
189 | 0 | f64::from_bits(Z[10].1), |
190 | 0 | f64::from_bits(Z[11].1), |
191 | 0 | f64::from_bits(Z[12].1), |
192 | 0 | f64::from_bits(Z[13].1), |
193 | 0 | f64::from_bits(Z[14].1), |
194 | | ); |
195 | | |
196 | 0 | let mut z = DoubleDouble::mul_f64_add(x2, z0, DoubleDouble::from_bit_pair(Z[2])); |
197 | 0 | z = DoubleDouble::mul_add(x2, z, DoubleDouble::from_bit_pair(Z[1])); |
198 | 0 | z = DoubleDouble::mul_add(x2, z, DoubleDouble::from_bit_pair(Z[0])); |
199 | 0 | let w_log = log_dd_fast(x); |
200 | 0 | let p = DoubleDouble::mul_add(w, w_log, -z); |
201 | 0 | let err = f_fmla( |
202 | 0 | p.hi, |
203 | 0 | f64::from_bits(0x3c50000000000000), // 2^-58 |
204 | 0 | f64::from_bits(0x3c30000000000000), // 2^-60 |
205 | | ); |
206 | 0 | let ub = p.hi + (p.lo + err); |
207 | 0 | let lb = p.hi + (p.lo - err); |
208 | 0 | if ub == lb { |
209 | 0 | return p.to_f64(); |
210 | 0 | } |
211 | 0 | y0_near_zero(x, w_log) |
212 | 0 | } |
213 | | |
214 | | /** |
215 | | Generated by SageMath: |
216 | | Evaluates: |
217 | | Y0(x) = 2/pi*(euler_gamma + log(x/2))*J0(x) - sum((-1)^m*(x/2)^(2*m)/(m!)^2*sum(1+1/2 + ... 1/m)) |
218 | | expressed as: |
219 | | Y0(x)=log(x)*W0(x) - Z0(x) |
220 | | ```python |
221 | | from sage.all import * |
222 | | |
223 | | R = LaurentSeriesRing(RealField(300), 'x',default_prec=300) |
224 | | x = R.gen() |
225 | | N = 10 # Number of terms (adjust as needed) |
226 | | gamma = RealField(300)(euler_gamma) |
227 | | d2 = RealField(300)(2) |
228 | | pi = RealField(300).pi() |
229 | | |
230 | | # Define J0(x) Taylor expansion at x = 0 |
231 | | def j_series(n, x): |
232 | | return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)]) |
233 | | |
234 | | J0_series = j_series(0, x) |
235 | | |
236 | | def z_series(x): |
237 | | return sum([(-1)**m * (x/2)**(ZZ(2)*ZZ(m)) / ZZ(m).factorial()**ZZ(2) * sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) for m in range(1, N)]) |
238 | | |
239 | | W0 = (d2/pi) * J0_series |
240 | | Z0 = -gamma * (d2/pi) * J0_series + RealField(300)(2).log() * (d2/pi) * J0_series + (d2/pi) * z_series(x) |
241 | | |
242 | | # see the series |
243 | | print(W0) |
244 | | print(Z0) |
245 | | ``` |
246 | | **/ |
247 | | #[cold] |
248 | | #[inline(never)] |
249 | 0 | fn y0_near_zero(x: f64, w_log: DoubleDouble) -> f64 { |
250 | | const W: [(u64, u64); 15] = [ |
251 | | (0xbc86b01ec5417056, 0x3fe45f306dc9c883), |
252 | | (0x3c66b01ec5417056, 0xbfc45f306dc9c883), |
253 | | (0xbc26b01ec5417056, 0x3f845f306dc9c883), |
254 | | (0xbbd67fe4a5feb897, 0xbf321bb945252402), |
255 | | (0x3b767fe4a5feb897, 0x3ed21bb945252402), |
256 | | (0xbaf5c2495706f745, 0xbe672db9f21b0f5f), |
257 | | (0x3a90c8209874dfad, 0x3df49a6c656d62ff), |
258 | | (0x3a12921e91b07dd0, 0xbd7ae90af76a4d0f), |
259 | | (0xb992921e91b07dd0, 0x3cfae90af76a4d0f), |
260 | | (0x39089b0d8a9228ca, 0xbc754331c053fdad), |
261 | | (0x3878d321ddfd3c6e, 0x3beb3749ebf0a0dd), |
262 | | (0x37e77548130d809b, 0xbb5cca5ae46eae67), |
263 | | (0xb73a848e7ca1c943, 0x3ac9976d3cd4293f), |
264 | | (0xb6c884706195a054, 0xba336206ff1ce731), |
265 | | (0xb6387a7d2389630d, 0x39995103e9f1818f), |
266 | | ]; |
267 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
268 | 0 | let w = f_polyeval15( |
269 | 0 | x2, |
270 | 0 | DoubleDouble::from_bit_pair(W[0]), |
271 | 0 | DoubleDouble::from_bit_pair(W[1]), |
272 | 0 | DoubleDouble::from_bit_pair(W[2]), |
273 | 0 | DoubleDouble::from_bit_pair(W[3]), |
274 | 0 | DoubleDouble::from_bit_pair(W[4]), |
275 | 0 | DoubleDouble::from_bit_pair(W[5]), |
276 | 0 | DoubleDouble::from_bit_pair(W[6]), |
277 | 0 | DoubleDouble::from_bit_pair(W[7]), |
278 | 0 | DoubleDouble::from_bit_pair(W[8]), |
279 | 0 | DoubleDouble::from_bit_pair(W[9]), |
280 | 0 | DoubleDouble::from_bit_pair(W[10]), |
281 | 0 | DoubleDouble::from_bit_pair(W[11]), |
282 | 0 | DoubleDouble::from_bit_pair(W[12]), |
283 | 0 | DoubleDouble::from_bit_pair(W[13]), |
284 | 0 | DoubleDouble::from_bit_pair(W[14]), |
285 | | ); |
286 | | |
287 | | const Z: [(u64, u64); 15] = [ |
288 | | (0xbc5ddfd831a70821, 0x3fb2e4d699cbd01f), |
289 | | (0xbc6d93e63489aea6, 0xbfc6bbcb41034286), |
290 | | (0xbc1b88525c2e130b, 0x3f9075b1bbf41364), |
291 | | (0x3be097334e26e578, 0xbf41a6206b7b973d), |
292 | | (0x3b51c64a34c78cda, 0x3ee3e99794203bbd), |
293 | | (0xbb1c407b0f5b2805, 0xbe7bce4a600d3ea4), |
294 | | (0xbaa57d1e1e88c9ca, 0x3e0a6ee796b871b6), |
295 | | (0x3a3b6e7030a77899, 0xbd92393d82c6b2e4), |
296 | | (0x397fcfedacb03781, 0x3d131085da82054c), |
297 | | (0xb8e45f51f6118e46, 0xbc8f4ed4b492ebcc), |
298 | | (0xb89bd46046c3c8de, 0x3c04b7ac8a1b15d0), |
299 | | (0x37d1a206fb205e32, 0xbb769201941d0d49), |
300 | | (0x3782f38acbf23993, 0x3ae4987e587ab039), |
301 | | (0x36b691bdabf5672b, 0xba4ff1953e0a7c5b), |
302 | | (0x3636e1c8cd260e18, 0x39b55031dc5e1967), |
303 | | ]; |
304 | 0 | let z = f_polyeval15( |
305 | 0 | x2, |
306 | 0 | DoubleDouble::from_bit_pair(Z[0]), |
307 | 0 | DoubleDouble::from_bit_pair(Z[1]), |
308 | 0 | DoubleDouble::from_bit_pair(Z[2]), |
309 | 0 | DoubleDouble::from_bit_pair(Z[3]), |
310 | 0 | DoubleDouble::from_bit_pair(Z[4]), |
311 | 0 | DoubleDouble::from_bit_pair(Z[5]), |
312 | 0 | DoubleDouble::from_bit_pair(Z[6]), |
313 | 0 | DoubleDouble::from_bit_pair(Z[7]), |
314 | 0 | DoubleDouble::from_bit_pair(Z[8]), |
315 | 0 | DoubleDouble::from_bit_pair(Z[9]), |
316 | 0 | DoubleDouble::from_bit_pair(Z[10]), |
317 | 0 | DoubleDouble::from_bit_pair(Z[11]), |
318 | 0 | DoubleDouble::from_bit_pair(Z[12]), |
319 | 0 | DoubleDouble::from_bit_pair(Z[13]), |
320 | 0 | DoubleDouble::from_bit_pair(Z[14]), |
321 | | ); |
322 | 0 | DoubleDouble::mul_add(w, w_log, -z).to_f64() |
323 | 0 | } |
324 | | |
325 | | /** |
326 | | Path for transient area between 1.35 to 2. |
327 | | **/ |
328 | | #[inline] |
329 | 0 | pub(crate) fn y0_transient_area_fast(x: f64) -> f64 { |
330 | | /** |
331 | | Polynomial generated by Wolfram: |
332 | | ```text |
333 | | <<FunctionApproximations` |
334 | | ClearAll["Global`*"] |
335 | | f[x_]:= BesselY[0,x + 2.1971413260310170351490335626990] |
336 | | {approx,error} = MiniMaxApproximation[f[x],{x,{1.35 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },27,0},WorkingPrecision->120] |
337 | | poly=error[[1]]; |
338 | | coeffs=CoefficientList[poly,x]; |
339 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
340 | | ``` |
341 | | **/ |
342 | | const C: [(u64, u64); 28] = [ |
343 | | (0xbc689e111675434b, 0x3fe0aa48442f014b), |
344 | | (0x396ffb11562e8c70, 0x3cc1806f07aceb3c), |
345 | | (0xbc6156edff56513d, 0xbfd0aa48442f0030), |
346 | | (0xbc278dbff5ee7db4, 0x3fa439fac165db2d), |
347 | | (0x3c1f9463c2023663, 0x3f80d2af4ebc8fc4), |
348 | | (0xbbee09e5733a1236, 0x3f4f716488aebd9c), |
349 | | (0xbbf261a1f255ddf4, 0xbf5444bd2a7e6254), |
350 | | (0x3bd4f543544f1fe7, 0x3f384c300e8674d8), |
351 | | (0xbb96ef8d95fe049b, 0xbf217a0fc83af41e), |
352 | | (0x3ba2be82573ae98d, 0x3f0dbc664048e495), |
353 | | (0xbb942b15646c85f2, 0xbef8522f83e4a3e3), |
354 | | (0x3b7a127725ba4606, 0x3ee775c010ce4146), |
355 | | (0x3ae4a02f0b2a18e2, 0x3e7d1d7cf40f9697), |
356 | | (0x3b8fcf1a3d27236b, 0x3eea9c226c21712d), |
357 | | (0xbb70b3aa0a1e9ffb, 0x3ef8f237831ec74b), |
358 | | (0x3baa6c24261245f3, 0x3f08ebfea3ea469e), |
359 | | (0x3bb5fa1b8c4587c4, 0x3f1474022b90cbda), |
360 | | (0x3b69545db8d098d1, 0x3f1d153dc04c51c0), |
361 | | (0x3bc68eab6520d21b, 0x3f2198a4578cb6ca), |
362 | | (0xbbc255734bc49c8b, 0x3f2212febf7cecdd), |
363 | | (0x3bb8dd02722339f5, 0x3f1f314deec17049), |
364 | | (0x3bbb6ef8f04b26a2, 0x3f1657d051699088), |
365 | | (0x3b878233fbf501dc, 0x3f0a1a422dafcef6), |
366 | | (0xbb73730138d1dbc2, 0x3ef8423cd021f1dd), |
367 | | (0x3b7e2a0d7009d709, 0x3ee145cae37afe1b), |
368 | | (0x3b6af21aeaba4e57, 0x3ec1bc74380f6d7b), |
369 | | (0xbb3607fb9242657f, 0x3e977341fc10cdc8), |
370 | | (0xbac747923880f651, 0x3e5e30218bc1fee3), |
371 | | ]; |
372 | | |
373 | | const ZERO: DoubleDouble = |
374 | | DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243)); |
375 | | |
376 | 0 | let r = DoubleDouble::full_add_f64(-ZERO, x); |
377 | | |
378 | 0 | let p0 = f_polyeval24( |
379 | 0 | r.to_f64(), |
380 | 0 | f64::from_bits(C[4].1), |
381 | 0 | f64::from_bits(C[5].1), |
382 | 0 | f64::from_bits(C[6].1), |
383 | 0 | f64::from_bits(C[7].1), |
384 | 0 | f64::from_bits(C[8].1), |
385 | 0 | f64::from_bits(C[9].1), |
386 | 0 | f64::from_bits(C[10].1), |
387 | 0 | f64::from_bits(C[11].1), |
388 | 0 | f64::from_bits(C[12].1), |
389 | 0 | f64::from_bits(C[13].1), |
390 | 0 | f64::from_bits(C[14].1), |
391 | 0 | f64::from_bits(C[15].1), |
392 | 0 | f64::from_bits(C[16].1), |
393 | 0 | f64::from_bits(C[17].1), |
394 | 0 | f64::from_bits(C[18].1), |
395 | 0 | f64::from_bits(C[19].1), |
396 | 0 | f64::from_bits(C[20].1), |
397 | 0 | f64::from_bits(C[21].1), |
398 | 0 | f64::from_bits(C[22].1), |
399 | 0 | f64::from_bits(C[23].1), |
400 | 0 | f64::from_bits(C[24].1), |
401 | 0 | f64::from_bits(C[25].1), |
402 | 0 | f64::from_bits(C[26].1), |
403 | 0 | f64::from_bits(C[27].1), |
404 | | ); |
405 | | |
406 | 0 | let mut p = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(C[3])); |
407 | 0 | p = DoubleDouble::mul_add(p, r, DoubleDouble::from_bit_pair(C[2])); |
408 | 0 | p = DoubleDouble::mul_add(p, r, DoubleDouble::from_bit_pair(C[1])); |
409 | 0 | p = DoubleDouble::mul_add(p, r, DoubleDouble::from_bit_pair(C[0])); |
410 | | |
411 | 0 | let err = f_fmla( |
412 | 0 | p.hi, |
413 | 0 | f64::from_bits(0x3c50000000000000), // 2^-58 |
414 | 0 | f64::from_bits(0x3b10000000000000), // 2^-78 |
415 | | ); |
416 | 0 | let ub = p.hi + (p.lo + err); |
417 | 0 | let lb = p.hi + (p.lo - err); |
418 | 0 | if ub != lb { |
419 | 0 | return y0_transient_area_moderate(x); |
420 | 0 | } |
421 | 0 | p.to_f64() |
422 | 0 | } |
423 | | |
424 | | /** |
425 | | Path for transient area between 1.35 to 2. |
426 | | **/ |
427 | 0 | fn y0_transient_area_moderate(x: f64) -> f64 { |
428 | | /** |
429 | | Polynomial generated by Wolfram: |
430 | | ```text |
431 | | <<FunctionApproximations` |
432 | | ClearAll["Global`*"] |
433 | | f[x_]:= BesselY[0,x + 2.1971413260310170351490335626990] |
434 | | {approx,error} = MiniMaxApproximation[f[x],{x,{1.35 - 2.1971413260310170351490335626990, 2- 2.1971413260310170351490335626990 },27,0},WorkingPrecision->120] |
435 | | poly=error[[1]]; |
436 | | coeffs=CoefficientList[poly,x]; |
437 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
438 | | ``` |
439 | | **/ |
440 | | const C: [(u64, u64); 28] = [ |
441 | | (0xbc689e111675434b, 0x3fe0aa48442f014b), |
442 | | (0x396ffb11562e8c70, 0x3cc1806f07aceb3c), |
443 | | (0xbc6156edff56513d, 0xbfd0aa48442f0030), |
444 | | (0xbc278dbff5ee7db4, 0x3fa439fac165db2d), |
445 | | (0x3c1f9463c2023663, 0x3f80d2af4ebc8fc4), |
446 | | (0xbbee09e5733a1236, 0x3f4f716488aebd9c), |
447 | | (0xbbf261a1f255ddf4, 0xbf5444bd2a7e6254), |
448 | | (0x3bd4f543544f1fe7, 0x3f384c300e8674d8), |
449 | | (0xbb96ef8d95fe049b, 0xbf217a0fc83af41e), |
450 | | (0x3ba2be82573ae98d, 0x3f0dbc664048e495), |
451 | | (0xbb942b15646c85f2, 0xbef8522f83e4a3e3), |
452 | | (0x3b7a127725ba4606, 0x3ee775c010ce4146), |
453 | | (0x3ae4a02f0b2a18e2, 0x3e7d1d7cf40f9697), |
454 | | (0x3b8fcf1a3d27236b, 0x3eea9c226c21712d), |
455 | | (0xbb70b3aa0a1e9ffb, 0x3ef8f237831ec74b), |
456 | | (0x3baa6c24261245f3, 0x3f08ebfea3ea469e), |
457 | | (0x3bb5fa1b8c4587c4, 0x3f1474022b90cbda), |
458 | | (0x3b69545db8d098d1, 0x3f1d153dc04c51c0), |
459 | | (0x3bc68eab6520d21b, 0x3f2198a4578cb6ca), |
460 | | (0xbbc255734bc49c8b, 0x3f2212febf7cecdd), |
461 | | (0x3bb8dd02722339f5, 0x3f1f314deec17049), |
462 | | (0x3bbb6ef8f04b26a2, 0x3f1657d051699088), |
463 | | (0x3b878233fbf501dc, 0x3f0a1a422dafcef6), |
464 | | (0xbb73730138d1dbc2, 0x3ef8423cd021f1dd), |
465 | | (0x3b7e2a0d7009d709, 0x3ee145cae37afe1b), |
466 | | (0x3b6af21aeaba4e57, 0x3ec1bc74380f6d7b), |
467 | | (0xbb3607fb9242657f, 0x3e977341fc10cdc8), |
468 | | (0xbac747923880f651, 0x3e5e30218bc1fee3), |
469 | | ]; |
470 | | |
471 | | const ZERO: DoubleDouble = |
472 | | DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243)); |
473 | | |
474 | 0 | let mut r = DoubleDouble::full_add_f64(-ZERO, x); |
475 | 0 | r = DoubleDouble::from_exact_add(r.hi, r.lo); |
476 | | |
477 | 0 | let p0 = f_polyeval13( |
478 | 0 | r.to_f64(), |
479 | 0 | f64::from_bits(C[15].1), |
480 | 0 | f64::from_bits(C[16].1), |
481 | 0 | f64::from_bits(C[17].1), |
482 | 0 | f64::from_bits(C[18].1), |
483 | 0 | f64::from_bits(C[19].1), |
484 | 0 | f64::from_bits(C[20].1), |
485 | 0 | f64::from_bits(C[21].1), |
486 | 0 | f64::from_bits(C[22].1), |
487 | 0 | f64::from_bits(C[23].1), |
488 | 0 | f64::from_bits(C[24].1), |
489 | 0 | f64::from_bits(C[25].1), |
490 | 0 | f64::from_bits(C[26].1), |
491 | 0 | f64::from_bits(C[27].1), |
492 | | ); |
493 | | |
494 | 0 | let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(C[14])); |
495 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[13])); |
496 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[12])); |
497 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[11])); |
498 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[10])); |
499 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[9])); |
500 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[8])); |
501 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[7])); |
502 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[6])); |
503 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[5])); |
504 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[4])); |
505 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[3])); |
506 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[2])); |
507 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[1])); |
508 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(C[0])); |
509 | | |
510 | 0 | let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo); |
511 | 0 | let err = f_fmla( |
512 | 0 | p.hi, |
513 | 0 | f64::from_bits(0x3c10000000000000), // 2^-62 |
514 | 0 | f64::from_bits(0x3a30000000000000), // 2^-91 |
515 | | ); |
516 | 0 | let ub = p.hi + (p.lo + err); |
517 | 0 | let lb = p.hi + (p.lo - err); |
518 | 0 | if ub != lb { |
519 | 0 | return y0_transient_area_hard(x); |
520 | 0 | } |
521 | 0 | p.to_f64() |
522 | 0 | } |
523 | | |
524 | | #[cold] |
525 | | #[inline(never)] |
526 | 0 | fn y0_transient_area_hard(x: f64) -> f64 { |
527 | | const ZERO: DyadicFloat128 = DyadicFloat128 { |
528 | | sign: DyadicSign::Pos, |
529 | | exponent: -126, |
530 | | mantissa: 0x8c9df6a6_ff921721_70d796f3_2017e155_u128, |
531 | | }; |
532 | 0 | let r = DyadicFloat128::new_from_f64(x) - ZERO; |
533 | | /* |
534 | | Polynomial generated by Wolfram: |
535 | | ```text |
536 | | <<FunctionApproximations` |
537 | | ClearAll["Global`*"] |
538 | | f[x_]:= BesselY[0,x + 2.1971413260310170351490335626990] |
539 | | {approx,error} = MiniMaxApproximation[f[x],{x,{1.35 - 2.1971413260310170351490335626990, 2- 2.1971413260310170351490335626990 },27,0},WorkingPrecision->120] |
540 | | poly=error[[1]]; |
541 | | coeffs=CoefficientList[poly,x]; |
542 | | TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]] |
543 | | ``` |
544 | | */ |
545 | | static C: [DyadicFloat128; 28] = [ |
546 | | DyadicFloat128 { |
547 | | sign: DyadicSign::Pos, |
548 | | exponent: -128, |
549 | | mantissa: 0x85524221_780a573b_0f774c55_e5a946dc_u128, |
550 | | }, |
551 | | DyadicFloat128 { |
552 | | sign: DyadicSign::Pos, |
553 | | exponent: -178, |
554 | | mantissa: 0x8c03783d_6759e3ff_622ac5d1_8df27811_u128, |
555 | | }, |
556 | | DyadicFloat128 { |
557 | | sign: DyadicSign::Neg, |
558 | | exponent: -129, |
559 | | mantissa: 0x85524221_78018115_6edff565_13cf55ab_u128, |
560 | | }, |
561 | | DyadicFloat128 { |
562 | | sign: DyadicSign::Pos, |
563 | | exponent: -132, |
564 | | mantissa: 0xa1cfd60b_2ed96743_9200508c_125cf3d8_u128, |
565 | | }, |
566 | | DyadicFloat128 { |
567 | | sign: DyadicSign::Pos, |
568 | | exponent: -134, |
569 | | mantissa: 0x86957a75_e47e21f9_463c2023_663178df_u128, |
570 | | }, |
571 | | DyadicFloat128 { |
572 | | sign: DyadicSign::Pos, |
573 | | exponent: -138, |
574 | | mantissa: 0xfb8b2445_75ecdc3e_c35198bd_b9330775_u128, |
575 | | }, |
576 | | DyadicFloat128 { |
577 | | sign: DyadicSign::Neg, |
578 | | exponent: -137, |
579 | | mantissa: 0xa225e953_f312a24c_343e4abb_be73338f_u128, |
580 | | }, |
581 | | DyadicFloat128 { |
582 | | sign: DyadicSign::Pos, |
583 | | exponent: -139, |
584 | | mantissa: 0xc2618074_33a6c29e_a86a89e3_fcde0075_u128, |
585 | | }, |
586 | | DyadicFloat128 { |
587 | | sign: DyadicSign::Neg, |
588 | | exponent: -140, |
589 | | mantissa: 0x8bd07e41_d7a0f05b_be3657f8_126b9715_u128, |
590 | | }, |
591 | | DyadicFloat128 { |
592 | | sign: DyadicSign::Pos, |
593 | | exponent: -142, |
594 | | mantissa: 0xede33202_4724aa57_d04ae75d_31ad69cd_u128, |
595 | | }, |
596 | | DyadicFloat128 { |
597 | | sign: DyadicSign::Neg, |
598 | | exponent: -143, |
599 | | mantissa: 0xc2917c1f_251f1a85_62ac8d90_be4550ff_u128, |
600 | | }, |
601 | | DyadicFloat128 { |
602 | | sign: DyadicSign::Pos, |
603 | | exponent: -144, |
604 | | mantissa: 0xbbae0086_720a31a1_27725ba4_605e0479_u128, |
605 | | }, |
606 | | DyadicFloat128 { |
607 | | sign: DyadicSign::Pos, |
608 | | exponent: -151, |
609 | | mantissa: 0xe8ebe7a0_7cb4b852_80bc2ca8_63864ee0_u128, |
610 | | }, |
611 | | DyadicFloat128 { |
612 | | sign: DyadicSign::Pos, |
613 | | exponent: -144, |
614 | | mantissa: 0xd4e11361_0b896bf9_e347a4e4_6d642f8e_u128, |
615 | | }, |
616 | | DyadicFloat128 { |
617 | | sign: DyadicSign::Pos, |
618 | | exponent: -143, |
619 | | mantissa: 0xc791bc18_f63a577a_62afaf0b_002753e2_u128, |
620 | | }, |
621 | | DyadicFloat128 { |
622 | | sign: DyadicSign::Pos, |
623 | | exponent: -142, |
624 | | mantissa: 0xc75ff51f_5234f34d_8484c248_be6beef2_u128, |
625 | | }, |
626 | | DyadicFloat128 { |
627 | | sign: DyadicSign::Pos, |
628 | | exponent: -141, |
629 | | mantissa: 0xa3a0115c_865ed2bf_437188b0_f87883dc_u128, |
630 | | }, |
631 | | DyadicFloat128 { |
632 | | sign: DyadicSign::Pos, |
633 | | exponent: -141, |
634 | | mantissa: 0xe8a9ee02_628e0019_545db8d0_98d12eff_u128, |
635 | | }, |
636 | | DyadicFloat128 { |
637 | | sign: DyadicSign::Pos, |
638 | | exponent: -140, |
639 | | mantissa: 0x8cc522bc_65b652d1_d56ca41a_43537f6a_u128, |
640 | | }, |
641 | | DyadicFloat128 { |
642 | | sign: DyadicSign::Pos, |
643 | | exponent: -140, |
644 | | mantissa: 0x9097f5fb_e766e5b5_5196876c_6ea798ce_u128, |
645 | | }, |
646 | | DyadicFloat128 { |
647 | | sign: DyadicSign::Pos, |
648 | | exponent: -141, |
649 | | mantissa: 0xf98a6f76_0b824b1b_a04e4467_3ea487e1_u128, |
650 | | }, |
651 | | DyadicFloat128 { |
652 | | sign: DyadicSign::Pos, |
653 | | exponent: -141, |
654 | | mantissa: 0xb2be828b_4c84436d_df1e0964_d44f420f_u128, |
655 | | }, |
656 | | DyadicFloat128 { |
657 | | sign: DyadicSign::Pos, |
658 | | exponent: -142, |
659 | | mantissa: 0xd0d2116d_7e77b0bc_119fdfa8_0ee2dfee_u128, |
660 | | }, |
661 | | DyadicFloat128 { |
662 | | sign: DyadicSign::Pos, |
663 | | exponent: -143, |
664 | | mantissa: 0xc211e681_0f8ee764_67f63971_21f1a199_u128, |
665 | | }, |
666 | | DyadicFloat128 { |
667 | | sign: DyadicSign::Pos, |
668 | | exponent: -144, |
669 | | mantissa: 0x8a2e571b_d7f0d9e2_a0d7009d_70969fa2_u128, |
670 | | }, |
671 | | DyadicFloat128 { |
672 | | sign: DyadicSign::Pos, |
673 | | exponent: -146, |
674 | | mantissa: 0x8de3a1c0_7b6bdb5e_435d5749_cadf3edd_u128, |
675 | | }, |
676 | | DyadicFloat128 { |
677 | | sign: DyadicSign::Pos, |
678 | | exponent: -149, |
679 | | mantissa: 0xbb9a0fe0_866e3d3f_008db7b3_5029fe59_u128, |
680 | | }, |
681 | | DyadicFloat128 { |
682 | | sign: DyadicSign::Pos, |
683 | | exponent: -153, |
684 | | mantissa: 0xf1810c5e_0ff717a2_e1b71dfc_26babb9f_u128, |
685 | | }, |
686 | | ]; |
687 | 0 | let mut z = C[27]; |
688 | 0 | for i in (0..27).rev() { |
689 | 0 | z = r * z + C[i]; |
690 | 0 | } |
691 | 0 | z.fast_as_f64() |
692 | 0 | } |
693 | | |
694 | | /// This method on small range searches for nearest zero or extremum. |
695 | | /// Then picks stored series expansion at the point end evaluates the poly at the point. |
696 | | #[inline] |
697 | 0 | pub(crate) fn y0_small_argument_fast(x: f64) -> f64 { |
698 | 0 | let x_abs = x; |
699 | | |
700 | | // let avg_step = 74.607799 / 47.0; |
701 | | // let inv_step = 1.0 / avg_step; |
702 | | |
703 | | const INV_STEP: f64 = 0.6299609508652038; |
704 | | |
705 | 0 | let fx = x_abs * INV_STEP; |
706 | | const Y0_ZEROS_COUNT: f64 = (Y0_ZEROS.len() - 1) as f64; |
707 | 0 | let idx0 = unsafe { fx.min(Y0_ZEROS_COUNT).to_int_unchecked::<usize>() }; |
708 | 0 | let idx1 = unsafe { |
709 | 0 | fx.cpu_ceil() |
710 | 0 | .min(Y0_ZEROS_COUNT) |
711 | 0 | .to_int_unchecked::<usize>() |
712 | | }; |
713 | | |
714 | 0 | let found_zero0 = DoubleDouble::from_bit_pair(Y0_ZEROS[idx0]); |
715 | 0 | let found_zero1 = DoubleDouble::from_bit_pair(Y0_ZEROS[idx1]); |
716 | | |
717 | 0 | let dist0 = (found_zero0.hi - x_abs).abs(); |
718 | 0 | let dist1 = (found_zero1.hi - x_abs).abs(); |
719 | | |
720 | 0 | let (found_zero, idx, dist) = if dist0 < dist1 { |
721 | 0 | (found_zero0, idx0, dist0) |
722 | | } else { |
723 | 0 | (found_zero1, idx1, dist1) |
724 | | }; |
725 | | |
726 | 0 | if idx == 0 { |
727 | 0 | return j0_maclaurin_series(x); |
728 | 0 | } |
729 | | |
730 | 0 | let is_too_close_to_zero = dist.abs() < 1e-3; |
731 | | |
732 | 0 | let c = if is_too_close_to_zero { |
733 | 0 | &Y0_COEFFS_TAYLOR[idx - 1] |
734 | | } else { |
735 | 0 | &Y0_COEFFS[idx - 1] |
736 | | }; |
737 | | |
738 | 0 | let r = DoubleDouble::full_add_f64(-found_zero, x); |
739 | | |
740 | | // We hit exact zero, value, better to return it directly |
741 | 0 | if dist == 0. { |
742 | 0 | return f64::from_bits(Y0_ZEROS_VALUES[idx]); |
743 | 0 | } |
744 | | |
745 | 0 | let p = f_polyeval22( |
746 | 0 | r.hi, |
747 | 0 | f64::from_bits(c[6].1), |
748 | 0 | f64::from_bits(c[7].1), |
749 | 0 | f64::from_bits(c[8].1), |
750 | 0 | f64::from_bits(c[9].1), |
751 | 0 | f64::from_bits(c[10].1), |
752 | 0 | f64::from_bits(c[11].1), |
753 | 0 | f64::from_bits(c[12].1), |
754 | 0 | f64::from_bits(c[13].1), |
755 | 0 | f64::from_bits(c[14].1), |
756 | 0 | f64::from_bits(c[15].1), |
757 | 0 | f64::from_bits(c[16].1), |
758 | 0 | f64::from_bits(c[17].1), |
759 | 0 | f64::from_bits(c[18].1), |
760 | 0 | f64::from_bits(c[19].1), |
761 | 0 | f64::from_bits(c[20].1), |
762 | 0 | f64::from_bits(c[21].1), |
763 | 0 | f64::from_bits(c[22].1), |
764 | 0 | f64::from_bits(c[23].1), |
765 | 0 | f64::from_bits(c[24].1), |
766 | 0 | f64::from_bits(c[25].1), |
767 | 0 | f64::from_bits(c[26].1), |
768 | 0 | f64::from_bits(c[27].1), |
769 | | ); |
770 | | |
771 | 0 | let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[5])); |
772 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[4])); |
773 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3])); |
774 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2])); |
775 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1])); |
776 | 0 | z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0])); |
777 | 0 | let p = z; |
778 | 0 | let err = f_fmla( |
779 | 0 | p.hi, |
780 | 0 | f64::from_bits(0x3c60000000000000), // 2^-57 |
781 | 0 | f64::from_bits(0x3c20000000000000), // 2^-61 |
782 | | ); |
783 | 0 | let ub = p.hi + (p.lo + err); |
784 | 0 | let lb = p.hi + (p.lo - err); |
785 | 0 | if ub != lb { |
786 | 0 | return y0_small_argument_moderate(r, c); |
787 | 0 | } |
788 | 0 | z.to_f64() |
789 | 0 | } |
790 | | |
791 | | /// This method on small range searches for nearest zero or extremum. |
792 | | /// Then picks stored series expansion at the point end evaluates the poly at the point. |
793 | 0 | fn y0_small_argument_moderate(r: DoubleDouble, c: &[(u64, u64); 28]) -> f64 { |
794 | 0 | let c0 = &c[15..]; |
795 | | |
796 | 0 | let p0 = f_polyeval13( |
797 | 0 | r.to_f64(), |
798 | 0 | f64::from_bits(c0[0].1), |
799 | 0 | f64::from_bits(c0[1].1), |
800 | 0 | f64::from_bits(c0[2].1), |
801 | 0 | f64::from_bits(c0[3].1), |
802 | 0 | f64::from_bits(c0[4].1), |
803 | 0 | f64::from_bits(c0[5].1), |
804 | 0 | f64::from_bits(c0[6].1), |
805 | 0 | f64::from_bits(c0[7].1), |
806 | 0 | f64::from_bits(c0[8].1), |
807 | 0 | f64::from_bits(c0[9].1), |
808 | 0 | f64::from_bits(c0[10].1), |
809 | 0 | f64::from_bits(c0[11].1), |
810 | 0 | f64::from_bits(c0[12].1), |
811 | | ); |
812 | | |
813 | 0 | let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14])); |
814 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13])); |
815 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12])); |
816 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11])); |
817 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10])); |
818 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9])); |
819 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8])); |
820 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7])); |
821 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6])); |
822 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5])); |
823 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4])); |
824 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3])); |
825 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2])); |
826 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1])); |
827 | 0 | p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0])); |
828 | | |
829 | 0 | let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo); |
830 | 0 | let err = f_fmla( |
831 | 0 | p.hi, |
832 | 0 | f64::from_bits(0x3c30000000000000), // 2^-60 |
833 | 0 | f64::from_bits(0x3a30000000000000), // 2^-91 |
834 | | ); |
835 | 0 | let ub = p.hi + (p.lo + err); |
836 | 0 | let lb = p.hi + (p.lo - err); |
837 | 0 | if ub != lb { |
838 | 0 | return y0_small_argument_hard(r, c); |
839 | 0 | } |
840 | 0 | p.to_f64() |
841 | 0 | } |
842 | | |
843 | | #[cold] |
844 | | #[inline(never)] |
845 | 0 | fn y0_small_argument_hard(r: DoubleDouble, c: &[(u64, u64); 28]) -> f64 { |
846 | 0 | let mut p = DoubleDouble::from_bit_pair(c[27]); |
847 | 0 | for i in (0..27).rev() { |
848 | 0 | p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i])); |
849 | 0 | p = DoubleDouble::from_exact_add(p.hi, p.lo); |
850 | 0 | } |
851 | 0 | p.to_f64() |
852 | 0 | } |
853 | | |
854 | | /* |
855 | | Evaluates: |
856 | | Y0 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
857 | | */ |
858 | | #[inline] |
859 | 0 | pub(crate) fn y0_asympt_fast(x: f64) -> f64 { |
860 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
861 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
862 | | f64::from_bits(0x3fe9884533d43651), |
863 | | ); |
864 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
865 | | f64::from_bits(0xbc81a62633145c07), |
866 | | f64::from_bits(0xbfe921fb54442d18), |
867 | | ); |
868 | | |
869 | 0 | let recip = if x.to_bits() > 0x7fd000000000000u64 { |
870 | 0 | DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25) |
871 | | } else { |
872 | 0 | DoubleDouble::from_recip(x) |
873 | | }; |
874 | | |
875 | 0 | let alpha = bessel_0_asympt_alpha_fast(recip); |
876 | 0 | let beta = bessel_0_asympt_beta_fast(recip); |
877 | | |
878 | 0 | let AngleReduced { angle } = rem2pi_any(x); |
879 | | |
880 | | // Without full subtraction cancellation happens sometimes |
881 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
882 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
883 | | |
884 | 0 | let m_cos = sin_dd_small_fast(r0); |
885 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_cos); |
886 | 0 | let r_sqrt = DoubleDouble::from_rsqrt_fast(x); |
887 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
888 | 0 | let r = DoubleDouble::quick_mult(scale, z0); |
889 | 0 | let p = DoubleDouble::from_exact_add(r.hi, r.lo); |
890 | 0 | let err = f_fmla( |
891 | 0 | p.hi, |
892 | 0 | f64::from_bits(0x3c40000000000000), // 2^-59 |
893 | 0 | f64::from_bits(0x3c10000000000000), // 2^-62 |
894 | | ); |
895 | 0 | let ub = p.hi + (p.lo + err); |
896 | 0 | let lb = p.hi + (p.lo - err); |
897 | | |
898 | 0 | if ub == lb { |
899 | 0 | return p.to_f64(); |
900 | 0 | } |
901 | 0 | y0_asympt(x, recip, r_sqrt, angle) |
902 | 0 | } |
903 | | |
904 | | /* |
905 | | Evaluates: |
906 | | Y0 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
907 | | */ |
908 | 0 | fn y0_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 { |
909 | | const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new( |
910 | | f64::from_bits(0xbc8cbc0d30ebfd15), |
911 | | f64::from_bits(0x3fe9884533d43651), |
912 | | ); |
913 | | const MPI_OVER_4: DoubleDouble = DoubleDouble::new( |
914 | | f64::from_bits(0xbc81a62633145c07), |
915 | | f64::from_bits(0xbfe921fb54442d18), |
916 | | ); |
917 | | |
918 | 0 | let alpha = bessel_0_asympt_alpha(recip); |
919 | 0 | let beta = bessel_0_asympt_beta(recip); |
920 | | |
921 | | // Without full subtraction cancellation happens sometimes |
922 | 0 | let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha); |
923 | 0 | let r0 = DoubleDouble::full_dd_add(angle, x0pi34); |
924 | | |
925 | 0 | let m_cos = sin_dd_small(r0); |
926 | 0 | let z0 = DoubleDouble::quick_mult(beta, m_cos); |
927 | 0 | let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt); |
928 | 0 | let r = DoubleDouble::quick_mult(scale, z0); |
929 | 0 | let p = DoubleDouble::from_exact_add(r.hi, r.lo); |
930 | 0 | let err = f_fmla( |
931 | 0 | p.hi, |
932 | 0 | f64::from_bits(0x3c30000000000000), // 2^-60 |
933 | 0 | f64::from_bits(0x3a20000000000000), // 2^-93 |
934 | | ); |
935 | 0 | let ub = p.hi + (p.lo + err); |
936 | 0 | let lb = p.hi + (p.lo - err); |
937 | | |
938 | 0 | if ub == lb { |
939 | 0 | return p.to_f64(); |
940 | 0 | } |
941 | 0 | y0_asympt_hard(x) |
942 | 0 | } |
943 | | |
944 | | /* |
945 | | Evaluates: |
946 | | Y0 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x)) |
947 | | */ |
948 | | #[cold] |
949 | | #[inline(never)] |
950 | 0 | fn y0_asympt_hard(x: f64) -> f64 { |
951 | | const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 { |
952 | | sign: DyadicSign::Pos, |
953 | | exponent: -128, |
954 | | mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128, |
955 | | }; |
956 | | |
957 | | const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 { |
958 | | sign: DyadicSign::Neg, |
959 | | exponent: -128, |
960 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
961 | | }; |
962 | | |
963 | 0 | let x_dyadic = DyadicFloat128::new_from_f64(x); |
964 | 0 | let recip = DyadicFloat128::accurate_reciprocal(x); |
965 | | |
966 | 0 | let alpha = bessel_0_asympt_alpha_hard(recip); |
967 | 0 | let beta = bessel_0_asympt_beta_hard(recip); |
968 | | |
969 | 0 | let angle = rem2pi_f128(x_dyadic); |
970 | | |
971 | 0 | let x0pi34 = MPI_OVER_4 - alpha; |
972 | 0 | let r0 = angle + x0pi34; |
973 | | |
974 | 0 | let m_sin = sin_f128_small(r0); |
975 | | |
976 | 0 | let z0 = beta * m_sin; |
977 | 0 | let r_sqrt = bessel_rsqrt_hard(x, recip); |
978 | 0 | let scale = SQRT_2_OVER_PI * r_sqrt; |
979 | 0 | let p = scale * z0; |
980 | 0 | p.fast_as_f64() |
981 | 0 | } |
982 | | |
983 | | #[cfg(test)] |
984 | | mod tests { |
985 | | use super::*; |
986 | | |
987 | | #[test] |
988 | | fn test_y0() { |
989 | | //ULP should be less than 0.5, but it was 1017.1969361449036, on 3 result 0.37685001001284685, using f_y0 and MPFR 0.3768500100127904 |
990 | | assert_eq!(f_y0(3.), 0.3768500100127904); |
991 | | assert_eq!(f_y0(0.906009703874588), 0.01085796448629276); |
992 | | assert_eq!(f_y0(80.), -0.05562033908977); |
993 | | assert_eq!(f_y0(5.), -0.30851762524903376); |
994 | | assert_eq!( |
995 | | f_y0(f64::from_bits(0x3fec982eb8d417ea)), |
996 | | -0.000000000000000023389279284062102 |
997 | | ); |
998 | | assert!(f_y0(f64::NAN).is_nan()); |
999 | | assert_eq!(f_y0(f64::INFINITY), 0.); |
1000 | | assert!(f_y0(f64::NEG_INFINITY).is_nan()); |
1001 | | } |
1002 | | |
1003 | | #[test] |
1004 | | fn test_y0_edge_values() { |
1005 | | assert_eq!(f_y0(0.8900000000138676), -0.0031519646708080126); |
1006 | | assert_eq!(f_y0(0.8900000000409116), -0.0031519646469294936); |
1007 | | assert_eq!(f_y0(98.1760435789366), 0.0000000000000056889416242533015); |
1008 | | assert_eq!( |
1009 | | f_y0(91.8929453121571802176), |
1010 | | -0.00000000000000007281665706677893 |
1011 | | ); |
1012 | | assert_eq!( |
1013 | | f_y0(f64::from_bits(0x6e7c1d741dc52512u64)), |
1014 | | f64::from_bits(0x2696f860815bc669) |
1015 | | ); |
1016 | | assert_eq!(f_y0(f64::from_bits(0x3e04cdee58a47edd)), -13.58605001628649); |
1017 | | assert_eq!( |
1018 | | f_y0(0.89357696627916749), |
1019 | | -0.000000000000000023389279284062102 |
1020 | | ); |
1021 | | } |
1022 | | } |