/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.28/src/tangent/atanpi.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dd_fmla, dyad_fmla, f_fmla}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::shared_eval::poly_dd_3; |
32 | | use crate::tangent::atan::{ATAN_CIRCLE, ATAN_REDUCE}; |
33 | | |
/// 1/π in double-double precision.
/// NOTE: `DoubleDouble::new(lo, hi)` takes the low (correction) limb first —
/// 0x3fd45f306dc9c883 is the high part (≈ 0.318309886...), the other word is
/// the tail extending the value to ~106 bits.
const ONE_OVER_PI_DD: DoubleDouble = DoubleDouble::new(
    f64::from_bits(0xbc76b01ec5417056),
    f64::from_bits(0x3fd45f306dc9c883),
);

const ONE_OVER_3PI: f64 = f64::from_bits(0x3fbb2995e7b7b604); // approximates 1/(3pi)
40 | | |
/// atan(x)/pi for tiny inputs, |x| < 2^-54, where atanpi(x) ≈ x/pi.
///
/// Two families of hard-to-round inputs are answered explicitly before the
/// generic path. The `fma(±2^-600, 2^-600, c)` calls add a directed ±2^-1200
/// perturbation to the stored answer `c`, steering the final rounding (and
/// raising the inexact/underflow flags) in the required direction.
/// The generic path computes h = x * (1/pi).hi plus a correction term that is
/// scaled by 2^106 while being accumulated (to avoid intermediate underflow)
/// and scaled back by 2^-106 at the end.
fn atanpi_small(x: f64) -> f64 {
    if x == 0. {
        // Return x itself so the sign of zero is preserved.
        return x;
    }
    // Hard-to-round case #1: a specific subnormal |x|.
    if x.abs() == f64::from_bits(0x0015cba89af1f855) {
        return if x > 0. {
            f_fmla(
                // -2^-600 * 2^-600: nudge the constant result downward by 2^-1200.
                f64::from_bits(0x9a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x0006f00f7cd3a40b),
            )
        } else {
            f_fmla(
                // +2^-600 * 2^-600: nudge the (negative) result upward by 2^-1200.
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x8006f00f7cd3a40b),
            )
        };
    }
    // generic worst case
    let mut v = x.to_bits();
    if (v & 0xfffffffffffff) == 0x59af9a1194efe
    // +/-0x1.59af9a1194efe*2^e
    {
        let e = v >> 52;
        if (e & 0x7ff) > 2 {
            // Build the answer's bit pattern directly: same sign, exponent
            // lowered by 2 (÷4 ≈ ×1/pi in magnitude class), fixed mantissa.
            v = ((e - 2) << 52) | 0xb824198b94a89;
            return if x > 0. {
                dd_fmla(
                    f64::from_bits(0x9a70000000000000),
                    f64::from_bits(0x1a70000000000000),
                    f64::from_bits(v),
                )
            } else {
                dd_fmla(
                    f64::from_bits(0x1a70000000000000),
                    f64::from_bits(0x1a70000000000000),
                    f64::from_bits(v),
                )
            };
        }
    }
    let h = x * ONE_OVER_PI_DD.hi;
    /* Assuming h = x*ONE_OVER_PI_DD.hi - e, the correction term is
    e + x * ONE_OVER_PIL, but we need to scale values to avoid underflow. */
    // 0x4690000000000000 = 2^106: scale both products up so `corr` stays normal.
    let mut corr = dd_fmla(
        x * f64::from_bits(0x4690000000000000),
        ONE_OVER_PI_DD.hi,
        -h * f64::from_bits(0x4690000000000000),
    );
    corr = dd_fmla(
        x * f64::from_bits(0x4690000000000000),
        ONE_OVER_PI_DD.lo,
        corr,
    );
    // now return h + corr * 2^-106

    dyad_fmla(corr, f64::from_bits(0x3950000000000000), h)
}
100 | | |
/// Hardware-FMA twin of [`atanpi_small`]: identical algorithm and constants,
/// but every fused multiply-add goes through `f64::mul_add` so it compiles to
/// a native FMA instruction under the enabled target features.
///
/// # Safety
/// Caller must ensure the CPU supports AVX and FMA (checked at dispatch time
/// in `f_atanpi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn atanpi_small_fma(x: f64) -> f64 {
    if x == 0. {
        // Preserve the sign of zero.
        return x;
    }
    // Hard-to-round case: see atanpi_small for the role of the ±2^-600
    // factors (a directed 2^-1200 perturbation of the stored constant).
    if x.abs() == f64::from_bits(0x0015cba89af1f855) {
        return if x > 0. {
            f64::mul_add(
                f64::from_bits(0x9a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x0006f00f7cd3a40b),
            )
        } else {
            f64::mul_add(
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x8006f00f7cd3a40b),
            )
        };
    }
    // generic worst case
    let mut v = x.to_bits();
    if (v & 0xfffffffffffff) == 0x59af9a1194efe
    // +/-0x1.59af9a1194efe*2^e
    {
        let e = v >> 52;
        if (e & 0x7ff) > 2 {
            // Construct the correctly-rounded answer's bit pattern directly.
            v = ((e - 2) << 52) | 0xb824198b94a89;
            return if x > 0. {
                f64::mul_add(
                    f64::from_bits(0x9a70000000000000),
                    f64::from_bits(0x1a70000000000000),
                    f64::from_bits(v),
                )
            } else {
                f64::mul_add(
                    f64::from_bits(0x1a70000000000000),
                    f64::from_bits(0x1a70000000000000),
                    f64::from_bits(v),
                )
            };
        }
    }
    let h = x * ONE_OVER_PI_DD.hi;
    /* Assuming h = x*ONE_OVER_PI_DD.hi - e, the correction term is
    e + x * ONE_OVER_PIL, but we need to scale values to avoid underflow. */
    // 2^106 scaling keeps the correction term away from the subnormal range.
    let mut corr = f64::mul_add(
        x * f64::from_bits(0x4690000000000000),
        ONE_OVER_PI_DD.hi,
        -h * f64::from_bits(0x4690000000000000),
    );
    corr = f64::mul_add(
        x * f64::from_bits(0x4690000000000000),
        ONE_OVER_PI_DD.lo,
        corr,
    );
    // now return h + corr * 2^-106

    f64::mul_add(corr, f64::from_bits(0x3950000000000000), h)
}
162 | | |
/* Deal with the case where |x| is large:
for x > 0, atanpi(x) = 1/2 - 1/pi * 1/x + 1/(3pi) * 1/x^3 + O(1/x^5)
for x < 0, atanpi(x) = -1/2 - 1/pi * 1/x + 1/(3pi) * 1/x^3 + O(1/x^5).
The next term 1/5*x^5/pi is smaller than 2^-107 * atanpi(x)
when |x| > 0x1.bep20. */
fn atanpi_asympt(x: f64) -> f64 {
    // ±1/2, the limit value; the series below subtracts the small tail.
    let h = f64::copysign(0.5, x);
    // approximate 1/x as yh + yl
    let rcy = DoubleDouble::from_quick_recip(x);
    let mut m = DoubleDouble::quick_mult(rcy, ONE_OVER_PI_DD);
    // m + l ~ 1/pi * 1/x
    m.hi = -m.hi;
    // Fold in the 1/(3pi) * 1/x^3 term (note -m.lo: the whole tail is negated).
    m.lo = dd_fmla(ONE_OVER_3PI * rcy.hi, rcy.hi * rcy.hi, -m.lo);
    // m + l ~ - 1/pi * 1/x + 1/(3pi) * 1/x^3
    let vh = DoubleDouble::from_exact_add(h, m.hi);
    m.hi = vh.hi;
    m = DoubleDouble::from_exact_add(vh.lo, m.lo);
    if m.hi.abs() == f64::from_bits(0x3c80000000000000) {
        // this is 1/2 ulp(atan(x))
        // Exact rounding-tie case: nudge m.hi by one bit in the direction
        // indicated by the residual m.lo so the final addition rounds correctly.
        m.hi = if m.hi * m.lo > 0. {
            f64::copysign(f64::from_bits(0x3c80000000000001), m.hi)
        } else {
            f64::copysign(f64::from_bits(0x3c7fffffffffffff), m.hi)
        };
    }
    vh.hi + m.hi
}
190 | | |
191 | | #[inline] |
192 | 0 | fn atanpi_tiny(x: f64) -> f64 { |
193 | 0 | let mut dx = DoubleDouble::quick_mult_f64(ONE_OVER_PI_DD, x); |
194 | 0 | dx.lo = dd_fmla(-ONE_OVER_3PI * x, x * x, dx.lo); |
195 | 0 | dx.to_f64() |
196 | 0 | } |
197 | | |
/// Slow, higher-precision fallback used when the fast path's rounding test is
/// inconclusive.
///
/// `x` is the original argument, `a` the fast-path approximation of atanpi(x)
/// (used only to pick the argument-reduction table index). The routine redoes
/// the reduction atan(x) = atan(ta) + atan((x-ta)/(1+x*ta)) in double-double
/// arithmetic, evaluates a degree-7 odd polynomial with double-double leading
/// coefficients, then multiplies by 1/pi.
#[cold]
fn as_atanpi_refine2(x: f64, a: f64) -> f64 {
    // Huge arguments: asymptotic series around ±1/2.
    if x.abs() > f64::from_bits(0x413be00000000000) {
        return atanpi_asympt(x);
    }
    // Very small arguments: two-term Taylor series.
    if x.abs() < f64::from_bits(0x3e4c700000000000) {
        return atanpi_tiny(x);
    }
    // Odd-polynomial coefficients for atan: (hi, lo) double-double pairs
    // (-1/3, 1/5, -1/7) ...
    const CH: [(u64, u64); 3] = [
        (0xbfd5555555555555, 0xbc75555555555555),
        (0x3fc999999999999a, 0xbc6999999999bcb8),
        (0xbfc2492492492492, 0xbc6249242093c016),
    ];
    // ... followed by plain-double tail coefficients (~1/9, -1/11, 1/13, -1/15).
    const CL: [u64; 4] = [
        0x3fbc71c71c71c71c,
        0xbfb745d1745d1265,
        0x3fb3b13b115bcbc4,
        0xbfb1107c41ad3253,
    ];
    // Map |a| (≈ atanpi(x)) to a reduction-table index i in [0, 128].
    let phi = ((a.abs()) * f64::from_bits(0x40545f306dc9c883) + 256.5).to_bits();
    let i: i64 = ((phi >> (52 - 8)) & 0xff) as i64;
    // dh = reduced argument (x - ta) / (1 + x*ta) in double-double.
    let dh: DoubleDouble = if i == 128 {
        // Near ±pi/2: the reduced argument degenerates to -1/x.
        DoubleDouble::from_quick_recip(-x)
    } else {
        let ta = f64::copysign(f64::from_bits(ATAN_REDUCE[i as usize].0), x);
        let dzta = DoubleDouble::from_exact_mult(x, ta);
        let zmta = x - ta;
        // Compensated computation of v = 1 + x*ta with error term ev.
        let v = 1. + dzta.hi;
        let d = 1. - v;
        let ev = (d + dzta.hi) - ((d + v) - 1.) + dzta.lo;
        // r ≈ 1/v; rl is the Newton-style correction making (rl, r) a
        // double-double reciprocal of (v + ev).
        let r = 1.0 / v;
        let rl = f_fmla(-ev, r, dd_fmla(r, -v, 1.0)) * r;
        DoubleDouble::quick_mult_f64(DoubleDouble::new(rl, r), zmta)
    };
    let d2 = DoubleDouble::quick_mult(dh, dh);
    let h4 = d2.hi * d2.hi;
    let h3 = DoubleDouble::quick_mult(dh, d2);

    // Tail polynomial in d2 (plain doubles suffice at this magnitude).
    let fl0 = f_fmla(d2.hi, f64::from_bits(CL[1]), f64::from_bits(CL[0]));
    let fl1 = f_fmla(d2.hi, f64::from_bits(CL[3]), f64::from_bits(CL[2]));

    let fl = d2.hi * f_fmla(h4, fl1, fl0);
    // Head polynomial in double-double, seeded with the tail value.
    let mut f = poly_dd_3(d2, CH, fl);
    f = DoubleDouble::quick_mult(h3, f);
    // ah + al + at will hold atan(x) as a triple-word sum.
    let (ah, mut al, mut at);
    if i == 0 {
        // No table term: atan(x) ≈ dh + f.
        ah = dh.hi;
        al = f.hi;
        at = f.lo;
    } else {
        let mut df = 0.;
        if i < 128 {
            // Low word of the tabulated atan value (i == 128 has none).
            df = f64::copysign(1.0, x) * f64::from_bits(ATAN_REDUCE[i as usize].1);
        }
        // i * pi/256 split across three words (hi/mid/lo of the constant).
        let id = f64::copysign(i as f64, x);
        ah = f64::from_bits(0x3f8921fb54442d00) * id;
        al = f64::from_bits(0x3c88469898cc5180) * id;
        at = f64::from_bits(0xb97fc8f8cbb5bf80) * id;
        // Accumulate table low word, reduced argument, and polynomial value.
        let v0 = DoubleDouble::add(DoubleDouble::new(at, al), DoubleDouble::new(0., df));
        let v1 = DoubleDouble::add(v0, dh);
        let v2 = DoubleDouble::add(v1, f);
        al = v2.hi;
        at = v2.lo;
    }

    // Renormalize the triple word to a double-double, then divide by pi.
    let v2 = DoubleDouble::from_exact_add(ah, al);
    let v1 = DoubleDouble::from_exact_add(v2.lo, at);

    let z0 = DoubleDouble::quick_mult(DoubleDouble::new(v1.hi, v2.hi), ONE_OVER_PI_DD);
    // atanpi_end
    z0.to_f64()
}
270 | | |
#[inline(always)]
/// Generic atan(x)/pi kernel, parameterized over the FMA strategy so the same
/// code serves both the portable and the AVX/FMA builds:
/// - `fma`: fast fused multiply-add (may be an emulation on the portable path)
/// - `dd_fma`: fused multiply-add that must behave as a true FMA (no split)
/// - `dd_mul`: double-double × double-double multiplication
/// - `atanpi_small`: handler for |x| < 2^-54
///
/// Strategy: compute atan(x) with a rounding-error bound, multiply by 1/pi,
/// and compare upper/lower bounds (`ub`/`lb`); if they round differently,
/// fall back to the slow path `as_atanpi_refine2`.
fn atanpi_gen_impl<
    Q: Fn(f64, f64, f64) -> f64,
    F: Fn(f64, f64, f64) -> f64,
    DdMul: Fn(DoubleDouble, DoubleDouble) -> DoubleDouble,
    AtanpiSmall: Fn(f64) -> f64,
>(
    x: f64,
    fma: Q,
    dd_fma: F,
    dd_mul: DdMul,
    atanpi_small: AtanpiSmall,
) -> f64 {
    // Minimax coefficients (even powers) for atan on the reduced interval.
    const CH: [u64; 4] = [
        0x3ff0000000000000,
        0xbfd555555555552b,
        0x3fc9999999069c20,
        0xbfc248d2c8444ac6,
    ];
    let t = x.to_bits();
    let at: u64 = t & 0x7fff_ffff_ffff_ffff; // |x| bits
    // Coarse exponent-based index into ATAN_CIRCLE (only used in the
    // mid-range branch below, where it is guaranteed in range).
    let mut i = (at >> 51).wrapping_sub(2030u64);
    if at < 0x3f7b21c475e6362au64 {
        // |x| < 0.006624
        if at < 0x3c90000000000000u64 {
            // |x| < 2^-54
            return atanpi_small(x);
        }
        if x == 0. {
            return x;
        }
        // Taylor-like coefficients for atan(x) - x on small |x|.
        const CH2: [u64; 4] = [
            0xbfd5555555555555,
            0x3fc99999999998c1,
            0xbfc249249176aec0,
            0x3fbc711fd121ae80,
        ];
        let x2 = x * x;
        let x3 = x * x2;
        let x4 = x2 * x2;

        let f0 = fma(x2, f64::from_bits(CH2[3]), f64::from_bits(CH2[2]));
        let f1 = fma(x2, f64::from_bits(CH2[1]), f64::from_bits(CH2[0]));

        let f = x3 * dd_fma(x4, f0, f1);
        // begin_atanpi
        /* Here x+f approximates atan(x), with absolute error bounded by
        0x4.8p-52*f (see atan.c). After multiplying by 1/pi this error
        will be bounded by 0x1.6fp-52*f. For |x| < 0x1.b21c475e6362ap-8
        we have |f| < 2^-16*|x|, thus the error is bounded by
        0x1.6fp-52*2^-16*|x| < 0x1.6fp-68. */
        // multiply x + f by 1/pi
        let hy = dd_mul(DoubleDouble::new(f, x), ONE_OVER_PI_DD);
        /* The rounding error in muldd and the approximation error between
        1/pi and ONE_OVER_PIH + ONE_OVER_PIL are covered by the difference
        between 0x4.8p-52*pi and 0x1.6fp-52, which is > 2^-61.8. */
        // Rounding test: if adding ±error*|x| rounds to the same double,
        // that double is the correctly rounded result.
        let mut ub = hy.hi + dd_fma(f64::from_bits(0x3bb6f00000000000), x, hy.lo);
        let lb = hy.hi + dd_fma(f64::from_bits(0xbbb6f00000000000), x, hy.lo);
        if ub == lb {
            return ub;
        }
        // end_atanpi
        ub = (f + f * f64::from_bits(0x3cd2000000000000)) + x; // atanpi_specific, original value in atan
        return as_atanpi_refine2(x, ub);
    }
    // now |x| >= 0x1.b21c475e6362ap-8
    let h; // reduced argument
    let mut a: DoubleDouble; // accumulated atan of the reduction point
    if at > 0x4062ded8e34a9035u64 {
        // |x| > 0x1.2ded8e34a9035p+7, atanpi|x| > 0.49789
        if at >= 0x43445f306dc9c883u64 {
            // |x| >= 0x1.45f306dc9c883p+53, atanpi|x| > 0.5 - 0x1p-55
            if at >= (0x7ffu64 << 52) {
                // case Inf or NaN
                if at == 0x7ffu64 << 52 {
                    // Inf
                    return f64::copysign(0.5, x);
                } // atanpi_specific
                return x + x; // NaN (propagates, raises invalid on sNaN)
            }
            // Result is ±(0.5 - one ulp step): ±0.5 rounded toward zero.
            return f64::copysign(0.5, x) - f64::copysign(f64::from_bits(0x3c70000000000000), x);
        }
        // Reduce via atan(x) = ±pi/2 - atan(1/x): h = -1/x, a = ±pi/2 (dd).
        h = -1.0 / x;
        a = DoubleDouble::new(
            f64::copysign(f64::from_bits(0x3c91a62633145c07), x),
            f64::copysign(f64::from_bits(0x3ff921fb54442d18), x),
        );
    } else {
        // 0x1.b21c475e6362ap-8 <= |x| <= 0x1.2ded8e34a9035p+7
        /* we need to deal with |x| = 1 separately since in this case
        h=0 below, and the error is measured in terms of multiple of h */
        if at == 0x3ff0000000000000 {
            // |x| = 1: atanpi(±1) = ±1/4 exactly.
            return f64::copysign(f64::from_bits(0x3fd0000000000000), x);
        }
        // Quadratic interpolation in ATAN_CIRCLE refines the coarse index i
        // into the final ATAN_REDUCE table index.
        let u: u64 = t & 0x0007ffffffffffff;
        let ut = u >> (51 - 16);
        let ut2 = (ut * ut) >> 16;
        let vc = ATAN_CIRCLE[i as usize];
        i = (((vc[0] as u64).wrapping_shl(16)) + ut * (vc[1] as u64) - ut2 * (vc[2] as u64))
            >> (16 + 9);
        let va = ATAN_REDUCE[i as usize];
        let ta = f64::copysign(1.0, x) * f64::from_bits(va.0);
        let id = f64::copysign(1.0, x) * i as f64;
        // Reduced argument: tan(atan(x) - atan(ta)) = (x - ta)/(1 + x*ta).
        h = (x - ta) / fma(x, ta, 1.);
        // a = i*pi/256 (split constant) + tabulated low word of atan(ta).
        a = DoubleDouble::new(
            fma(
                f64::copysign(1.0, x),
                f64::from_bits(va.1),
                f64::from_bits(0x3c88469898cc5170) * id,
            ),
            f64::from_bits(0x3f8921fb54442d00) * id,
        );
    }
    let h2 = h * h;
    let h4 = h2 * h2;

    // Estrin-style evaluation of the even polynomial in h.
    let f0 = fma(h2, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
    let f1 = fma(h2, f64::from_bits(CH[1]), f64::from_bits(CH[0]));

    let f = fma(h4, f0, f1);
    a.lo = dd_fma(h, f, a.lo);
    // begin_atanpi
    /* Now ah + al approximates atan(x) with error bounded by 0x3.fp-52*h
    (see atan.c), thus by 0x1.41p-52*h after multiplication by 1/pi.
    We normalize ah+al so that the rounding error in muldd is negligible
    below. */
    let e0 = h * f64::from_bits(0x3ccf800000000000); // original value in atan.c
    let ub0 = (a.lo + e0) + a.hi; // original value in atan.c
    a = DoubleDouble::from_exact_add(a.hi, a.lo);
    a = dd_mul(a, ONE_OVER_PI_DD);
    /* The rounding error in muldd() and the approximation error between 1/pi
    and ONE_OVER_PIH+ONE_OVER_PIL are absorbed when rounding up 0x3.fp-52*pi
    to 0x1.41p-52. */
    let e = h * f64::from_bits(0x3cb4100000000000); // atanpi_specific
    // end_atanpi
    // Rounding test; disagreement sends us to the slow refinement path.
    let ub = (a.lo + e) + a.hi;
    let lb = (a.lo - e) + a.hi;
    if ub == lb {
        return ub;
    }
    as_atanpi_refine2(x, ub0)
}
416 | | |
417 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
418 | | #[target_feature(enable = "avx", enable = "fma")] |
419 | 0 | unsafe fn atanpi_fma_impl(x: f64) -> f64 { |
420 | 0 | atanpi_gen_impl( |
421 | 0 | x, |
422 | | f64::mul_add, |
423 | | f64::mul_add, |
424 | | DoubleDouble::quick_mult_fma, |
425 | 0 | |x| unsafe { atanpi_small_fma(x) }, |
426 | | ) |
427 | 0 | } |
428 | | |
/// Computes atan(x)/pi
///
/// Max ULP 0.5
///
/// On x86/x86_64 the implementation is chosen once at first call (cached in a
/// `OnceLock`): the AVX+FMA specialization when the CPU supports it, otherwise
/// the portable software-FMA path. Other architectures always use the
/// portable path.
pub fn f_atanpi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        atanpi_gen_impl(x, f_fmla, dd_fmla, DoubleDouble::quick_mult, atanpi_small)
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Cached function pointer; feature detection runs only once.
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                atanpi_fma_impl
            } else {
                // Safe fn coerces to the `unsafe fn` pointer type stored above.
                fn def_atanpi(x: f64) -> f64 {
                    atanpi_gen_impl(x, f_fmla, dd_fmla, DoubleDouble::quick_mult, atanpi_small)
                }
                def_atanpi
            }
        });
        // SAFETY: `atanpi_fma_impl` is only stored after a successful runtime
        // check for the AVX and FMA features it requires.
        unsafe { q(x) }
    }
}
456 | | |
#[cfg(test)]
mod tests {

    use super::*;

    // Spot checks covering the distinct code paths: a huge argument (near
    // ±1/2 asymptote), a tiny/subnormal-range argument, small |x| of both
    // signs, and mid-range values.
    #[test]
    fn atanpi_test() {
        assert_eq!(f_atanpi(119895888825197.97), 0.49999999999999734);
        assert_eq!(f_atanpi(1.5610674235815122E-307), 4.969031939254545e-308);
        assert_eq!(f_atanpi(0.00004577636903266291), 0.000014571070806516354);
        assert_eq!(f_atanpi(-0.00004577636903266291), -0.000014571070806516354);
        assert_eq!(f_atanpi(-0.4577636903266291), -0.13664770469904508);
        assert_eq!(f_atanpi(0.9876636903266291), 0.24802445507819193);
    }
}