/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/tangent/atan2.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
32 | | use crate::polyeval::f_polyeval4; |
33 | | use crate::rounding::CpuRound; |
34 | | |
35 | | // atan(i/64) with i = 0..64, generated by Sollya with: |
36 | | // > for i from 0 to 64 do { |
37 | | // a = round(atan(i/64), D, RN); |
38 | | // b = round(atan(i/64) - a, D, RN); |
39 | | // print("{", b, ",", a, "},"); |
40 | | // }; |
41 | | pub(crate) static ATAN_I: [(u64, u64); 65] = [ |
42 | | (0x0000000000000000, 0x0000000000000000), |
43 | | (0xbc2220c39d4dff50, 0x3f8fff555bbb729b), |
44 | | (0xbc35ec431444912c, 0x3f9ffd55bba97625), |
45 | | (0xbc086ef8f794f105, 0x3fa7fb818430da2a), |
46 | | (0xbc3c934d86d23f1d, 0x3faff55bb72cfdea), |
47 | | (0x3c5ac4ce285df847, 0x3fb3f59f0e7c559d), |
48 | | (0xbc5cfb654c0c3d98, 0x3fb7ee182602f10f), |
49 | | (0x3c5f7b8f29a05987, 0x3fbbe39ebe6f07c3), |
50 | | (0xbc4cd37686760c17, 0x3fbfd5ba9aac2f6e), |
51 | | (0xbc4b485914dacf8c, 0x3fc1e1fafb043727), |
52 | | (0x3c661a3b0ce9281b, 0x3fc3d6eee8c6626c), |
53 | | (0xbc5054ab2c010f3d, 0x3fc5c9811e3ec26a), |
54 | | (0x3c5347b0b4f881ca, 0x3fc7b97b4bce5b02), |
55 | | (0x3c4cf601e7b4348e, 0x3fc9a6a8e96c8626), |
56 | | (0x3c217b10d2e0e5ab, 0x3fcb90d7529260a2), |
57 | | (0x3c6c648d1534597e, 0x3fcd77d5df205736), |
58 | | (0x3c68ab6e3cf7afbd, 0x3fcf5b75f92c80dd), |
59 | | (0x3c762e47390cb865, 0x3fd09dc597d86362), |
60 | | (0x3c630ca4748b1bf9, 0x3fd18bf5a30bf178), |
61 | | (0xbc7077cdd36dfc81, 0x3fd278372057ef46), |
62 | | (0xbc6963a544b672d8, 0x3fd362773707ebcc), |
63 | | (0xbc75d5e43c55b3ba, 0x3fd44aa436c2af0a), |
64 | | (0xbc62566480884082, 0x3fd530ad9951cd4a), |
65 | | (0xbc7a725715711f00, 0x3fd614840309cfe2), |
66 | | (0xbc7c63aae6f6e918, 0x3fd6f61941e4def1), |
67 | | (0x3c769c885c2b249a, 0x3fd7d5604b63b3f7), |
68 | | (0x3c7b6d0ba3748fa8, 0x3fd8b24d394a1b25), |
69 | | (0x3c79e6c988fd0a77, 0x3fd98cd5454d6b18), |
70 | | (0xbc724dec1b50b7ff, 0x3fda64eec3cc23fd), |
71 | | (0x3c7ae187b1ca5040, 0x3fdb3a911da65c6c), |
72 | | (0xbc7cc1ce70934c34, 0x3fdc0db4c94ec9f0), |
73 | | (0xbc7a2cfa4418f1ad, 0x3fdcde53432c1351), |
74 | | (0x3c7a2b7f222f65e2, 0x3fddac670561bb4f), |
75 | | (0x3c70e53dc1bf3435, 0x3fde77eb7f175a34), |
76 | | (0xbc6a3992dc382a23, 0x3fdf40dd0b541418), |
77 | | (0xbc8b32c949c9d593, 0x3fe0039c73c1a40c), |
78 | | (0xbc7d5b495f6349e6, 0x3fe0657e94db30d0), |
79 | | (0x3c5974fa13b5404f, 0x3fe0c6145b5b43da), |
80 | | (0xbc52bdaee1c0ee35, 0x3fe1255d9bfbd2a9), |
81 | | (0x3c8c621cec00c301, 0x3fe1835a88be7c13), |
82 | | (0xbc5928df287a668f, 0x3fe1e00babdefeb4), |
83 | | (0x3c6c421c9f38224e, 0x3fe23b71e2cc9e6a), |
84 | | (0xbc709e73b0c6c087, 0x3fe2958e59308e31), |
85 | | (0x3c8c5d5e9ff0cf8d, 0x3fe2ee628406cbca), |
86 | | (0x3c81021137c71102, 0x3fe345f01cce37bb), |
87 | | (0xbc82304331d8bf46, 0x3fe39c391cd4171a), |
88 | | (0x3c7ecf8b492644f0, 0x3fe3f13fb89e96f4), |
89 | | (0xbc7f76d0163f79c8, 0x3fe445065b795b56), |
90 | | (0x3c72419a87f2a458, 0x3fe4978fa3269ee1), |
91 | | (0x3c84a33dbeb3796c, 0x3fe4e8de5bb6ec04), |
92 | | (0xbc81bb74abda520c, 0x3fe538f57b89061f), |
93 | | (0xbc75e5c9d8c5a950, 0x3fe587d81f732fbb), |
94 | | (0x3c60028e4bc5e7ca, 0x3fe5d58987169b18), |
95 | | (0xbc62b785350ee8c1, 0x3fe6220d115d7b8e), |
96 | | (0xbc76ea6febe8bbba, 0x3fe66d663923e087), |
97 | | (0xbc8a80386188c50e, 0x3fe6b798920b3d99), |
98 | | (0xbc78c34d25aadef6, 0x3fe700a7c5784634), |
99 | | (0x3c47b2a6165884a1, 0x3fe748978fba8e0f), |
100 | | (0x3c8406a089803740, 0x3fe78f6bbd5d315e), |
101 | | (0x3c8560821e2f3aa9, 0x3fe7d528289fa093), |
102 | | (0xbc7bf76229d3b917, 0x3fe819d0b7158a4d), |
103 | | (0x3c66b66e7fc8b8c3, 0x3fe85d69576cc2c5), |
104 | | (0xbc855b9a5e177a1b, 0x3fe89ff5ff57f1f8), |
105 | | (0xbc7ec182ab042f61, 0x3fe8e17aa99cc05e), |
106 | | (0x3c81a62633145c07, 0x3fe921fb54442d18), |
107 | | ]; |
108 | | |
109 | | // Sage math: |
110 | | // print("pub(crate) static ATAN_RATIONAL_128: [DyadicFloat128; 65] = [") |
111 | | // for i in range(65): |
112 | | // x = D3(i)/D3(64) |
113 | | // v = atan(x) |
114 | | // print_dyadic(v) |
115 | | // |
116 | | // print("];") |
117 | | pub(crate) static ATAN_RATIONAL_128: [DyadicFloat128; 65] = [ |
118 | | DyadicFloat128 { |
119 | | sign: DyadicSign::Pos, |
120 | | exponent: 0, |
121 | | mantissa: 0x0_u128, |
122 | | }, |
123 | | DyadicFloat128 { |
124 | | sign: DyadicSign::Pos, |
125 | | exponent: -134, |
126 | | mantissa: 0xfffaaadd_db94d5bb_e78c5640_15f76048_u128, |
127 | | }, |
128 | | DyadicFloat128 { |
129 | | sign: DyadicSign::Pos, |
130 | | exponent: -133, |
131 | | mantissa: 0xffeaaddd_4bb12542_779d776d_da8c6214_u128, |
132 | | }, |
133 | | DyadicFloat128 { |
134 | | sign: DyadicSign::Pos, |
135 | | exponent: -132, |
136 | | mantissa: 0xbfdc0c21_86d14fcf_220e10d6_1df56ec7_u128, |
137 | | }, |
138 | | DyadicFloat128 { |
139 | | sign: DyadicSign::Pos, |
140 | | exponent: -132, |
141 | | mantissa: 0xffaaddb9_67ef4e36_cb2792dc_0e2e0d51_u128, |
142 | | }, |
143 | | DyadicFloat128 { |
144 | | sign: DyadicSign::Pos, |
145 | | exponent: -131, |
146 | | mantissa: 0x9facf873_e2aceb58_99c50bbf_08e6cdf6_u128, |
147 | | }, |
148 | | DyadicFloat128 { |
149 | | sign: DyadicSign::Pos, |
150 | | exponent: -131, |
151 | | mantissa: 0xbf70c130_17887460_93567e78_4cf83676_u128, |
152 | | }, |
153 | | DyadicFloat128 { |
154 | | sign: DyadicSign::Pos, |
155 | | exponent: -131, |
156 | | mantissa: 0xdf1cf5f3_783e1bef_71e5340b_30e5d9ef_u128, |
157 | | }, |
158 | | DyadicFloat128 { |
159 | | sign: DyadicSign::Pos, |
160 | | exponent: -131, |
161 | | mantissa: 0xfeadd4d5_617b6e32_c897989f_3e888ef8_u128, |
162 | | }, |
163 | | DyadicFloat128 { |
164 | | sign: DyadicSign::Pos, |
165 | | exponent: -130, |
166 | | mantissa: 0x8f0fd7d8_21b93725_bd375929_83a0af9a_u128, |
167 | | }, |
168 | | DyadicFloat128 { |
169 | | sign: DyadicSign::Pos, |
170 | | exponent: -130, |
171 | | mantissa: 0x9eb77746_331362c3_47619d25_0360fe85_u128, |
172 | | }, |
173 | | DyadicFloat128 { |
174 | | sign: DyadicSign::Pos, |
175 | | exponent: -130, |
176 | | mantissa: 0xae4c08f1_f6134efa_b54d3fef_0c2de994_u128, |
177 | | }, |
178 | | DyadicFloat128 { |
179 | | sign: DyadicSign::Pos, |
180 | | exponent: -130, |
181 | | mantissa: 0xbdcbda5e_72d81134_7b0b4f88_1c9c7488_u128, |
182 | | }, |
183 | | DyadicFloat128 { |
184 | | sign: DyadicSign::Pos, |
185 | | exponent: -130, |
186 | | mantissa: 0xcd35474b_643130e7_b00f3da1_a46eeb3b_u128, |
187 | | }, |
188 | | DyadicFloat128 { |
189 | | sign: DyadicSign::Pos, |
190 | | exponent: -130, |
191 | | mantissa: 0xdc86ba94_93051022_f621a5c1_cb552f03_u128, |
192 | | }, |
193 | | DyadicFloat128 { |
194 | | sign: DyadicSign::Pos, |
195 | | exponent: -130, |
196 | | mantissa: 0xebbeaef9_02b9b38c_91a2a68b_2fbd78e8_u128, |
197 | | }, |
198 | | DyadicFloat128 { |
199 | | sign: DyadicSign::Pos, |
200 | | exponent: -130, |
201 | | mantissa: 0xfadbafc9_6406eb15_6dc79ef5_f7a217e6_u128, |
202 | | }, |
203 | | DyadicFloat128 { |
204 | | sign: DyadicSign::Pos, |
205 | | exponent: -129, |
206 | | mantissa: 0x84ee2cbe_c31b12c5_c8e72197_0cabd3a3_u128, |
207 | | }, |
208 | | DyadicFloat128 { |
209 | | sign: DyadicSign::Pos, |
210 | | exponent: -129, |
211 | | mantissa: 0x8c5fad18_5f8bc130_ca4748b1_bf88298d_u128, |
212 | | }, |
213 | | DyadicFloat128 { |
214 | | sign: DyadicSign::Pos, |
215 | | exponent: -129, |
216 | | mantissa: 0x93c1b902_bf7a2df1_06459240_6fe1447a_u128, |
217 | | }, |
218 | | DyadicFloat128 { |
219 | | sign: DyadicSign::Pos, |
220 | | exponent: -129, |
221 | | mantissa: 0x9b13b9b8_3f5e5e69_c5abb498_d27af328_u128, |
222 | | }, |
223 | | DyadicFloat128 { |
224 | | sign: DyadicSign::Pos, |
225 | | exponent: -129, |
226 | | mantissa: 0xa25521b6_15784d45_43787549_88b8d9e3_u128, |
227 | | }, |
228 | | DyadicFloat128 { |
229 | | sign: DyadicSign::Pos, |
230 | | exponent: -129, |
231 | | mantissa: 0xa9856cca_8e6a4eda_99b7f77b_f7d9e8c1_u128, |
232 | | }, |
233 | | DyadicFloat128 { |
234 | | sign: DyadicSign::Pos, |
235 | | exponent: -129, |
236 | | mantissa: 0xb0a42018_4e7f0cb1_b51d51dc_200a0fc3_u128, |
237 | | }, |
238 | | DyadicFloat128 { |
239 | | sign: DyadicSign::Pos, |
240 | | exponent: -129, |
241 | | mantissa: 0xb7b0ca0f_26f78473_8aa32122_dcfe4483_u128, |
242 | | }, |
243 | | DyadicFloat128 { |
244 | | sign: DyadicSign::Pos, |
245 | | exponent: -129, |
246 | | mantissa: 0xbeab025b_1d9fbad3_910b8564_93411026_u128, |
247 | | }, |
248 | | DyadicFloat128 { |
249 | | sign: DyadicSign::Pos, |
250 | | exponent: -129, |
251 | | mantissa: 0xc59269ca_50d92b6d_a1746e91_f50a28de_u128, |
252 | | }, |
253 | | DyadicFloat128 { |
254 | | sign: DyadicSign::Pos, |
255 | | exponent: -129, |
256 | | mantissa: 0xcc66aa2a_6b58c33c_d9311fa1_4ed9b7c4_u128, |
257 | | }, |
258 | | DyadicFloat128 { |
259 | | sign: DyadicSign::Pos, |
260 | | exponent: -129, |
261 | | mantissa: 0xd327761e_611fe5b6_427c95e9_001e7136_u128, |
262 | | }, |
263 | | DyadicFloat128 { |
264 | | sign: DyadicSign::Pos, |
265 | | exponent: -129, |
266 | | mantissa: 0xd9d488ed_32e3635c_30f6394a_0806345d_u128, |
267 | | }, |
268 | | DyadicFloat128 { |
269 | | sign: DyadicSign::Pos, |
270 | | exponent: -129, |
271 | | mantissa: 0xe06da64a_764f7c67_c631ed96_798cb804_u128, |
272 | | }, |
273 | | DyadicFloat128 { |
274 | | sign: DyadicSign::Pos, |
275 | | exponent: -129, |
276 | | mantissa: 0xe6f29a19_609a84ba_60b77ce1_ca6dc2c8_u128, |
277 | | }, |
278 | | DyadicFloat128 { |
279 | | sign: DyadicSign::Pos, |
280 | | exponent: -129, |
281 | | mantissa: 0xed63382b_0dda7b45_6fe445ec_bc3a8d03_u128, |
282 | | }, |
283 | | DyadicFloat128 { |
284 | | sign: DyadicSign::Pos, |
285 | | exponent: -129, |
286 | | mantissa: 0xf3bf5bf8_bad1a21c_a7b837e6_86adf3fa_u128, |
287 | | }, |
288 | | DyadicFloat128 { |
289 | | sign: DyadicSign::Pos, |
290 | | exponent: -129, |
291 | | mantissa: 0xfa06e85a_a0a0be5c_66d23c7d_5dc8ecc2_u128, |
292 | | }, |
293 | | DyadicFloat128 { |
294 | | sign: DyadicSign::Pos, |
295 | | exponent: -128, |
296 | | mantissa: 0x801ce39e_0d205c99_a6d6c6c5_4d938596_u128, |
297 | | }, |
298 | | DyadicFloat128 { |
299 | | sign: DyadicSign::Pos, |
300 | | exponent: -128, |
301 | | mantissa: 0x832bf4a6_d9867e2a_4b6a09cb_61a515c1_u128, |
302 | | }, |
303 | | DyadicFloat128 { |
304 | | sign: DyadicSign::Pos, |
305 | | exponent: -128, |
306 | | mantissa: 0x8630a2da_da1ed065_d3e84ed5_013ca37e_u128, |
307 | | }, |
308 | | DyadicFloat128 { |
309 | | sign: DyadicSign::Pos, |
310 | | exponent: -128, |
311 | | mantissa: 0x892aecdf_de9547b5_094478fc_472b4afc_u128, |
312 | | }, |
313 | | DyadicFloat128 { |
314 | | sign: DyadicSign::Pos, |
315 | | exponent: -128, |
316 | | mantissa: 0x8c1ad445_f3e09b8c_439d8018_60205921_u128, |
317 | | }, |
318 | | DyadicFloat128 { |
319 | | sign: DyadicSign::Pos, |
320 | | exponent: -128, |
321 | | mantissa: 0x8f005d5e_f7f59f9b_5c835e16_65c43748_u128, |
322 | | }, |
323 | | DyadicFloat128 { |
324 | | sign: DyadicSign::Pos, |
325 | | exponent: -128, |
326 | | mantissa: 0x91db8f16_64f350e2_10e4f9c1_126e0220_u128, |
327 | | }, |
328 | | DyadicFloat128 { |
329 | | sign: DyadicSign::Pos, |
330 | | exponent: -128, |
331 | | mantissa: 0x94ac72c9_847186f6_18c4f393_f78a32f9_u128, |
332 | | }, |
333 | | DyadicFloat128 { |
334 | | sign: DyadicSign::Pos, |
335 | | exponent: -128, |
336 | | mantissa: 0x97731420_365e538b_abd3fe19_f1aeb6b3_u128, |
337 | | }, |
338 | | DyadicFloat128 { |
339 | | sign: DyadicSign::Pos, |
340 | | exponent: -128, |
341 | | mantissa: 0x9a2f80e6_71bdda20_4226f8e2_204ff3bd_u128, |
342 | | }, |
343 | | DyadicFloat128 { |
344 | | sign: DyadicSign::Pos, |
345 | | exponent: -128, |
346 | | mantissa: 0x9ce1c8e6_a0b8cdb9_f799c4e8_174cf11c_u128, |
347 | | }, |
348 | | DyadicFloat128 { |
349 | | sign: DyadicSign::Pos, |
350 | | exponent: -128, |
351 | | mantissa: 0x9f89fdc4_f4b7a1ec_f8b49264_4f0701e0_u128, |
352 | | }, |
353 | | DyadicFloat128 { |
354 | | sign: DyadicSign::Pos, |
355 | | exponent: -128, |
356 | | mantissa: 0xa22832db_cadaae08_92fe9c08_637af0e6_u128, |
357 | | }, |
358 | | DyadicFloat128 { |
359 | | sign: DyadicSign::Pos, |
360 | | exponent: -128, |
361 | | mantissa: 0xa4bc7d19_34f70924_19a87f2a_457dac9f_u128, |
362 | | }, |
363 | | DyadicFloat128 { |
364 | | sign: DyadicSign::Pos, |
365 | | exponent: -128, |
366 | | mantissa: 0xa746f2dd_b7602294_67b7d66f_2d74e019_u128, |
367 | | }, |
368 | | DyadicFloat128 { |
369 | | sign: DyadicSign::Pos, |
370 | | exponent: -128, |
371 | | mantissa: 0xa9c7abdc_4830f5c8_916a84b5_be7933f6_u128, |
372 | | }, |
373 | | DyadicFloat128 { |
374 | | sign: DyadicSign::Pos, |
375 | | exponent: -128, |
376 | | mantissa: 0xac3ec0fb_997dd6a1_a36273a5_6afa8ef4_u128, |
377 | | }, |
378 | | DyadicFloat128 { |
379 | | sign: DyadicSign::Pos, |
380 | | exponent: -128, |
381 | | mantissa: 0xaeac4c38_b4d8c080_14725e2f_3e52070a_u128, |
382 | | }, |
383 | | DyadicFloat128 { |
384 | | sign: DyadicSign::Pos, |
385 | | exponent: -128, |
386 | | mantissa: 0xb110688a_ebdc6f6a_43d65788_b9f6a7b5_u128, |
387 | | }, |
388 | | DyadicFloat128 { |
389 | | sign: DyadicSign::Pos, |
390 | | exponent: -128, |
391 | | mantissa: 0xb36b31c9_1f043691_59014174_4462f93a_u128, |
392 | | }, |
393 | | DyadicFloat128 { |
394 | | sign: DyadicSign::Pos, |
395 | | exponent: -128, |
396 | | mantissa: 0xb5bcc490_59ecc4af_f8f3cee7_5e3907d5_u128, |
397 | | }, |
398 | | DyadicFloat128 { |
399 | | sign: DyadicSign::Pos, |
400 | | exponent: -128, |
401 | | mantissa: 0xb8053e2b_c2319e73_cb2da552_10a4443d_u128, |
402 | | }, |
403 | | DyadicFloat128 { |
404 | | sign: DyadicSign::Pos, |
405 | | exponent: -128, |
406 | | mantissa: 0xba44bc7d_d470782f_654c2cb1_0942e386_u128, |
407 | | }, |
408 | | DyadicFloat128 { |
409 | | sign: DyadicSign::Pos, |
410 | | exponent: -128, |
411 | | mantissa: 0xbc7b5dea_e98af280_d4113006_e80fb290_u128, |
412 | | }, |
413 | | DyadicFloat128 { |
414 | | sign: DyadicSign::Pos, |
415 | | exponent: -128, |
416 | | mantissa: 0xbea94144_fd049aac_1043c5e7_55282e7d_u128, |
417 | | }, |
418 | | DyadicFloat128 { |
419 | | sign: DyadicSign::Pos, |
420 | | exponent: -128, |
421 | | mantissa: 0xc0ce85b8_ac526640_89dd62c4_6e92fa25_u128, |
422 | | }, |
423 | | DyadicFloat128 { |
424 | | sign: DyadicSign::Pos, |
425 | | exponent: -128, |
426 | | mantissa: 0xc2eb4abb_661628b5_b373fe45_c61bb9fb_u128, |
427 | | }, |
428 | | DyadicFloat128 { |
429 | | sign: DyadicSign::Pos, |
430 | | exponent: -128, |
431 | | mantissa: 0xc4ffaffa_bf8fbd54_8cb43d10_bc9e0221_u128, |
432 | | }, |
433 | | DyadicFloat128 { |
434 | | sign: DyadicSign::Pos, |
435 | | exponent: -128, |
436 | | mantissa: 0xc70bd54c_e602ee13_e7d54fbd_09f2be38_u128, |
437 | | }, |
438 | | DyadicFloat128 { |
439 | | sign: DyadicSign::Pos, |
440 | | exponent: -128, |
441 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
442 | | }, |
443 | | ]; |
444 | | |
445 | | // Approximate atan(x) for |x| <= 2^-7. |
446 | | // Using degree-9 Taylor polynomial: |
447 | | // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9; |
448 | | // Then the absolute error is bounded by: |
449 | | // |atan(x) - P(x)| < |x|^11/11 < 2^(-7*11) / 11 < 2^-80. |
450 | | // And the relative error is bounded by: |
451 | | // |(atan(x) - P(x))/atan(x)| < |x|^10 / 10 < 2^-73. |
452 | | // For x = x_hi + x_lo, fully expand the polynomial and drop any terms less than |
453 | | // ulp(x_hi^3 / 3) gives us: |
454 | | // P(x) ~ x_hi - x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + |
455 | | // + x_lo * (1 - x_hi^2 + x_hi^4) |
456 | | // Since p.lo is ~ x^3/3, the relative error from rounding is bounded by: |
457 | | // |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66. |
458 | | #[inline] |
459 | 0 | pub(crate) fn atan_eval(x: DoubleDouble) -> DoubleDouble { |
460 | 0 | let p_hi = x.hi; |
461 | 0 | let x_hi_sq = x.hi * x.hi; |
462 | | // c0 ~ x_hi^2 * 1/5 - 1/3 |
463 | 0 | let c0 = f_fmla( |
464 | 0 | x_hi_sq, |
465 | 0 | f64::from_bits(0x3fc999999999999a), |
466 | 0 | f64::from_bits(0xbfd5555555555555), |
467 | | ); |
468 | | // c1 ~ x_hi^2 * 1/9 - 1/7 |
469 | 0 | let c1 = f_fmla( |
470 | 0 | x_hi_sq, |
471 | 0 | f64::from_bits(0x3fbc71c71c71c71c), |
472 | 0 | f64::from_bits(0xbfc2492492492492), |
473 | | ); |
474 | | // x_hi^3 |
475 | 0 | let x_hi_3 = x_hi_sq * x.hi; |
476 | | // x_hi^4 |
477 | 0 | let x_hi_4 = x_hi_sq * x_hi_sq; |
478 | | // d0 ~ 1/3 - x_hi^2 / 5 + x_hi^4 / 7 - x_hi^6 / 9 |
479 | 0 | let d0 = f_fmla(x_hi_4, c1, c0); |
480 | | // x_lo - x_lo * x_hi^2 + x_lo * x_hi^4 |
481 | 0 | let d1 = f_fmla(x_hi_4 - x_hi_sq, x.lo, x.lo); |
482 | | // p.lo ~ -x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + |
483 | | // + x_lo * (1 - x_hi^2 + x_hi^4) |
484 | 0 | let p_lo = f_fmla(x_hi_3, d0, d1); |
485 | 0 | DoubleDouble::new(p_lo, p_hi) |
486 | 0 | } |
487 | | |
488 | | #[inline(always)] |
489 | | #[allow(unused)] |
490 | 0 | pub(crate) fn atan_eval_fma(x: DoubleDouble) -> DoubleDouble { |
491 | 0 | let p_hi = x.hi; |
492 | 0 | let x_hi_sq = x.hi * x.hi; |
493 | | // c0 ~ x_hi^2 * 1/5 - 1/3 |
494 | 0 | let c0 = f64::mul_add( |
495 | 0 | x_hi_sq, |
496 | 0 | f64::from_bits(0x3fc999999999999a), |
497 | 0 | f64::from_bits(0xbfd5555555555555), |
498 | | ); |
499 | | // c1 ~ x_hi^2 * 1/9 - 1/7 |
500 | 0 | let c1 = f64::mul_add( |
501 | 0 | x_hi_sq, |
502 | 0 | f64::from_bits(0x3fbc71c71c71c71c), |
503 | 0 | f64::from_bits(0xbfc2492492492492), |
504 | | ); |
505 | | // x_hi^3 |
506 | 0 | let x_hi_3 = x_hi_sq * x.hi; |
507 | | // x_hi^4 |
508 | 0 | let x_hi_4 = x_hi_sq * x_hi_sq; |
509 | | // d0 ~ 1/3 - x_hi^2 / 5 + x_hi^4 / 7 - x_hi^6 / 9 |
510 | 0 | let d0 = f64::mul_add(x_hi_4, c1, c0); |
511 | | // x_lo - x_lo * x_hi^2 + x_lo * x_hi^4 |
512 | 0 | let d1 = f64::mul_add(x_hi_4 - x_hi_sq, x.lo, x.lo); |
513 | | // p.lo ~ -x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + |
514 | | // + x_lo * (1 - x_hi^2 + x_hi^4) |
515 | 0 | let p_lo = f64::mul_add(x_hi_3, d0, d1); |
516 | 0 | DoubleDouble::new(p_lo, p_hi) |
517 | 0 | } |
518 | | |
519 | | #[inline] |
520 | 0 | fn atan_eval_hard(x: DyadicFloat128) -> DyadicFloat128 { |
521 | | // let x_hi_sq = x * x; |
522 | | // c0 ~ x_hi^2 * 1/5 - 1/3 |
523 | | const C: [DyadicFloat128; 4] = [ |
524 | | DyadicFloat128 { |
525 | | sign: DyadicSign::Neg, |
526 | | exponent: -129, |
527 | | mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128, |
528 | | }, |
529 | | DyadicFloat128 { |
530 | | sign: DyadicSign::Pos, |
531 | | exponent: -130, |
532 | | mantissa: 0xcccccccc_cccccccc_cccccccc_cccccccd_u128, |
533 | | }, |
534 | | DyadicFloat128 { |
535 | | sign: DyadicSign::Neg, |
536 | | exponent: -130, |
537 | | mantissa: 0x92492492_49249249_24924924_92492492_u128, |
538 | | }, |
539 | | DyadicFloat128 { |
540 | | sign: DyadicSign::Pos, |
541 | | exponent: -131, |
542 | | mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128, |
543 | | }, |
544 | | ]; |
545 | 0 | let dx2 = x * x; |
546 | | // Taylor polynomial |
547 | | // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9; |
548 | 0 | let p = f_polyeval4(dx2, C[0], C[1], C[2], C[3]); |
549 | 0 | x + dx2 * x * p |
550 | 0 | } |
551 | | |
552 | | #[cold] |
553 | | #[inline(never)] |
554 | 0 | pub(crate) fn atan2_hard(y: f64, x: f64) -> DyadicFloat128 { |
555 | | /* |
556 | | Sage math: |
557 | | from sage.all import * |
558 | | |
559 | | def format_dyadic_hex(value): |
560 | | l = hex(value)[2:] |
561 | | n = 8 |
562 | | x = [l[i:i + n] for i in range(0, len(l), n)] |
563 | | return "0x" + "_".join(x) + "_u128" |
564 | | |
565 | | def print_dyadic(value): |
566 | | (s, m, e) = RealField(128)(value).sign_mantissa_exponent(); |
567 | | print("DyadicFloat128 {") |
568 | | print(f" sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},") |
569 | | print(f" exponent: {e},") |
570 | | print(f" mantissa: {format_dyadic_hex(m)},") |
571 | | print("},") |
572 | | |
573 | | D3 = RealField(157) |
574 | | |
575 | | print("Minus Pi") |
576 | | print_dyadic(-D3.pi()) |
577 | | |
578 | | print("\nPI over 2") |
579 | | print_dyadic(D3.pi() / 2) |
580 | | |
581 | | print("\nMinus PI over 2") |
582 | | print_dyadic(-D3.pi() / 2) |
583 | | */ |
584 | | static ZERO: DyadicFloat128 = DyadicFloat128 { |
585 | | sign: DyadicSign::Pos, |
586 | | exponent: 0, |
587 | | mantissa: 0_u128, |
588 | | }; |
589 | | static MINUS_PI: DyadicFloat128 = DyadicFloat128 { |
590 | | sign: DyadicSign::Neg, |
591 | | exponent: -126, |
592 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
593 | | }; |
594 | | static PI_OVER_2: DyadicFloat128 = DyadicFloat128 { |
595 | | sign: DyadicSign::Pos, |
596 | | exponent: -127, |
597 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
598 | | }; |
599 | | static MPI_OVER_2: DyadicFloat128 = DyadicFloat128 { |
600 | | sign: DyadicSign::Neg, |
601 | | exponent: -127, |
602 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
603 | | }; |
604 | | static CONST_ADJ: [[[DyadicFloat128; 2]; 2]; 2] = [ |
605 | | [[ZERO, MPI_OVER_2], [ZERO, MPI_OVER_2]], |
606 | | [[MINUS_PI, PI_OVER_2], [MINUS_PI, PI_OVER_2]], |
607 | | ]; |
608 | | |
609 | 0 | let x_sign = x.is_sign_negative() as usize; |
610 | 0 | let y_sign = y.is_sign_negative() as usize; |
611 | 0 | let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
612 | 0 | let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff; |
613 | 0 | let x_abs = x_bits; |
614 | 0 | let y_abs = y_bits; |
615 | 0 | let recip = x_abs < y_abs; |
616 | | |
617 | 0 | let min_abs = if recip { x_abs } else { y_abs }; |
618 | 0 | let max_abs = if !recip { x_abs } else { y_abs }; |
619 | 0 | let min_exp = min_abs.wrapping_shr(52); |
620 | 0 | let max_exp = max_abs.wrapping_shr(52); |
621 | | |
622 | 0 | let mut num = f64::from_bits(min_abs); |
623 | 0 | let mut den = f64::from_bits(max_abs); |
624 | | |
625 | 0 | if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 { |
626 | 0 | let scale_up = min_exp < 128u64; |
627 | 0 | let scale_down = max_exp > 0x7ffu64 - 128u64; |
628 | | // At least one input is denormal, multiply both numerator and denominator |
629 | | // by some large enough power of 2 to normalize denormal inputs. |
630 | 0 | if scale_up { |
631 | 0 | num *= f64::from_bits(0x43f0000000000000); |
632 | 0 | if !scale_down { |
633 | 0 | den *= f64::from_bits(0x43f0000000000000) |
634 | 0 | } |
635 | 0 | } else if scale_down { |
636 | 0 | den *= f64::from_bits(0x3bf0000000000000); |
637 | 0 | if !scale_up { |
638 | 0 | num *= f64::from_bits(0x3bf0000000000000); |
639 | 0 | } |
640 | 0 | } |
641 | 0 | } |
642 | | |
643 | | static IS_NEG: [DyadicSign; 2] = [DyadicSign::Pos, DyadicSign::Neg]; |
644 | | |
645 | 0 | let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize]; |
646 | 0 | let const_term = CONST_ADJ[x_sign][y_sign][recip as usize]; |
647 | | |
648 | 0 | let num = DyadicFloat128::new_from_f64(num); |
649 | 0 | let den = DyadicFloat128::new_from_f64(den); |
650 | | |
651 | 0 | let den_recip0 = den.reciprocal(); |
652 | | |
653 | 0 | let mut k_product = num * den_recip0; |
654 | 0 | k_product.exponent += 6; |
655 | | |
656 | 0 | let mut k = k_product.round_to_nearest_f64(); |
657 | 0 | let idx = k as u64; |
658 | | // k = idx / 64 |
659 | 0 | k *= f64::from_bits(0x3f90000000000000); |
660 | | |
661 | | // Range reduction: |
662 | | // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) |
663 | | // = atan((n - d * k/64)) / (d + n * k/64)) |
664 | 0 | let k_rational128 = DyadicFloat128::new_from_f64(k); |
665 | 0 | let num_k = num * k_rational128; |
666 | 0 | let den_k = den * k_rational128; |
667 | | |
668 | | // num_dd = n - d * k |
669 | 0 | let num_rational128 = num - den_k; |
670 | | // den_dd = d + n * k |
671 | 0 | let den_rational128 = den + num_k; |
672 | | |
673 | | // q = (n - d * k) / (d + n * k) |
674 | 0 | let den_rational128_recip = den_rational128.reciprocal(); |
675 | 0 | let q = num_rational128 * den_rational128_recip; |
676 | | |
677 | 0 | let p = atan_eval_hard(q); |
678 | | |
679 | 0 | let vl = ATAN_RATIONAL_128[idx as usize]; |
680 | 0 | let mut r = p + vl + const_term; |
681 | 0 | r.sign = r.sign.mult(final_sign); |
682 | | |
683 | 0 | r |
684 | 0 | } |
685 | | |
686 | | static IS_NEG: [f64; 2] = [1.0, -1.0]; |
687 | | const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0); |
688 | | const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0); |
689 | | const PI: DoubleDouble = DoubleDouble::new( |
690 | | f64::from_bits(0x3ca1a62633145c07), |
691 | | f64::from_bits(0x400921fb54442d18), |
692 | | ); |
693 | | const MPI: DoubleDouble = DoubleDouble::new( |
694 | | f64::from_bits(0xbca1a62633145c07), |
695 | | f64::from_bits(0xc00921fb54442d18), |
696 | | ); |
697 | | const PI_OVER_2: DoubleDouble = DoubleDouble::new( |
698 | | f64::from_bits(0x3c91a62633145c07), |
699 | | f64::from_bits(0x3ff921fb54442d18), |
700 | | ); |
701 | | const MPI_OVER_2: DoubleDouble = DoubleDouble::new( |
702 | | f64::from_bits(0xbc91a62633145c07), |
703 | | f64::from_bits(0xbff921fb54442d18), |
704 | | ); |
705 | | const PI_OVER_4: DoubleDouble = DoubleDouble::new( |
706 | | f64::from_bits(0x3c81a62633145c07), |
707 | | f64::from_bits(0x3fe921fb54442d18), |
708 | | ); |
709 | | const THREE_PI_OVER_4: DoubleDouble = DoubleDouble::new( |
710 | | f64::from_bits(0x3c9a79394c9e8a0a), |
711 | | f64::from_bits(0x4002d97c7f3321d2), |
712 | | ); |
713 | | |
714 | | // Adjustment for constant term: |
715 | | // CONST_ADJ[x_sign][y_sign][recip] |
716 | | static CONST_ADJ: [[[DoubleDouble; 2]; 2]; 2] = [ |
717 | | [[ZERO, MPI_OVER_2], [MZERO, MPI_OVER_2]], |
718 | | [[MPI, PI_OVER_2], [MPI, PI_OVER_2]], |
719 | | ]; |
720 | | |
721 | | // Exceptional cases: |
722 | | // EXCEPT[y_except][x_except][x_is_neg] |
723 | | // with x_except & y_except: |
724 | | // 0: zero |
725 | | // 1: finite, non-zero |
726 | | // 2: infinity |
727 | | static EXCEPTS: [[[DoubleDouble; 2]; 3]; 3] = [ |
728 | | [[ZERO, PI], [ZERO, PI], [ZERO, PI]], |
729 | | [[PI_OVER_2, PI_OVER_2], [ZERO, ZERO], [ZERO, PI]], |
730 | | [ |
731 | | [PI_OVER_2, PI_OVER_2], |
732 | | [PI_OVER_2, PI_OVER_2], |
733 | | [PI_OVER_4, THREE_PI_OVER_4], |
734 | | ], |
735 | | ]; |
736 | | |
737 | | #[inline(always)] |
738 | 0 | fn atan2_gen_fma(y: f64, x: f64) -> f64 { |
739 | 0 | let x_sign = x.is_sign_negative() as usize; |
740 | 0 | let y_sign = y.is_sign_negative() as usize; |
741 | 0 | let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
742 | 0 | let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff; |
743 | 0 | let x_abs = x_bits; |
744 | 0 | let y_abs = y_bits; |
745 | 0 | let recip = x_abs < y_abs; |
746 | 0 | let mut min_abs = if recip { x_abs } else { y_abs }; |
747 | 0 | let mut max_abs = if !recip { x_abs } else { y_abs }; |
748 | 0 | let mut min_exp = min_abs.wrapping_shr(52); |
749 | 0 | let mut max_exp = max_abs.wrapping_shr(52); |
750 | | |
751 | 0 | let mut num = f64::from_bits(min_abs); |
752 | 0 | let mut den = f64::from_bits(max_abs); |
753 | | |
754 | | // Check for exceptional cases, whether inputs are 0, inf, nan, or close to |
755 | | // overflow, or close to underflow. |
756 | 0 | if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 { |
757 | 0 | if x.is_nan() || y.is_nan() { |
758 | 0 | return f64::NAN; |
759 | 0 | } |
760 | 0 | let x_except = if x == 0.0 { |
761 | 0 | 0 |
762 | 0 | } else if x.is_infinite() { |
763 | 0 | 2 |
764 | | } else { |
765 | 0 | 1 |
766 | | }; |
767 | 0 | let y_except = if y == 0.0 { |
768 | 0 | 0 |
769 | 0 | } else if y.is_infinite() { |
770 | 0 | 2 |
771 | | } else { |
772 | 0 | 1 |
773 | | }; |
774 | | |
775 | 0 | if (x_except != 1) || (y_except != 1) { |
776 | 0 | let r = EXCEPTS[y_except][x_except][x_sign]; |
777 | 0 | return f_fmla(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo); |
778 | 0 | } |
779 | 0 | let scale_up = min_exp < 128u64; |
780 | 0 | let scale_down = max_exp > 0x7ffu64 - 128u64; |
781 | | // At least one input is denormal, multiply both numerator and denominator |
782 | | // by some large enough power of 2 to normalize denormal inputs. |
783 | | // if scale_up || scale_down { |
784 | | // return atan2_hard(y, x).fast_as_f64(); |
785 | | // } |
786 | 0 | if scale_up { |
787 | 0 | num *= f64::from_bits(0x43f0000000000000); |
788 | 0 | if !scale_down { |
789 | 0 | den *= f64::from_bits(0x43f0000000000000); |
790 | 0 | } |
791 | 0 | } else if scale_down { |
792 | 0 | den *= f64::from_bits(0x3bf0000000000000); |
793 | 0 | if !scale_up { |
794 | 0 | num *= f64::from_bits(0x3bf0000000000000); |
795 | 0 | } |
796 | 0 | } |
797 | | |
798 | 0 | min_abs = num.to_bits(); |
799 | 0 | max_abs = den.to_bits(); |
800 | 0 | min_exp = min_abs.wrapping_shr(52); |
801 | 0 | max_exp = max_abs.wrapping_shr(52); |
802 | 0 | } |
803 | 0 | let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize]; |
804 | 0 | let const_term = CONST_ADJ[x_sign][y_sign][recip as usize]; |
805 | 0 | let exp_diff = max_exp - min_exp; |
806 | | // We have the following bound for normalized n and d: |
807 | | // 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1). |
808 | 0 | if exp_diff > 54 { |
809 | 0 | return f_fmla( |
810 | 0 | final_sign, |
811 | 0 | const_term.hi, |
812 | 0 | final_sign * (const_term.lo + num / den), |
813 | | ); |
814 | 0 | } |
815 | | |
816 | 0 | let mut k = (64.0 * num / den).cpu_round(); |
817 | 0 | let idx = k as u64; |
818 | | // k = idx / 64 |
819 | 0 | k *= f64::from_bits(0x3f90000000000000); |
820 | | |
821 | | // Range reduction: |
822 | | // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) |
823 | | // = atan((n - d * k/64)) / (d + n * k/64)) |
824 | 0 | let num_k = DoubleDouble::from_exact_mult(num, k); |
825 | 0 | let den_k = DoubleDouble::from_exact_mult(den, k); |
826 | | |
827 | | // num_dd = n - d * k |
828 | 0 | let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo); |
829 | | // den_dd = d + n * k |
830 | 0 | let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi); |
831 | 0 | den_dd.lo += num_k.lo; |
832 | | |
833 | | // q = (n - d * k) / (d + n * k) |
834 | 0 | let q = DoubleDouble::div(num_dd, den_dd); |
835 | | // p ~ atan(q) |
836 | 0 | let p = atan_eval(q); |
837 | | |
838 | 0 | let vl = ATAN_I[idx as usize]; |
839 | 0 | let vlo = DoubleDouble::from_bit_pair(vl); |
840 | 0 | let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p)); |
841 | | |
842 | 0 | let err = f_fmla( |
843 | 0 | p.hi, |
844 | 0 | f64::from_bits(0x3bd0000000000000), |
845 | 0 | f64::from_bits(0x3c00000000000000), |
846 | | ); |
847 | | |
848 | 0 | let ub = r.hi + (r.lo + err); |
849 | 0 | let lb = r.hi + (r.lo - err); |
850 | | |
851 | 0 | if ub == lb { |
852 | 0 | r.hi *= final_sign; |
853 | 0 | r.lo *= final_sign; |
854 | | |
855 | 0 | return r.to_f64(); |
856 | 0 | } |
857 | 0 | atan2_hard(y, x).fast_as_f64() |
858 | 0 | } |
859 | | |
860 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
861 | | #[target_feature(enable = "avx", enable = "fma")] |
862 | 0 | unsafe fn atan2_fma_impl(y: f64, x: f64) -> f64 { |
863 | 0 | let x_sign = x.is_sign_negative() as usize; |
864 | 0 | let y_sign = y.is_sign_negative() as usize; |
865 | 0 | let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
866 | 0 | let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff; |
867 | 0 | let x_abs = x_bits; |
868 | 0 | let y_abs = y_bits; |
869 | 0 | let recip = x_abs < y_abs; |
870 | 0 | let mut min_abs = if recip { x_abs } else { y_abs }; |
871 | 0 | let mut max_abs = if !recip { x_abs } else { y_abs }; |
872 | 0 | let mut min_exp = min_abs.wrapping_shr(52); |
873 | 0 | let mut max_exp = max_abs.wrapping_shr(52); |
874 | | |
875 | 0 | let mut num = f64::from_bits(min_abs); |
876 | 0 | let mut den = f64::from_bits(max_abs); |
877 | | |
878 | | // Check for exceptional cases, whether inputs are 0, inf, nan, or close to |
879 | | // overflow, or close to underflow. |
880 | 0 | if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 { |
881 | 0 | if x.is_nan() || y.is_nan() { |
882 | 0 | return f64::NAN; |
883 | 0 | } |
884 | 0 | let x_except = if x == 0.0 { |
885 | 0 | 0 |
886 | 0 | } else if x.is_infinite() { |
887 | 0 | 2 |
888 | | } else { |
889 | 0 | 1 |
890 | | }; |
891 | 0 | let y_except = if y == 0.0 { |
892 | 0 | 0 |
893 | 0 | } else if y.is_infinite() { |
894 | 0 | 2 |
895 | | } else { |
896 | 0 | 1 |
897 | | }; |
898 | | |
899 | 0 | if (x_except != 1) || (y_except != 1) { |
900 | 0 | let r = EXCEPTS[y_except][x_except][x_sign]; |
901 | 0 | return f64::mul_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo); |
902 | 0 | } |
903 | 0 | let scale_up = min_exp < 128u64; |
904 | 0 | let scale_down = max_exp > 0x7ffu64 - 128u64; |
905 | | // At least one input is denormal, multiply both numerator and denominator |
906 | | // by some large enough power of 2 to normalize denormal inputs. |
907 | | // if scale_up || scale_down { |
908 | | // return atan2_hard(y, x).fast_as_f64(); |
909 | | // } |
910 | 0 | if scale_up { |
911 | 0 | num *= f64::from_bits(0x43f0000000000000); |
912 | 0 | if !scale_down { |
913 | 0 | den *= f64::from_bits(0x43f0000000000000); |
914 | 0 | } |
915 | 0 | } else if scale_down { |
916 | 0 | den *= f64::from_bits(0x3bf0000000000000); |
917 | 0 | if !scale_up { |
918 | 0 | num *= f64::from_bits(0x3bf0000000000000); |
919 | 0 | } |
920 | 0 | } |
921 | | |
922 | 0 | min_abs = num.to_bits(); |
923 | 0 | max_abs = den.to_bits(); |
924 | 0 | min_exp = min_abs.wrapping_shr(52); |
925 | 0 | max_exp = max_abs.wrapping_shr(52); |
926 | 0 | } |
927 | 0 | let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize]; |
928 | 0 | let const_term = CONST_ADJ[x_sign][y_sign][recip as usize]; |
929 | 0 | let exp_diff = max_exp - min_exp; |
930 | | // We have the following bound for normalized n and d: |
931 | | // 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1). |
932 | 0 | if exp_diff > 54 { |
933 | 0 | return f64::mul_add( |
934 | 0 | final_sign, |
935 | 0 | const_term.hi, |
936 | 0 | final_sign * (const_term.lo + num / den), |
937 | | ); |
938 | 0 | } |
939 | | |
940 | 0 | let mut k = (64.0 * num / den).round(); |
941 | 0 | let idx = k as u64; |
942 | | // k = idx / 64 |
943 | 0 | k *= f64::from_bits(0x3f90000000000000); |
944 | | |
945 | | // Range reduction: |
946 | | // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) |
947 | | // = atan((n - d * k/64)) / (d + n * k/64)) |
948 | 0 | let num_k = DoubleDouble::from_exact_mult_fma(num, k); |
949 | 0 | let den_k = DoubleDouble::from_exact_mult_fma(den, k); |
950 | | |
951 | | // num_dd = n - d * k |
952 | 0 | let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo); |
953 | | // den_dd = d + n * k |
954 | 0 | let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi); |
955 | 0 | den_dd.lo += num_k.lo; |
956 | | |
957 | | // q = (n - d * k) / (d + n * k) |
958 | 0 | let q = DoubleDouble::div_fma(num_dd, den_dd); |
959 | | // p ~ atan(q) |
960 | 0 | let p = atan_eval_fma(q); |
961 | | |
962 | 0 | let vl = ATAN_I[idx as usize]; |
963 | 0 | let vlo = DoubleDouble::from_bit_pair(vl); |
964 | 0 | let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p)); |
965 | | |
966 | 0 | let err = f64::mul_add( |
967 | 0 | p.hi, |
968 | 0 | f64::from_bits(0x3bd0000000000000), |
969 | 0 | f64::from_bits(0x3c00000000000000), |
970 | | ); |
971 | | |
972 | 0 | let ub = r.hi + (r.lo + err); |
973 | 0 | let lb = r.hi + (r.lo - err); |
974 | | |
975 | 0 | if ub == lb { |
976 | 0 | r.hi *= final_sign; |
977 | 0 | r.lo *= final_sign; |
978 | | |
979 | 0 | return r.to_f64(); |
980 | 0 | } |
981 | 0 | atan2_hard(y, x).fast_as_f64() |
982 | 0 | } |
983 | | |
984 | | /// Computes atan(x) |
985 | | /// |
986 | | /// Max found ULP 0.5 |
987 | 0 | pub fn f_atan2(y: f64, x: f64) -> f64 { |
988 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
989 | | { |
990 | | atan2_gen_fma(y, x) |
991 | | } |
992 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
993 | | { |
994 | | use std::sync::OnceLock; |
995 | | static EXECUTOR: OnceLock<unsafe fn(f64, f64) -> f64> = OnceLock::new(); |
996 | 0 | let q = EXECUTOR.get_or_init(|| { |
997 | 0 | if std::arch::is_x86_feature_detected!("avx") |
998 | 0 | && std::arch::is_x86_feature_detected!("fma") |
999 | | { |
1000 | 0 | atan2_fma_impl |
1001 | | } else { |
1002 | 0 | fn def_atan2pi(y: f64, x: f64) -> f64 { |
1003 | 0 | atan2_gen_fma(y, x) |
1004 | 0 | } |
1005 | 0 | def_atan2pi |
1006 | | } |
1007 | 0 | }); |
1008 | 0 | unsafe { q(y, x) } |
1009 | | } |
1010 | 0 | } |
1011 | | |
1012 | | #[cfg(test)] |
1013 | | mod tests { |
1014 | | use super::*; |
1015 | | |
1016 | | #[test] |
1017 | | fn test_atan2() { |
1018 | | assert_eq!( |
1019 | | f_atan2(0.05474853958030223, 0.9999995380640253), |
1020 | | 0.05469396182367716 |
1021 | | ); |
1022 | | assert_eq!(f_atan2(-5., 2.), -1.1902899496825317); |
1023 | | assert_eq!(f_atan2(2., -5.), 2.761086276477428); |
1024 | | assert_eq!( |
1025 | | f_atan2(1.220342145227879E-321, 6.9806238698201653E-309), |
1026 | | 0.00000000000017481849301519772 |
1027 | | ); |
1028 | | } |
1029 | | } |