/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/tangent/atan2f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::shared_eval::poly_dekker_generic; |
32 | | |
33 | | static ATAN2F_TABLE: [(u64, u64); 32] = [ |
34 | | (0x3ff0000000000000, 0xba88c1dac5492248), |
35 | | (0xbfd5555555555555, 0xbc755553bf3a2abe), |
36 | | (0x3fc999999999999a, 0xbc699deed1ec9071), |
37 | | (0xbfc2492492492492, 0xbc5fd99c8d18269a), |
38 | | (0x3fbc71c71c71c717, 0xbc2651eee4c4d9d0), |
39 | | (0xbfb745d1745d1649, 0xbc5632683d6c44a6), |
40 | | (0x3fb3b13b13b11c63, 0x3c5bf69c1f8af41d), |
41 | | (0xbfb11111110e6338, 0x3c23c3e431e8bb68), |
42 | | (0x3fae1e1e1dc45c4a, 0xbc4be2db05c77bbf), |
43 | | (0xbfaaf286b8164b4f, 0x3c2a4673491f0942), |
44 | | (0x3fa86185e9ad4846, 0x3c4e12e32d79fcee), |
45 | | (0xbfa642c6d5161fae, 0x3c43ce76c1ca03f0), |
46 | | (0x3fa47ad6f277e5bf, 0xbc3abd8d85bdb714), |
47 | | (0xbfa2f64a2ee8896d, 0x3c2ef87d4b615323), |
48 | | (0x3fa1a6a2b31741b5, 0x3c1a5d9d973547ee), |
49 | | (0xbfa07fbdad65e0a6, 0xbc265ac07f5d35f4), |
50 | | (0x3f9ee9932a9a5f8b, 0x3c2f8b9623f6f55a), |
51 | | (0xbf9ce8b5b9584dc6, 0x3c2fe5af96e8ea2d), |
52 | | (0x3f9ac9cb288087b7, 0xbc3450cdfceaf5ca), |
53 | | (0xbf984b025351f3e6, 0x3c2579561b0d73da), |
54 | | (0x3f952f5b8ecdd52b, 0x3c3036bd2c6fba47), |
55 | | (0xbf9163a8c44909dc, 0x3c318f735ffb9f16), |
56 | | (0x3f8a400dce3eea6f, 0xbc2c90569c0c1b5c), |
57 | | (0xbf81caa78ae6db3a, 0xbc24c60f8161ea09), |
58 | | (0x3f752672453c0731, 0x3c1834efb598c338), |
59 | | (0xbf65850c5be137cf, 0xbc0445fc150ca7f5), |
60 | | (0x3f523eb98d22e1ca, 0xbbf388fbaf1d7830), |
61 | | (0xbf38f4e974a40741, 0x3bd271198a97da34), |
62 | | (0x3f1a5cf2e9cf76e5, 0xbbb887eb4a63b665), |
63 | | (0xbef420c270719e32, 0x3b8efd595b27888b), |
64 | | (0x3ec3ba2d69b51677, 0xbb64fb06829cdfc7), |
65 | | (0xbe829b7e6f676385, 0xbb2a783b6de718fb), |
66 | | ]; |
67 | | |
68 | | #[cold] |
69 | 0 | fn atan2f_tiny(y: f32, x: f32) -> f32 { |
70 | 0 | let dy = y as f64; |
71 | 0 | let dx = x as f64; |
72 | 0 | let z = dy / dx; |
73 | 0 | let mut e = f_fmla(-z, x as f64, y as f64); |
74 | | /* z * x + e = y thus y/x = z + e/x */ |
75 | | const C: f64 = f64::from_bits(0xbfd5555555555555); /* -1/3 rounded to nearest */ |
76 | 0 | let zz = z * z; |
77 | 0 | let cz = C * z; |
78 | 0 | e = e / x as f64 + cz * zz; |
79 | 0 | let mut t = z.to_bits(); |
80 | 0 | if (t & 0xfffffffu64) == 0 { |
81 | | /* boundary case */ |
82 | | /* If z and e are of same sign (resp. of different signs), we increase |
83 | | (resp. decrease) the significand of t by 1 to avoid a double-rounding |
84 | | issue when rounding t.f to binary32. */ |
85 | 0 | if z * e > 0. { |
86 | 0 | t = t.wrapping_add(1); |
87 | 0 | } else { |
88 | 0 | t = t.wrapping_sub(1); |
89 | 0 | } |
90 | 0 | } |
91 | 0 | f64::from_bits(t) as f32 |
92 | 0 | } |
93 | | |
94 | | #[allow(clippy::too_many_arguments)] |
95 | | #[cold] |
96 | 0 | fn atan2f_refine(ay: u32, ax: u32, y: f32, x: f32, zy: f64, zx: f64, gt: usize, i: u32) -> f32 { |
97 | | const PI: f64 = f64::from_bits(0x400921fb54442d18); |
98 | | const PI2: f64 = f64::from_bits(0x3ff921fb54442d18); |
99 | | const PI2L: f64 = f64::from_bits(0x3c91a62633145c07); |
100 | | static OFF: [f64; 8] = [0.0, PI2, PI, PI2, -0.0, -PI2, -PI, -PI2]; |
101 | | static OFFL: [f64; 8] = [0.0, PI2L, 2. * PI2L, PI2L, -0.0, -PI2L, -2. * PI2L, -PI2L]; |
102 | | static SGN: [f64; 2] = [1., -1.]; |
103 | | /* check tiny y/x */ |
104 | 0 | if ay < ax && ((ax - ay) >> 23 >= 25) { |
105 | 0 | return atan2f_tiny(y, x); |
106 | 0 | } |
107 | | let mut zh; |
108 | | let mut zl; |
109 | 0 | if gt == 0 { |
110 | 0 | zh = zy / zx; |
111 | 0 | zl = f_fmla(zh, -zx, zy) / zx; |
112 | 0 | } else { |
113 | 0 | zh = zx / zy; |
114 | 0 | zl = f_fmla(zh, -zy, zx) / zy; |
115 | 0 | } |
116 | 0 | let z2 = DoubleDouble::quick_mult(DoubleDouble::new(zl, zh), DoubleDouble::new(zl, zh)); |
117 | 0 | let mut p = poly_dekker_generic(z2, ATAN2F_TABLE); |
118 | 0 | zh *= SGN[gt]; |
119 | 0 | zl *= SGN[gt]; |
120 | 0 | p = DoubleDouble::quick_mult(DoubleDouble::new(zl, zh), p); |
121 | 0 | let sh = p.hi + OFF[i as usize]; |
122 | 0 | let sl = ((OFF[i as usize] - sh) + p.hi) + p.lo + OFFL[i as usize]; |
123 | 0 | let rf = sh as f32; |
124 | 0 | let th = rf as f64; |
125 | 0 | let dh = sh - th; |
126 | 0 | let mut tm: f64 = dh + sl; |
127 | 0 | let mut tth = th.to_bits(); |
128 | 0 | if th + th * f64::from_bits(0x3c30000000000000) == th - th * f64::from_bits(0x3c30000000000000) |
129 | | { |
130 | 0 | tth &= 0x7ffu64 << 52; |
131 | 0 | tth = tth.wrapping_sub(24 << 52); |
132 | 0 | if tm.abs() > f64::from_bits(tth) { |
133 | 0 | tm *= 1.25; |
134 | 0 | } else { |
135 | 0 | tm *= 0.75; |
136 | 0 | } |
137 | 0 | } |
138 | 0 | let r = th + tm; |
139 | 0 | r as f32 |
140 | 0 | } |
141 | | |
142 | | /// Computes atan2 |
143 | | /// |
144 | | /// Max found ULP 0.49999842 |
145 | | #[inline] |
146 | 0 | pub fn f_atan2f(y: f32, x: f32) -> f32 { |
147 | | const M: [f64; 2] = [0., 1.]; |
148 | | const PI: f64 = f64::from_bits(0x400921fb54442d18); |
149 | | const PI2: f64 = f64::from_bits(0x3ff921fb54442d18); |
150 | | const PI2L: f64 = f64::from_bits(0x3c91a62633145c07); |
151 | | static OFF: [f64; 8] = [0.0, PI2, PI, PI2, -0.0, -PI2, -PI, -PI2]; |
152 | | static OFFL: [f64; 8] = [0.0, PI2L, 2. * PI2L, PI2L, -0.0, -PI2L, -2. * PI2L, -PI2L]; |
153 | | static SGN: [f64; 2] = [1., -1.]; |
154 | 0 | let tx = x.to_bits(); |
155 | 0 | let ty = y.to_bits(); |
156 | 0 | let ux = tx; |
157 | 0 | let uy = ty; |
158 | 0 | let ax = ux & 0x7fffffff; |
159 | 0 | let ay = uy & 0x7fffffff; |
160 | 0 | if ay >= (0xff << 23) || ax >= (0xff << 23) { |
161 | | // x or y is nan or inf |
162 | | /* we use x+y below so that the invalid exception is set |
163 | | for (x,y) = (qnan,snan) or (snan,qnan) */ |
164 | 0 | if ay > (0xff << 23) { |
165 | 0 | return x + y; |
166 | 0 | } // case y nan |
167 | 0 | if ax > (0xff << 23) { |
168 | 0 | return x + y; |
169 | 0 | } // case x nan |
170 | 0 | let yinf = ay == (0xff << 23); |
171 | 0 | let xinf = ax == (0xff << 23); |
172 | 0 | if yinf & xinf { |
173 | 0 | return if (ux >> 31) != 0 { |
174 | 0 | (f64::from_bits(0x4002d97c7f3321d2) * SGN[(uy >> 31) as usize]) as f32 // +/-3pi/4 |
175 | | } else { |
176 | 0 | (f64::from_bits(0x3fe921fb54442d18) * SGN[(uy >> 31) as usize]) as f32 // +/-pi/4 |
177 | | }; |
178 | 0 | } |
179 | 0 | if xinf { |
180 | 0 | return if (ux >> 31) != 0 { |
181 | 0 | (PI * SGN[(uy >> 31) as usize]) as f32 |
182 | | } else { |
183 | 0 | (0.0 * SGN[(uy >> 31) as usize]) as f32 |
184 | | }; |
185 | 0 | } |
186 | 0 | if yinf { |
187 | 0 | return (PI2 * SGN[(uy >> 31) as usize]) as f32; |
188 | 0 | } |
189 | 0 | } |
190 | 0 | if ay == 0 { |
191 | 0 | if ax == 0 { |
192 | 0 | let i = (uy >> 31) |
193 | 0 | .wrapping_mul(4) |
194 | 0 | .wrapping_add((ux >> 31).wrapping_mul(2)); |
195 | 0 | return if (ux >> 31) != 0 { |
196 | 0 | (OFF[i as usize] + OFFL[i as usize]) as f32 |
197 | | } else { |
198 | 0 | OFF[i as usize] as f32 |
199 | | }; |
200 | 0 | } |
201 | 0 | if (ux >> 31) == 0 { |
202 | 0 | return (0.0 * SGN[(uy >> 31) as usize]) as f32; |
203 | 0 | } |
204 | 0 | } |
205 | 0 | let gt = (ay > ax) as usize; |
206 | 0 | let i = (uy >> 31) |
207 | 0 | .wrapping_mul(4) |
208 | 0 | .wrapping_add((ux >> 31).wrapping_mul(2)) |
209 | 0 | .wrapping_add(gt as u32); |
210 | | |
211 | 0 | let zx = x as f64; |
212 | 0 | let zy = y as f64; |
213 | 0 | let mut z = f_fmla(M[gt], zx, M[1usize.wrapping_sub(gt)] * zy) |
214 | 0 | / f_fmla(M[gt], zy, M[1usize.wrapping_sub(gt)] * zx); |
215 | | // z = x/y if |y| > |x|, and z = y/x otherwise |
216 | | let mut r; |
217 | | |
218 | 0 | let d = ax as i32 - ay as i32; |
219 | 0 | if d < (27 << 23) && d > (-(27 << 23)) { |
220 | 0 | let z2 = z * z; |
221 | 0 | let z4 = z2 * z2; |
222 | 0 | let z8 = z4 * z4; |
223 | | /* z2 cannot underflow, since for |y|=0x1p-149 and |x|=0x1.fffffep+127 |
224 | | we get |z| > 2^-277 thus z2 > 2^-554, but z4 and z8 might underflow, |
225 | | which might give spurious underflow exceptions. */ |
226 | | |
227 | | const CN: [u64; 7] = [ |
228 | | 0x3ff0000000000000, |
229 | | 0x40040e0698f94c35, |
230 | | 0x400248c5da347f0d, |
231 | | 0x3fed873386572976, |
232 | | 0x3fc46fa40b20f1d0, |
233 | | 0x3f833f5e041eed0f, |
234 | | 0x3f1546bbf28667c5, |
235 | | ]; |
236 | | const CD: [u64; 7] = [ |
237 | | 0x3ff0000000000000, |
238 | | 0x4006b8b143a3f6da, |
239 | | 0x4008421201d18ed5, |
240 | | 0x3ff8221d086914eb, |
241 | | 0x3fd670657e3a07ba, |
242 | | 0x3fa0f4951fd1e72d, |
243 | | 0x3f4b3874b8798286, |
244 | | ]; |
245 | | |
246 | 0 | let mut cn0 = f_fmla(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0])); |
247 | 0 | let cn2 = f_fmla(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2])); |
248 | 0 | let mut cn4 = f_fmla(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4])); |
249 | 0 | let cn6 = f64::from_bits(CN[6]); |
250 | 0 | cn0 = f_fmla(z4, cn2, cn0); |
251 | 0 | cn4 = f_fmla(z4, cn6, cn4); |
252 | 0 | cn0 = f_fmla(z8, cn4, cn0); |
253 | 0 | let mut cd0 = f_fmla(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0])); |
254 | 0 | let cd2 = f_fmla(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2])); |
255 | 0 | let mut cd4 = f_fmla(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4])); |
256 | 0 | let cd6 = f64::from_bits(CD[6]); |
257 | 0 | cd0 = f_fmla(z4, cd2, cd0); |
258 | 0 | cd4 = f_fmla(z4, cd6, cd4); |
259 | 0 | cd0 = f_fmla(z8, cd4, cd0); |
260 | 0 | r = cn0 / cd0; |
261 | 0 | } else { |
262 | 0 | r = 1.; |
263 | 0 | } |
264 | 0 | z *= SGN[gt]; |
265 | 0 | r = f_fmla(z, r, OFF[i as usize]); |
266 | 0 | let res = r.to_bits(); |
267 | 0 | if ((res.wrapping_add(8)) & 0xfffffff) <= 16 { |
268 | 0 | return atan2f_refine(ay, ax, y, x, zy, zx, gt, i); |
269 | 0 | } |
270 | | |
271 | 0 | r as f32 |
272 | 0 | } Unexecuted instantiation: pxfm::tangent::atan2f::f_atan2f Unexecuted instantiation: pxfm::tangent::atan2f::f_atan2f |
273 | | |
274 | | #[cfg(test)] |
275 | | mod tests { |
276 | | use super::*; |
277 | | |
278 | | #[test] |
279 | | fn f_atan2_test() { |
280 | | assert_eq!( |
281 | | f_atan2f( |
282 | | 0.000000000000000000000000000000000000017907922, |
283 | | 170141180000000000000000000000000000000. |
284 | | ), |
285 | | 0. |
286 | | ); |
287 | | assert_eq!(f_atan2f(-3590000000., -15437000.), -1.5750962); |
288 | | assert_eq!(f_atan2f(-5., 2.), -1.19029); |
289 | | } |
290 | | } |