Coverage Report

Created: 2025-11-24 07:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/tangent/atan2f.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::f_fmla;
30
use crate::double_double::DoubleDouble;
31
use crate::shared_eval::poly_dekker_generic;
32
33
static ATAN2F_TABLE: [(u64, u64); 32] = [
34
    (0x3ff0000000000000, 0xba88c1dac5492248),
35
    (0xbfd5555555555555, 0xbc755553bf3a2abe),
36
    (0x3fc999999999999a, 0xbc699deed1ec9071),
37
    (0xbfc2492492492492, 0xbc5fd99c8d18269a),
38
    (0x3fbc71c71c71c717, 0xbc2651eee4c4d9d0),
39
    (0xbfb745d1745d1649, 0xbc5632683d6c44a6),
40
    (0x3fb3b13b13b11c63, 0x3c5bf69c1f8af41d),
41
    (0xbfb11111110e6338, 0x3c23c3e431e8bb68),
42
    (0x3fae1e1e1dc45c4a, 0xbc4be2db05c77bbf),
43
    (0xbfaaf286b8164b4f, 0x3c2a4673491f0942),
44
    (0x3fa86185e9ad4846, 0x3c4e12e32d79fcee),
45
    (0xbfa642c6d5161fae, 0x3c43ce76c1ca03f0),
46
    (0x3fa47ad6f277e5bf, 0xbc3abd8d85bdb714),
47
    (0xbfa2f64a2ee8896d, 0x3c2ef87d4b615323),
48
    (0x3fa1a6a2b31741b5, 0x3c1a5d9d973547ee),
49
    (0xbfa07fbdad65e0a6, 0xbc265ac07f5d35f4),
50
    (0x3f9ee9932a9a5f8b, 0x3c2f8b9623f6f55a),
51
    (0xbf9ce8b5b9584dc6, 0x3c2fe5af96e8ea2d),
52
    (0x3f9ac9cb288087b7, 0xbc3450cdfceaf5ca),
53
    (0xbf984b025351f3e6, 0x3c2579561b0d73da),
54
    (0x3f952f5b8ecdd52b, 0x3c3036bd2c6fba47),
55
    (0xbf9163a8c44909dc, 0x3c318f735ffb9f16),
56
    (0x3f8a400dce3eea6f, 0xbc2c90569c0c1b5c),
57
    (0xbf81caa78ae6db3a, 0xbc24c60f8161ea09),
58
    (0x3f752672453c0731, 0x3c1834efb598c338),
59
    (0xbf65850c5be137cf, 0xbc0445fc150ca7f5),
60
    (0x3f523eb98d22e1ca, 0xbbf388fbaf1d7830),
61
    (0xbf38f4e974a40741, 0x3bd271198a97da34),
62
    (0x3f1a5cf2e9cf76e5, 0xbbb887eb4a63b665),
63
    (0xbef420c270719e32, 0x3b8efd595b27888b),
64
    (0x3ec3ba2d69b51677, 0xbb64fb06829cdfc7),
65
    (0xbe829b7e6f676385, 0xbb2a783b6de718fb),
66
];
67
68
#[cold]
69
0
fn atan2f_tiny(y: f32, x: f32) -> f32 {
70
0
    let dy = y as f64;
71
0
    let dx = x as f64;
72
0
    let z = dy / dx;
73
0
    let mut e = f_fmla(-z, x as f64, y as f64);
74
    /* z * x + e = y thus y/x = z + e/x */
75
    const C: f64 = f64::from_bits(0xbfd5555555555555); /* -1/3 rounded to nearest */
76
0
    let zz = z * z;
77
0
    let cz = C * z;
78
0
    e = e / x as f64 + cz * zz;
79
0
    let mut t = z.to_bits();
80
0
    if (t & 0xfffffffu64) == 0 {
81
        /* boundary case */
82
        /* If z and e are of same sign (resp. of different signs), we increase
83
        (resp. decrease) the significand of t by 1 to avoid a double-rounding
84
        issue when rounding t.f to binary32. */
85
0
        if z * e > 0. {
86
0
            t = t.wrapping_add(1);
87
0
        } else {
88
0
            t = t.wrapping_sub(1);
89
0
        }
90
0
    }
91
0
    f64::from_bits(t) as f32
92
0
}
93
94
#[allow(clippy::too_many_arguments)]
95
#[cold]
96
0
fn atan2f_refine(ay: u32, ax: u32, y: f32, x: f32, zy: f64, zx: f64, gt: usize, i: u32) -> f32 {
97
    const PI: f64 = f64::from_bits(0x400921fb54442d18);
98
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
99
    const PI2L: f64 = f64::from_bits(0x3c91a62633145c07);
100
    static OFF: [f64; 8] = [0.0, PI2, PI, PI2, -0.0, -PI2, -PI, -PI2];
101
    static OFFL: [f64; 8] = [0.0, PI2L, 2. * PI2L, PI2L, -0.0, -PI2L, -2. * PI2L, -PI2L];
102
    static SGN: [f64; 2] = [1., -1.];
103
    /* check tiny y/x */
104
0
    if ay < ax && ((ax - ay) >> 23 >= 25) {
105
0
        return atan2f_tiny(y, x);
106
0
    }
107
    let mut zh;
108
    let mut zl;
109
0
    if gt == 0 {
110
0
        zh = zy / zx;
111
0
        zl = f_fmla(zh, -zx, zy) / zx;
112
0
    } else {
113
0
        zh = zx / zy;
114
0
        zl = f_fmla(zh, -zy, zx) / zy;
115
0
    }
116
0
    let z2 = DoubleDouble::quick_mult(DoubleDouble::new(zl, zh), DoubleDouble::new(zl, zh));
117
0
    let mut p = poly_dekker_generic(z2, ATAN2F_TABLE);
118
0
    zh *= SGN[gt];
119
0
    zl *= SGN[gt];
120
0
    p = DoubleDouble::quick_mult(DoubleDouble::new(zl, zh), p);
121
0
    let sh = p.hi + OFF[i as usize];
122
0
    let sl = ((OFF[i as usize] - sh) + p.hi) + p.lo + OFFL[i as usize];
123
0
    let rf = sh as f32;
124
0
    let th = rf as f64;
125
0
    let dh = sh - th;
126
0
    let mut tm: f64 = dh + sl;
127
0
    let mut tth = th.to_bits();
128
0
    if th + th * f64::from_bits(0x3c30000000000000) == th - th * f64::from_bits(0x3c30000000000000)
129
    {
130
0
        tth &= 0x7ffu64 << 52;
131
0
        tth = tth.wrapping_sub(24 << 52);
132
0
        if tm.abs() > f64::from_bits(tth) {
133
0
            tm *= 1.25;
134
0
        } else {
135
0
            tm *= 0.75;
136
0
        }
137
0
    }
138
0
    let r = th + tm;
139
0
    r as f32
140
0
}
141
142
/// Computes atan2
143
///
144
/// Max found ULP 0.49999842
145
#[inline]
146
0
pub fn f_atan2f(y: f32, x: f32) -> f32 {
147
    const M: [f64; 2] = [0., 1.];
148
    const PI: f64 = f64::from_bits(0x400921fb54442d18);
149
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
150
    const PI2L: f64 = f64::from_bits(0x3c91a62633145c07);
151
    static OFF: [f64; 8] = [0.0, PI2, PI, PI2, -0.0, -PI2, -PI, -PI2];
152
    static OFFL: [f64; 8] = [0.0, PI2L, 2. * PI2L, PI2L, -0.0, -PI2L, -2. * PI2L, -PI2L];
153
    static SGN: [f64; 2] = [1., -1.];
154
0
    let tx = x.to_bits();
155
0
    let ty = y.to_bits();
156
0
    let ux = tx;
157
0
    let uy = ty;
158
0
    let ax = ux & 0x7fffffff;
159
0
    let ay = uy & 0x7fffffff;
160
0
    if ay >= (0xff << 23) || ax >= (0xff << 23) {
161
        // x or y is nan or inf
162
        /* we use x+y below so that the invalid exception is set
163
        for (x,y) = (qnan,snan) or (snan,qnan) */
164
0
        if ay > (0xff << 23) {
165
0
            return x + y;
166
0
        } // case y nan
167
0
        if ax > (0xff << 23) {
168
0
            return x + y;
169
0
        } // case x nan
170
0
        let yinf = ay == (0xff << 23);
171
0
        let xinf = ax == (0xff << 23);
172
0
        if yinf & xinf {
173
0
            return if (ux >> 31) != 0 {
174
0
                (f64::from_bits(0x4002d97c7f3321d2) * SGN[(uy >> 31) as usize]) as f32 // +/-3pi/4
175
            } else {
176
0
                (f64::from_bits(0x3fe921fb54442d18) * SGN[(uy >> 31) as usize]) as f32 // +/-pi/4
177
            };
178
0
        }
179
0
        if xinf {
180
0
            return if (ux >> 31) != 0 {
181
0
                (PI * SGN[(uy >> 31) as usize]) as f32
182
            } else {
183
0
                (0.0 * SGN[(uy >> 31) as usize]) as f32
184
            };
185
0
        }
186
0
        if yinf {
187
0
            return (PI2 * SGN[(uy >> 31) as usize]) as f32;
188
0
        }
189
0
    }
190
0
    if ay == 0 {
191
0
        if ax == 0 {
192
0
            let i = (uy >> 31)
193
0
                .wrapping_mul(4)
194
0
                .wrapping_add((ux >> 31).wrapping_mul(2));
195
0
            return if (ux >> 31) != 0 {
196
0
                (OFF[i as usize] + OFFL[i as usize]) as f32
197
            } else {
198
0
                OFF[i as usize] as f32
199
            };
200
0
        }
201
0
        if (ux >> 31) == 0 {
202
0
            return (0.0 * SGN[(uy >> 31) as usize]) as f32;
203
0
        }
204
0
    }
205
0
    let gt = (ay > ax) as usize;
206
0
    let i = (uy >> 31)
207
0
        .wrapping_mul(4)
208
0
        .wrapping_add((ux >> 31).wrapping_mul(2))
209
0
        .wrapping_add(gt as u32);
210
211
0
    let zx = x as f64;
212
0
    let zy = y as f64;
213
0
    let mut z = f_fmla(M[gt], zx, M[1usize.wrapping_sub(gt)] * zy)
214
0
        / f_fmla(M[gt], zy, M[1usize.wrapping_sub(gt)] * zx);
215
    // z = x/y if |y| > |x|, and z = y/x otherwise
216
    let mut r;
217
218
0
    let d = ax as i32 - ay as i32;
219
0
    if d < (27 << 23) && d > (-(27 << 23)) {
220
0
        let z2 = z * z;
221
0
        let z4 = z2 * z2;
222
0
        let z8 = z4 * z4;
223
        /* z2 cannot underflow, since for |y|=0x1p-149 and |x|=0x1.fffffep+127
224
        we get |z| > 2^-277 thus z2 > 2^-554, but z4 and z8 might underflow,
225
        which might give spurious underflow exceptions. */
226
227
        const CN: [u64; 7] = [
228
            0x3ff0000000000000,
229
            0x40040e0698f94c35,
230
            0x400248c5da347f0d,
231
            0x3fed873386572976,
232
            0x3fc46fa40b20f1d0,
233
            0x3f833f5e041eed0f,
234
            0x3f1546bbf28667c5,
235
        ];
236
        const CD: [u64; 7] = [
237
            0x3ff0000000000000,
238
            0x4006b8b143a3f6da,
239
            0x4008421201d18ed5,
240
            0x3ff8221d086914eb,
241
            0x3fd670657e3a07ba,
242
            0x3fa0f4951fd1e72d,
243
            0x3f4b3874b8798286,
244
        ];
245
246
0
        let mut cn0 = f_fmla(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0]));
247
0
        let cn2 = f_fmla(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2]));
248
0
        let mut cn4 = f_fmla(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4]));
249
0
        let cn6 = f64::from_bits(CN[6]);
250
0
        cn0 = f_fmla(z4, cn2, cn0);
251
0
        cn4 = f_fmla(z4, cn6, cn4);
252
0
        cn0 = f_fmla(z8, cn4, cn0);
253
0
        let mut cd0 = f_fmla(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0]));
254
0
        let cd2 = f_fmla(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2]));
255
0
        let mut cd4 = f_fmla(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4]));
256
0
        let cd6 = f64::from_bits(CD[6]);
257
0
        cd0 = f_fmla(z4, cd2, cd0);
258
0
        cd4 = f_fmla(z4, cd6, cd4);
259
0
        cd0 = f_fmla(z8, cd4, cd0);
260
0
        r = cn0 / cd0;
261
0
    } else {
262
0
        r = 1.;
263
0
    }
264
0
    z *= SGN[gt];
265
0
    r = f_fmla(z, r, OFF[i as usize]);
266
0
    let res = r.to_bits();
267
0
    if ((res.wrapping_add(8)) & 0xfffffff) <= 16 {
268
0
        return atan2f_refine(ay, ax, y, x, zy, zx, gt, i);
269
0
    }
270
271
0
    r as f32
272
0
}
Unexecuted instantiation: pxfm::tangent::atan2f::f_atan2f
Unexecuted instantiation: pxfm::tangent::atan2f::f_atan2f
273
274
#[cfg(test)]
275
mod tests {
276
    use super::*;
277
278
    #[test]
279
    fn f_atan2_test() {
280
        assert_eq!(
281
            f_atan2f(
282
                0.000000000000000000000000000000000000017907922,
283
                170141180000000000000000000000000000000.
284
            ),
285
            0.
286
        );
287
        assert_eq!(f_atan2f(-3590000000., -15437000.), -1.5750962);
288
        assert_eq!(f_atan2f(-5., 2.), -1.19029);
289
    }
290
}