Coverage Report

Created: 2026-01-10 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/tangent/atan2pi.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::acospi::{INV_PI_DD, INV_PI_F128};
30
use crate::common::f_fmla;
31
use crate::double_double::DoubleDouble;
32
use crate::dyadic_float::DyadicFloat128;
33
use crate::rounding::CpuRound;
34
use crate::tangent::atan2::{ATAN_I, atan_eval, atan2_hard};
35
36
/// If one of arguments is too huge or too small, extended precision is required for
37
/// case with big exponent difference
38
#[cold]
39
0
fn atan2pi_big_exp_difference_hard(
40
0
    num: f64,
41
0
    den: f64,
42
0
    x_sign: usize,
43
0
    y_sign: usize,
44
0
    recip: bool,
45
0
    final_sign: f64,
46
0
) -> f64 {
47
    const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0);
48
    const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0);
49
    static CONST_ADJ_INV_PI: [[[DoubleDouble; 2]; 2]; 2] = [
50
        [
51
            [ZERO, DoubleDouble::new(0., -1. / 2.)],
52
            [MZERO, DoubleDouble::new(0., -1. / 2.)],
53
        ],
54
        [
55
            [DoubleDouble::new(0., -1.), DoubleDouble::new(0., 1. / 2.)],
56
            [DoubleDouble::new(0., -1.), DoubleDouble::new(0., 1. / 2.)],
57
        ],
58
    ];
59
0
    let const_term = CONST_ADJ_INV_PI[x_sign][y_sign][recip as usize];
60
0
    let scaled_div = DyadicFloat128::from_div_f64(num, den) * INV_PI_F128;
61
0
    let sign_f128 = DyadicFloat128::new_from_f64(final_sign);
62
0
    let p = DyadicFloat128::new_from_f64(const_term.hi * final_sign);
63
0
    let p1 = sign_f128 * (DyadicFloat128::new_from_f64(const_term.lo) + scaled_div);
64
0
    let r = p + p1;
65
0
    r.fast_as_f64()
66
0
}
67
68
static IS_NEG: [f64; 2] = [1.0, -1.0];
69
const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0);
70
const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0);
71
const PI: DoubleDouble = DoubleDouble::new(
72
    f64::from_bits(0x3ca1a62633145c07),
73
    f64::from_bits(0x400921fb54442d18),
74
);
75
const MPI: DoubleDouble = DoubleDouble::new(
76
    f64::from_bits(0xbca1a62633145c07),
77
    f64::from_bits(0xc00921fb54442d18),
78
);
79
const PI_OVER_2: DoubleDouble = DoubleDouble::new(
80
    f64::from_bits(0x3c91a62633145c07),
81
    f64::from_bits(0x3ff921fb54442d18),
82
);
83
const MPI_OVER_2: DoubleDouble = DoubleDouble::new(
84
    f64::from_bits(0xbc91a62633145c07),
85
    f64::from_bits(0xbff921fb54442d18),
86
);
87
const PI_OVER_4: DoubleDouble = DoubleDouble::new(
88
    f64::from_bits(0x3c81a62633145c07),
89
    f64::from_bits(0x3fe921fb54442d18),
90
);
91
const THREE_PI_OVER_4: DoubleDouble = DoubleDouble::new(
92
    f64::from_bits(0x3c9a79394c9e8a0a),
93
    f64::from_bits(0x4002d97c7f3321d2),
94
);
95
96
// Adjustment for constant term:
97
//   CONST_ADJ[x_sign][y_sign][recip]
98
static CONST_ADJ: [[[DoubleDouble; 2]; 2]; 2] = [
99
    [[ZERO, MPI_OVER_2], [MZERO, MPI_OVER_2]],
100
    [[MPI, PI_OVER_2], [MPI, PI_OVER_2]],
101
];
102
103
#[inline(always)]
104
0
fn atan2pi_gen_impl(y: f64, x: f64) -> f64 {
105
0
    let x_sign = x.is_sign_negative() as usize;
106
0
    let y_sign = y.is_sign_negative() as usize;
107
0
    let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
108
0
    let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff;
109
0
    let x_abs = x_bits;
110
0
    let y_abs = y_bits;
111
0
    let recip = x_abs < y_abs;
112
0
    let mut min_abs = if recip { x_abs } else { y_abs };
113
0
    let mut max_abs = if !recip { x_abs } else { y_abs };
114
0
    let mut min_exp = min_abs.wrapping_shr(52);
115
0
    let mut max_exp = max_abs.wrapping_shr(52);
116
117
0
    let mut num = f64::from_bits(min_abs);
118
0
    let mut den = f64::from_bits(max_abs);
119
120
    // Check for exceptional cases, whether inputs are 0, inf, nan, or close to
121
    // overflow, or close to underflow.
122
0
    if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
123
0
        if x.is_nan() || y.is_nan() {
124
0
            return f64::NAN;
125
0
        }
126
0
        let x_except = if x == 0.0 {
127
0
            0
128
0
        } else if x.is_infinite() {
129
0
            2
130
        } else {
131
0
            1
132
        };
133
0
        let y_except = if y == 0.0 {
134
0
            0
135
0
        } else if y.is_infinite() {
136
0
            2
137
        } else {
138
0
            1
139
        };
140
141
        // Exceptional cases:
142
        //   EXCEPT[y_except][x_except][x_is_neg]
143
        // with x_except & y_except:
144
        //   0: zero
145
        //   1: finite, non-zero
146
        //   2: infinity
147
        static EXCEPTS: [[[DoubleDouble; 2]; 3]; 3] = [
148
            [[ZERO, PI], [ZERO, PI], [ZERO, PI]],
149
            [[PI_OVER_2, PI_OVER_2], [ZERO, ZERO], [ZERO, PI]],
150
            [
151
                [PI_OVER_2, PI_OVER_2],
152
                [PI_OVER_2, PI_OVER_2],
153
                [PI_OVER_4, THREE_PI_OVER_4],
154
            ],
155
        ];
156
157
0
        if (x_except != 1) || (y_except != 1) {
158
0
            let mut r = EXCEPTS[y_except][x_except][x_sign];
159
0
            r = DoubleDouble::quick_mult(r, INV_PI_DD);
160
0
            return f_fmla(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
161
0
        }
162
0
        let scale_up = min_exp < 128u64;
163
0
        let scale_down = max_exp > 0x7ffu64 - 128u64;
164
        // At least one input is denormal, multiply both numerator and denominator
165
        // by some large enough power of 2 to normalize denormal inputs.
166
0
        if scale_up {
167
0
            num *= f64::from_bits(0x43f0000000000000);
168
0
            if !scale_down {
169
0
                den *= f64::from_bits(0x43f0000000000000);
170
0
            }
171
0
        } else if scale_down {
172
0
            den *= f64::from_bits(0x3bf0000000000000);
173
0
            if !scale_up {
174
0
                num *= f64::from_bits(0x3bf0000000000000);
175
0
            }
176
0
        }
177
178
0
        min_abs = num.to_bits();
179
0
        max_abs = den.to_bits();
180
0
        min_exp = min_abs.wrapping_shr(52);
181
0
        max_exp = max_abs.wrapping_shr(52);
182
0
    }
183
184
0
    let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize];
185
0
    let const_term = CONST_ADJ[x_sign][y_sign][recip as usize];
186
0
    let exp_diff = max_exp - min_exp;
187
    // We have the following bound for normalized n and d:
188
    //   2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
189
0
    if exp_diff > 54 {
190
0
        if max_exp >= 1075 || min_exp < 970 {
191
0
            return atan2pi_big_exp_difference_hard(num, den, x_sign, y_sign, recip, final_sign);
192
0
        }
193
0
        let z = DoubleDouble::from_exact_mult(final_sign, const_term.hi);
194
0
        let mut divided = DoubleDouble::from_exact_div(num, den);
195
0
        divided = DoubleDouble::f64_add(const_term.lo, divided);
196
0
        divided = DoubleDouble::quick_mult_f64(divided, final_sign);
197
0
        let r = DoubleDouble::add(z, divided);
198
0
        let p = DoubleDouble::quick_mult(INV_PI_DD, r);
199
0
        return p.to_f64();
200
0
    }
201
202
0
    let mut k = (64.0 * num / den).cpu_round();
203
0
    let idx = k as u64;
204
    // k = idx / 64
205
0
    k *= f64::from_bits(0x3f90000000000000);
206
207
    // Range reduction:
208
    // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64)))
209
    //                        = atan((n - d * k/64)) / (d + n * k/64))
210
0
    let num_k = DoubleDouble::from_exact_mult(num, k);
211
0
    let den_k = DoubleDouble::from_exact_mult(den, k);
212
213
    // num_dd = n - d * k
214
0
    let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo);
215
    // den_dd = d + n * k
216
0
    let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi);
217
0
    den_dd.lo += num_k.lo;
218
219
    // q = (n - d * k) / (d + n * k)
220
0
    let q = DoubleDouble::div(num_dd, den_dd);
221
    // p ~ atan(q)
222
0
    let p = atan_eval(q);
223
224
0
    let vl = ATAN_I[idx as usize];
225
0
    let vlo = DoubleDouble::from_bit_pair(vl);
226
0
    let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p));
227
228
0
    r = DoubleDouble::quick_mult(r, INV_PI_DD);
229
230
0
    let err = f_fmla(
231
0
        p.hi,
232
0
        f64::from_bits(0x3bd0000000000000),
233
0
        f64::from_bits(0x3c00000000000000),
234
    );
235
236
0
    let ub = r.hi + (r.lo + err);
237
0
    let lb = r.hi + (r.lo - err);
238
239
0
    if ub == lb {
240
0
        r.hi *= final_sign;
241
0
        r.lo *= final_sign;
242
243
0
        return r.to_f64();
244
0
    }
245
246
0
    (atan2_hard(y, x) * INV_PI_F128).fast_as_f64()
247
0
}
248
249
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
250
#[target_feature(enable = "avx", enable = "fma")]
251
0
unsafe fn atan2pi_fma_impl(y: f64, x: f64) -> f64 {
252
0
    let x_sign = x.is_sign_negative() as usize;
253
0
    let y_sign = y.is_sign_negative() as usize;
254
0
    let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
255
0
    let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff;
256
0
    let x_abs = x_bits;
257
0
    let y_abs = y_bits;
258
0
    let recip = x_abs < y_abs;
259
0
    let mut min_abs = if recip { x_abs } else { y_abs };
260
0
    let mut max_abs = if !recip { x_abs } else { y_abs };
261
0
    let mut min_exp = min_abs.wrapping_shr(52);
262
0
    let mut max_exp = max_abs.wrapping_shr(52);
263
264
0
    let mut num = f64::from_bits(min_abs);
265
0
    let mut den = f64::from_bits(max_abs);
266
267
    // Check for exceptional cases, whether inputs are 0, inf, nan, or close to
268
    // overflow, or close to underflow.
269
0
    if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
270
0
        if x.is_nan() || y.is_nan() {
271
0
            return f64::NAN;
272
0
        }
273
0
        let x_except = if x == 0.0 {
274
0
            0
275
0
        } else if x.is_infinite() {
276
0
            2
277
        } else {
278
0
            1
279
        };
280
0
        let y_except = if y == 0.0 {
281
0
            0
282
0
        } else if y.is_infinite() {
283
0
            2
284
        } else {
285
0
            1
286
        };
287
288
        // Exceptional cases:
289
        //   EXCEPT[y_except][x_except][x_is_neg]
290
        // with x_except & y_except:
291
        //   0: zero
292
        //   1: finite, non-zero
293
        //   2: infinity
294
        static EXCEPTS: [[[DoubleDouble; 2]; 3]; 3] = [
295
            [[ZERO, PI], [ZERO, PI], [ZERO, PI]],
296
            [[PI_OVER_2, PI_OVER_2], [ZERO, ZERO], [ZERO, PI]],
297
            [
298
                [PI_OVER_2, PI_OVER_2],
299
                [PI_OVER_2, PI_OVER_2],
300
                [PI_OVER_4, THREE_PI_OVER_4],
301
            ],
302
        ];
303
304
0
        if (x_except != 1) || (y_except != 1) {
305
0
            let mut r = EXCEPTS[y_except][x_except][x_sign];
306
0
            r = DoubleDouble::quick_mult_fma(r, INV_PI_DD);
307
0
            return f64::mul_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
308
0
        }
309
0
        let scale_up = min_exp < 128u64;
310
0
        let scale_down = max_exp > 0x7ffu64 - 128u64;
311
        // At least one input is denormal, multiply both numerator and denominator
312
        // by some large enough power of 2 to normalize denormal inputs.
313
0
        if scale_up {
314
0
            num *= f64::from_bits(0x43f0000000000000);
315
0
            if !scale_down {
316
0
                den *= f64::from_bits(0x43f0000000000000);
317
0
            }
318
0
        } else if scale_down {
319
0
            den *= f64::from_bits(0x3bf0000000000000);
320
0
            if !scale_up {
321
0
                num *= f64::from_bits(0x3bf0000000000000);
322
0
            }
323
0
        }
324
325
0
        min_abs = num.to_bits();
326
0
        max_abs = den.to_bits();
327
0
        min_exp = min_abs.wrapping_shr(52);
328
0
        max_exp = max_abs.wrapping_shr(52);
329
0
    }
330
331
0
    let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize];
332
0
    let const_term = CONST_ADJ[x_sign][y_sign][recip as usize];
333
0
    let exp_diff = max_exp - min_exp;
334
    // We have the following bound for normalized n and d:
335
    //   2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
336
0
    if exp_diff > 54 {
337
0
        if max_exp >= 1075 || min_exp < 970 {
338
0
            return atan2pi_big_exp_difference_hard(num, den, x_sign, y_sign, recip, final_sign);
339
0
        }
340
0
        let z = DoubleDouble::from_exact_mult_fma(final_sign, const_term.hi);
341
0
        let mut divided = DoubleDouble::from_exact_div_fma(num, den);
342
0
        divided = DoubleDouble::f64_add(const_term.lo, divided);
343
0
        divided = DoubleDouble::quick_mult_f64_fma(divided, final_sign);
344
0
        let r = DoubleDouble::add(z, divided);
345
0
        let p = DoubleDouble::quick_mult_fma(INV_PI_DD, r);
346
0
        return p.to_f64();
347
0
    }
348
349
0
    let mut k = (64.0 * num / den).round();
350
0
    let idx = k as u64;
351
    // k = idx / 64
352
0
    k *= f64::from_bits(0x3f90000000000000);
353
354
    // Range reduction:
355
    // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64)))
356
    //                        = atan((n - d * k/64)) / (d + n * k/64))
357
0
    let num_k = DoubleDouble::from_exact_mult_fma(num, k);
358
0
    let den_k = DoubleDouble::from_exact_mult_fma(den, k);
359
360
    // num_dd = n - d * k
361
0
    let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo);
362
    // den_dd = d + n * k
363
0
    let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi);
364
0
    den_dd.lo += num_k.lo;
365
366
    // q = (n - d * k) / (d + n * k)
367
0
    let q = DoubleDouble::div_fma(num_dd, den_dd);
368
    // p ~ atan(q)
369
    use crate::tangent::atan2::atan_eval_fma;
370
0
    let p = atan_eval_fma(q);
371
372
0
    let vl = ATAN_I[idx as usize];
373
0
    let vlo = DoubleDouble::from_bit_pair(vl);
374
0
    let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p));
375
376
0
    r = DoubleDouble::quick_mult_fma(r, INV_PI_DD);
377
378
0
    let err = f64::mul_add(
379
0
        p.hi,
380
0
        f64::from_bits(0x3bd0000000000000),
381
0
        f64::from_bits(0x3c00000000000000),
382
    );
383
384
0
    let ub = r.hi + (r.lo + err);
385
0
    let lb = r.hi + (r.lo - err);
386
387
0
    if ub == lb {
388
0
        r.hi *= final_sign;
389
0
        r.lo *= final_sign;
390
391
0
        return r.to_f64();
392
0
    }
393
394
0
    (atan2_hard(y, x) * INV_PI_F128).fast_as_f64()
395
0
}
396
397
/// Computes atan(x)/PI
398
///
399
/// Max found ULP 0.5
400
0
pub fn f_atan2pi(y: f64, x: f64) -> f64 {
401
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
402
    {
403
        atan2pi_gen_impl(y, x)
404
    }
405
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
406
    {
407
        use std::sync::OnceLock;
408
        static EXECUTOR: OnceLock<unsafe fn(f64, f64) -> f64> = OnceLock::new();
409
0
        let q = EXECUTOR.get_or_init(|| {
410
0
            if std::arch::is_x86_feature_detected!("avx")
411
0
                && std::arch::is_x86_feature_detected!("fma")
412
            {
413
0
                atan2pi_fma_impl
414
            } else {
415
0
                fn def_atan2pi(y: f64, x: f64) -> f64 {
416
0
                    atan2pi_gen_impl(y, x)
417
0
                }
418
0
                def_atan2pi
419
            }
420
0
        });
421
0
        unsafe { q(y, x) }
422
    }
423
0
}
424
425
#[cfg(test)]
426
mod tests {
427
    use super::*;
428
429
    #[test]
430
    fn test_atan2pi() {
431
        assert_eq!(
432
            f_atan2pi(-0.000000000000010659658919444194, 2088960.4374061823),
433
            -0.0000000000000000000016242886924270424
434
        );
435
        assert_eq!(f_atan2pi(-3.9999999981625933, 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003577142133480227), -0.5);
436
        assert_eq!(f_atan2pi(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000472842255026406,
437
            0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008045886150098693
438
        ),1.8706499392673612e-162);
439
        assert_eq!(f_atan2pi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002670088630208647,
440
                             2.0000019071157054
441
        ), 4.249573987697093e-307);
442
        assert_eq!(f_atan2pi(-5., 2.), -0.3788810584091566);
443
        assert_eq!(f_atan2pi(2., -5.), 0.8788810584091566);
444
    }
445
}