Coverage Report

Created: 2025-11-05 08:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/tangent/atanpi.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30
use crate::double_double::DoubleDouble;
31
use crate::shared_eval::poly_dd_3;
32
use crate::tangent::atan::{ATAN_CIRCLE, ATAN_REDUCE};
33
34
const ONE_OVER_PI_DD: DoubleDouble = DoubleDouble::new(
35
    f64::from_bits(0xbc76b01ec5417056),
36
    f64::from_bits(0x3fd45f306dc9c883),
37
);
38
39
const ONE_OVER_3PI: f64 = f64::from_bits(0x3fbb2995e7b7b604); // approximates 1/(3pi)
40
41
0
fn atanpi_small(x: f64) -> f64 {
42
0
    if x == 0. {
43
0
        return x;
44
0
    }
45
0
    if x.abs() == f64::from_bits(0x0015cba89af1f855) {
46
0
        return if x > 0. {
47
0
            f_fmla(
48
0
                f64::from_bits(0x9a70000000000000),
49
0
                f64::from_bits(0x1a70000000000000),
50
0
                f64::from_bits(0x0006f00f7cd3a40b),
51
            )
52
        } else {
53
0
            f_fmla(
54
0
                f64::from_bits(0x1a70000000000000),
55
0
                f64::from_bits(0x1a70000000000000),
56
0
                f64::from_bits(0x8006f00f7cd3a40b),
57
            )
58
        };
59
0
    }
60
    // generic worst case
61
0
    let mut v = x.to_bits();
62
0
    if (v & 0xfffffffffffff) == 0x59af9a1194efe
63
    // +/-0x1.59af9a1194efe*2^e
64
    {
65
0
        let e = v >> 52;
66
0
        if (e & 0x7ff) > 2 {
67
0
            v = ((e - 2) << 52) | 0xb824198b94a89;
68
0
            return if x > 0. {
69
0
                dd_fmla(
70
0
                    f64::from_bits(0x9a70000000000000),
71
0
                    f64::from_bits(0x1a70000000000000),
72
0
                    f64::from_bits(v),
73
                )
74
            } else {
75
0
                dd_fmla(
76
0
                    f64::from_bits(0x1a70000000000000),
77
0
                    f64::from_bits(0x1a70000000000000),
78
0
                    f64::from_bits(v),
79
                )
80
            };
81
0
        }
82
0
    }
83
0
    let h = x * ONE_OVER_PI_DD.hi;
84
    /* Assuming h = x*ONE_OVER_PI_DD.hi - e, the correction term is
85
    e + x * ONE_OVER_PIL, but we need to scale values to avoid underflow. */
86
0
    let mut corr = dd_fmla(
87
0
        x * f64::from_bits(0x4690000000000000),
88
0
        ONE_OVER_PI_DD.hi,
89
0
        -h * f64::from_bits(0x4690000000000000),
90
    );
91
0
    corr = dd_fmla(
92
0
        x * f64::from_bits(0x4690000000000000),
93
0
        ONE_OVER_PI_DD.lo,
94
0
        corr,
95
0
    );
96
    // now return h + corr * 2^-106
97
98
0
    dyad_fmla(corr, f64::from_bits(0x3950000000000000), h)
99
0
}
100
101
/* Deal with the case where |x| is large:
102
for x > 0, atanpi(x) = 1/2 - 1/pi * 1/x + 1/(3pi) * 1/x^3 + O(1/x^5)
103
for x < 0, atanpi(x) = -1/2 - 1/pi * 1/x + 1/(3pi) * 1/x^3 + O(1/x^5).
104
The next term 1/5*x^5/pi is smaller than 2^-107 * atanpi(x)
105
when |x| > 0x1.bep20. */
106
0
fn atanpi_asympt(x: f64) -> f64 {
107
0
    let h = f64::copysign(0.5, x);
108
    // approximate 1/x as yh + yl
109
0
    let rcy = DoubleDouble::from_quick_recip(x);
110
0
    let mut m = DoubleDouble::quick_mult(rcy, ONE_OVER_PI_DD);
111
    // m + l ~ 1/pi * 1/x
112
0
    m.hi = -m.hi;
113
0
    m.lo = dd_fmla(ONE_OVER_3PI * rcy.hi, rcy.hi * rcy.hi, -m.lo);
114
    // m + l ~ - 1/pi * 1/x + 1/(3pi) * 1/x^3
115
0
    let vh = DoubleDouble::from_exact_add(h, m.hi);
116
0
    m.hi = vh.hi;
117
0
    m = DoubleDouble::from_exact_add(vh.lo, m.lo);
118
0
    if m.hi.abs() == f64::from_bits(0x3c80000000000000) {
119
        // this is 1/2 ulp(atan(x))
120
0
        m.hi = if m.hi * m.lo > 0. {
121
0
            f64::copysign(f64::from_bits(0x3c80000000000001), m.hi)
122
        } else {
123
0
            f64::copysign(f64::from_bits(0x3c7fffffffffffff), m.hi)
124
        };
125
0
    }
126
0
    vh.hi + m.hi
127
0
}
128
129
#[inline]
130
0
fn atanpi_tiny(x: f64) -> f64 {
131
0
    let mut dx = DoubleDouble::quick_mult_f64(ONE_OVER_PI_DD, x);
132
0
    dx.lo = dd_fmla(-ONE_OVER_3PI * x, x * x, dx.lo);
133
0
    dx.to_f64()
134
0
}
135
136
#[cold]
137
0
fn as_atanpi_refine2(x: f64, a: f64) -> f64 {
138
0
    if x.abs() > f64::from_bits(0x413be00000000000) {
139
0
        return atanpi_asympt(x);
140
0
    }
141
0
    if x.abs() < f64::from_bits(0x3e4c700000000000) {
142
0
        return atanpi_tiny(x);
143
0
    }
144
    const CH: [(u64, u64); 3] = [
145
        (0xbfd5555555555555, 0xbc75555555555555),
146
        (0x3fc999999999999a, 0xbc6999999999bcb8),
147
        (0xbfc2492492492492, 0xbc6249242093c016),
148
    ];
149
    const CL: [u64; 4] = [
150
        0x3fbc71c71c71c71c,
151
        0xbfb745d1745d1265,
152
        0x3fb3b13b115bcbc4,
153
        0xbfb1107c41ad3253,
154
    ];
155
0
    let phi = ((a.abs()) * f64::from_bits(0x40545f306dc9c883) + 256.5).to_bits();
156
0
    let i: i64 = ((phi >> (52 - 8)) & 0xff) as i64;
157
0
    let dh: DoubleDouble = if i == 128 {
158
0
        DoubleDouble::from_quick_recip(-x)
159
    } else {
160
0
        let ta = f64::copysign(f64::from_bits(ATAN_REDUCE[i as usize].0), x);
161
0
        let dzta = DoubleDouble::from_exact_mult(x, ta);
162
0
        let zmta = x - ta;
163
0
        let v = 1. + dzta.hi;
164
0
        let d = 1. - v;
165
0
        let ev = (d + dzta.hi) - ((d + v) - 1.) + dzta.lo;
166
0
        let r = 1.0 / v;
167
0
        let rl = f_fmla(-ev, r, dd_fmla(r, -v, 1.0)) * r;
168
0
        DoubleDouble::quick_mult_f64(DoubleDouble::new(rl, r), zmta)
169
    };
170
0
    let d2 = DoubleDouble::quick_mult(dh, dh);
171
0
    let h4 = d2.hi * d2.hi;
172
0
    let h3 = DoubleDouble::quick_mult(dh, d2);
173
174
0
    let fl0 = f_fmla(d2.hi, f64::from_bits(CL[1]), f64::from_bits(CL[0]));
175
0
    let fl1 = f_fmla(d2.hi, f64::from_bits(CL[3]), f64::from_bits(CL[2]));
176
177
0
    let fl = d2.hi * f_fmla(h4, fl1, fl0);
178
0
    let mut f = poly_dd_3(d2, CH, fl);
179
0
    f = DoubleDouble::quick_mult(h3, f);
180
    let (ah, mut al, mut at);
181
0
    if i == 0 {
182
0
        ah = dh.hi;
183
0
        al = f.hi;
184
0
        at = f.lo;
185
0
    } else {
186
0
        let mut df = 0.;
187
0
        if i < 128 {
188
0
            df = f64::copysign(1.0, x) * f64::from_bits(ATAN_REDUCE[i as usize].1);
189
0
        }
190
0
        let id = f64::copysign(i as f64, x);
191
0
        ah = f64::from_bits(0x3f8921fb54442d00) * id;
192
0
        al = f64::from_bits(0x3c88469898cc5180) * id;
193
0
        at = f64::from_bits(0xb97fc8f8cbb5bf80) * id;
194
0
        let v0 = DoubleDouble::add(DoubleDouble::new(at, al), DoubleDouble::new(0., df));
195
0
        let v1 = DoubleDouble::add(v0, dh);
196
0
        let v2 = DoubleDouble::add(v1, f);
197
0
        al = v2.hi;
198
0
        at = v2.lo;
199
    }
200
201
0
    let v2 = DoubleDouble::from_exact_add(ah, al);
202
0
    let v1 = DoubleDouble::from_exact_add(v2.lo, at);
203
204
0
    let z0 = DoubleDouble::quick_mult(DoubleDouble::new(v1.hi, v2.hi), ONE_OVER_PI_DD);
205
    // atanpi_end
206
0
    z0.to_f64()
207
0
}
208
209
/// Computes atan(x)/pi
210
///
211
/// Max ULP 0.5
212
0
pub fn f_atanpi(x: f64) -> f64 {
213
    const CH: [u64; 4] = [
214
        0x3ff0000000000000,
215
        0xbfd555555555552b,
216
        0x3fc9999999069c20,
217
        0xbfc248d2c8444ac6,
218
    ];
219
0
    let t = x.to_bits();
220
0
    let at: u64 = t & 0x7fff_ffff_ffff_ffff;
221
0
    let mut i = (at >> 51).wrapping_sub(2030u64);
222
0
    if at < 0x3f7b21c475e6362au64 {
223
        // |x| < 0.006624
224
0
        if at < 0x3c90000000000000u64 {
225
            // |x| < 2^-54
226
0
            return atanpi_small(x);
227
0
        }
228
0
        if x == 0. {
229
0
            return x;
230
0
        }
231
        const CH2: [u64; 4] = [
232
            0xbfd5555555555555,
233
            0x3fc99999999998c1,
234
            0xbfc249249176aec0,
235
            0x3fbc711fd121ae80,
236
        ];
237
0
        let x2 = x * x;
238
0
        let x3 = x * x2;
239
0
        let x4 = x2 * x2;
240
241
0
        let f0 = f_fmla(x2, f64::from_bits(CH2[3]), f64::from_bits(CH2[2]));
242
0
        let f1 = f_fmla(x2, f64::from_bits(CH2[1]), f64::from_bits(CH2[0]));
243
244
0
        let f = x3 * f_fmla(x4, f0, f1);
245
        // begin_atanpi
246
        /* Here x+f approximates atan(x), with absolute error bounded by
247
        0x4.8p-52*f (see atan.c). After multiplying by 1/pi this error
248
        will be bounded by 0x1.6fp-52*f. For |x| < 0x1.b21c475e6362ap-8
249
        we have |f| < 2^-16*|x|, thus the error is bounded by
250
        0x1.6fp-52*2^-16*|x| < 0x1.6fp-68. */
251
        // multiply x + f by 1/pi
252
0
        let hy = DoubleDouble::quick_mult(DoubleDouble::new(f, x), ONE_OVER_PI_DD);
253
        /* The rounding error in muldd and the approximation error between
254
        1/pi and ONE_OVER_PIH + ONE_OVER_PIL are covered by the difference
255
        between 0x4.8p-52*pi and 0x1.6fp-52, which is > 2^-61.8. */
256
0
        let mut ub = hy.hi + dd_fmla(f64::from_bits(0x3bb6f00000000000), x, hy.lo);
257
0
        let lb = hy.hi + dd_fmla(f64::from_bits(0xbbb6f00000000000), x, hy.lo);
258
0
        if ub == lb {
259
0
            return ub;
260
0
        }
261
        // end_atanpi
262
0
        ub = (f + f * f64::from_bits(0x3cd2000000000000)) + x; // atanpi_specific, original value in atan
263
0
        return as_atanpi_refine2(x, ub);
264
0
    }
265
    // now |x| >= 0x1.b21c475e6362ap-8
266
    let h;
267
    let mut a: DoubleDouble;
268
0
    if at > 0x4062ded8e34a9035u64 {
269
        // |x| > 0x1.2ded8e34a9035p+7, atanpi|x| > 0.49789
270
0
        if at >= 0x43445f306dc9c883u64 {
271
            // |x| >= 0x1.45f306dc9c883p+53, atanpi|x| > 0.5 - 0x1p-55
272
0
            if at >= (0x7ffu64 << 52) {
273
                // case Inf or NaN
274
0
                if at == 0x7ffu64 << 52 {
275
                    // Inf
276
0
                    return f64::copysign(0.5, x);
277
0
                } // atanpi_specific
278
0
                return x + x; // NaN
279
0
            }
280
0
            return f64::copysign(0.5, x) - f64::copysign(f64::from_bits(0x3c70000000000000), x);
281
0
        }
282
0
        h = -1.0 / x;
283
0
        a = DoubleDouble::new(
284
0
            f64::copysign(f64::from_bits(0x3c91a62633145c07), x),
285
0
            f64::copysign(f64::from_bits(0x3ff921fb54442d18), x),
286
0
        );
287
    } else {
288
        // 0x1.b21c475e6362ap-8 <= |x| <= 0x1.2ded8e34a9035p+7
289
        /* we need to deal with |x| = 1 separately since in this case
290
        h=0 below, and the error is measured in terms of multiple of h */
291
0
        if at == 0x3ff0000000000000 {
292
            // |x| = 1
293
0
            return f64::copysign(f64::from_bits(0x3fd0000000000000), x);
294
0
        }
295
0
        let u: u64 = t & 0x0007ffffffffffff;
296
0
        let ut = u >> (51 - 16);
297
0
        let ut2 = (ut * ut) >> 16;
298
0
        let vc = ATAN_CIRCLE[i as usize];
299
0
        i = (((vc[0] as u64).wrapping_shl(16)) + ut * (vc[1] as u64) - ut2 * (vc[2] as u64))
300
0
            >> (16 + 9);
301
0
        let va = ATAN_REDUCE[i as usize];
302
0
        let ta = f64::copysign(1.0, x) * f64::from_bits(va.0);
303
0
        let id = f64::copysign(1.0, x) * i as f64;
304
0
        h = (x - ta) / (1. + x * ta);
305
0
        a = DoubleDouble::new(
306
0
            f64::copysign(1.0, x) * f64::from_bits(va.1) + f64::from_bits(0x3c88469898cc5170) * id,
307
0
            f64::from_bits(0x3f8921fb54442d00) * id,
308
0
        );
309
    }
310
0
    let h2 = h * h;
311
0
    let h4 = h2 * h2;
312
313
0
    let f0 = f_fmla(h2, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
314
0
    let f1 = f_fmla(h2, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
315
316
0
    let f = f_fmla(h4, f0, f1);
317
0
    a.lo = dd_fmla(h, f, a.lo);
318
    // begin_atanpi
319
    /* Now ah + al approximates atan(x) with error bounded by 0x3.fp-52*h
320
    (see atan.c), thus by 0x1.41p-52*h after multiplication by 1/pi.
321
    We normalize ah+al so that the rounding error in muldd is negligible
322
    below. */
323
0
    let e0 = h * f64::from_bits(0x3ccf800000000000); // original value in atan.c
324
0
    let ub0 = (a.lo + e0) + a.hi; // original value in atan.c
325
0
    a = DoubleDouble::from_exact_add(a.hi, a.lo);
326
0
    a = DoubleDouble::quick_mult(a, ONE_OVER_PI_DD);
327
    /* The rounding error in muldd() and the approximation error between 1/pi
328
    and ONE_OVER_PIH+ONE_OVER_PIL are absorbed when rounding up 0x3.fp-52*pi
329
    to 0x1.41p-52. */
330
0
    let e = h * f64::from_bits(0x3cb4100000000000); // atanpi_specific
331
    // end_atanpi
332
0
    let ub = (a.lo + e) + a.hi;
333
0
    let lb = (a.lo - e) + a.hi;
334
0
    if ub == lb {
335
0
        return ub;
336
0
    }
337
0
    as_atanpi_refine2(x, ub0)
338
0
}
339
340
#[cfg(test)]
341
mod tests {
342
343
    use super::*;
344
345
    #[test]
346
    fn atanpi_test() {
347
        assert_eq!(f_atanpi(119895888825197.97), 0.49999999999999734);
348
        assert_eq!(f_atanpi(1.5610674235815122E-307), 4.969031939254545e-308);
349
        assert_eq!(f_atanpi(0.00004577636903266291), 0.000014571070806516354);
350
        assert_eq!(f_atanpi(-0.00004577636903266291), -0.000014571070806516354);
351
        assert_eq!(f_atanpi(-0.4577636903266291), -0.13664770469904508);
352
        assert_eq!(f_atanpi(0.9876636903266291), 0.24802445507819193);
353
    }
354
}