/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/tangent/atanf.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{f_fmla, f_fmlaf}; |
30 | | |
31 | | #[inline(always)] |
32 | 0 | fn atanf_gen_impl<Q: Fn(f64, f64, f64) -> f64>(x: f32, fma: Q) -> f32 { |
33 | | const PI2: f64 = f64::from_bits(0x3ff921fb54442d18); |
34 | 0 | let t = x.to_bits(); |
35 | 0 | let e = (t >> 23) & 0xff; |
36 | 0 | let gt = e >= 127; |
37 | 0 | let ta = t & 0x7fffffff; |
38 | 0 | if ta >= 0x4c700518u32 { |
39 | | // |x| >= 6.29198e+07 |
40 | 0 | if ta > 0x7f800000u32 { |
41 | 0 | return x + x; |
42 | 0 | } // nan |
43 | 0 | return f32::copysign(PI2 as f32, x); // inf or |x| >= 6.29198e+07 |
44 | 0 | } |
45 | 0 | if e < 127 - 13 { |
46 | | // |x| < 2^-13 |
47 | 0 | if e < 127 - 25 { |
48 | | // |x| < 2^-25 |
49 | 0 | if t << 1 == 0 { |
50 | 0 | return x; |
51 | 0 | } |
52 | 0 | let res = f_fmlaf(-x, x.abs(), x); |
53 | 0 | return res; |
54 | 0 | } |
55 | 0 | return f_fmlaf(-f64::from_bits(0x3fd5555560000000) as f32 * x, x * x, x); |
56 | 0 | } |
57 | | /* now |x| >= 0.00012207 */ |
58 | 0 | let mut z = x as f64; |
59 | 0 | if gt { |
60 | 0 | z = 1.0 / z; |
61 | 0 | } /* gt is non-zero for |x| >= 1 */ |
62 | 0 | let z2 = z * z; |
63 | 0 | let z4 = z2 * z2; |
64 | 0 | let z8 = z4 * z4; |
65 | | /* polynomials generated using rminimax |
66 | | (https://gitlab.inria.fr/sfilip/rminimax) with the following command: |
67 | | ./ratapprox --function="atan(x)" --dom=[0.000122070,1] --num=[x,x^3,x^5,x^7,x^9,x^11,x^13] --den=[1,x^2,x^4,x^6,x^8,x^10,x^12] --output=atanf.sollya --log |
68 | | (see output atanf.sollya) |
69 | | The coefficient cd[0] was slightly reduced from the original value |
70 | | 0.330005 to avoid an exceptional case for |x| = 0.069052 |
71 | | and rounding to nearest. |
72 | | */ |
73 | | const CN: [u64; 7] = [ |
74 | | 0x3fd51eccde075d67, |
75 | | 0x3fea76bb5637f2f2, |
76 | | 0x3fe81e0eed20de88, |
77 | | 0x3fd376c8ca67d11d, |
78 | | 0x3faaec7b69202ac6, |
79 | | 0x3f69561899acc73e, |
80 | | 0x3efbf9fa5b67e600, |
81 | | ]; |
82 | | const CD: [u64; 7] = [ |
83 | | 0x3fd51eccde075d66, |
84 | | 0x3fedfbdd7b392d28, |
85 | | 0x3ff0000000000000, |
86 | | 0x3fdfd22bf0e89b54, |
87 | | 0x3fbd91ff8b576282, |
88 | | 0x3f8653ea99fc9bb0, |
89 | | 0x3f31e7fcc202340a, |
90 | | ]; |
91 | 0 | let mut cn0 = fma(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0])); |
92 | 0 | let cn2 = fma(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2])); |
93 | 0 | let mut cn4 = fma(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4])); |
94 | 0 | let cn6 = f64::from_bits(CN[6]); |
95 | 0 | cn0 = fma(z4, cn2, cn0); |
96 | 0 | cn4 = fma(z4, cn6, cn4); |
97 | 0 | cn0 = fma(z8, cn4, cn0); |
98 | 0 | cn0 *= z; |
99 | 0 | let mut cd0 = fma(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0])); |
100 | 0 | let cd2 = fma(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2])); |
101 | 0 | let mut cd4 = fma(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4])); |
102 | 0 | let cd6 = f64::from_bits(CD[6]); |
103 | 0 | cd0 = fma(z4, cd2, cd0); |
104 | 0 | cd4 = fma(z4, cd6, cd4); |
105 | 0 | cd0 = fma(z8, cd4, cd0); |
106 | 0 | let r = cn0 / cd0; |
107 | 0 | if !gt { |
108 | 0 | return r as f32; |
109 | 0 | } /* for |x| < 1, (float) r is correctly rounded */ |
110 | | |
111 | | const PI_OVER2_H: f64 = f64::from_bits(0x3ff9000000000000); |
112 | | const PI_OVER2_L: f64 = f64::from_bits(0x3f80fdaa22168c23); |
113 | | /* now r approximates atan(1/x), we use atan(x) + atan(1/x) = sign(x)*pi/2, |
114 | | where PI_OVER2_H + PI_OVER2_L approximates pi/2. |
115 | | With sign(z)*L + (-r + sign(z)*H), it fails for x=0x1.98c252p+12 and |
116 | | rounding upward. |
117 | | With sign(z)*PI - r, where PI is a double approximation of pi to nearest, |
118 | | it fails for x=0x1.ddf9f6p+0 and rounding upward. */ |
119 | 0 | ((f64::copysign(PI_OVER2_L, z) - r) + f64::copysign(PI_OVER2_H, z)) as f32 |
120 | 0 | } Unexecuted instantiation: pxfm::tangent::atanf::atanf_gen_impl::<<f64>::mul_add> Unexecuted instantiation: pxfm::tangent::atanf::atanf_gen_impl::<pxfm::common::f_fmla> |
121 | | |
122 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
123 | | #[target_feature(enable = "avx", enable = "fma")] |
124 | 0 | unsafe fn atanf_fma_impl(x: f32) -> f32 { |
125 | 0 | atanf_gen_impl(x, f64::mul_add) |
126 | 0 | } |
127 | | |
128 | | /// Computes atan |
129 | | /// |
130 | | /// Max found ULP 0.49999973 |
131 | | #[inline] |
132 | 0 | pub fn f_atanf(x: f32) -> f32 { |
133 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
134 | | { |
135 | | atanf_gen_impl(x, f_fmla) |
136 | | } |
137 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
138 | | { |
139 | | use std::sync::OnceLock; |
140 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
141 | 0 | let q = EXECUTOR.get_or_init(|| { |
142 | 0 | if std::arch::is_x86_feature_detected!("avx") |
143 | 0 | && std::arch::is_x86_feature_detected!("fma") |
144 | | { |
145 | 0 | atanf_fma_impl |
146 | | } else { |
147 | 0 | fn def_atanf(x: f32) -> f32 { |
148 | 0 | atanf_gen_impl(x, f_fmla) |
149 | 0 | } |
150 | 0 | def_atanf |
151 | | } |
152 | 0 | }); |
153 | 0 | unsafe { q(x) } |
154 | | } |
155 | 0 | } |
156 | | |
157 | | #[cfg(test)] |
158 | | mod tests { |
159 | | use super::*; |
160 | | |
161 | | #[test] |
162 | | fn f_atan_test() { |
163 | | assert!( |
164 | | (f_atanf(1.0) - std::f32::consts::PI / 4f32).abs() < 1e-6, |
165 | | "Invalid result {}", |
166 | | f_atanf(1f32) |
167 | | ); |
168 | | assert!( |
169 | | (f_atanf(2f32) - 1.107148717794090503017065f32).abs() < 1e-6, |
170 | | "Invalid result {}", |
171 | | f_atanf(2f32) |
172 | | ); |
173 | | assert!( |
174 | | (f_atanf(5f32) - 1.3734007669450158608612719264f32).abs() < 1e-6, |
175 | | "Invalid result {}", |
176 | | f_atanf(5f32) |
177 | | ); |
178 | | } |
179 | | } |