/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/tangent/atanpif.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | |
/// Shared kernel for `atan(x) / pi` on `f32`, generic over the fused multiply-add it uses.
///
/// `fma(a, b, c)` is called wherever the kernel wants `a * b + c`; the callers in this
/// file pass either `f64::mul_add` or the crate's `f_fmla`.
///
/// The argument is classified by its biased exponent:
///
/// * `|x| >= 2^25`, infinities and NaN: the result is `±0.5` minus a tiny correction
///   (NaN is propagated as `x + x`).
/// * `|x| < 2^-13`: a short series, `x/pi` optionally corrected by the cubic term.
/// * three exact bit patterns whose results are hard-coded.
/// * everything else: a ratio of two polynomials in `w^2`, where `w` is `x` for
///   `|x| < 1` and `1/x` otherwise; in the latter case the quotient is subtracted
///   from `±0.5`.
#[inline(always)]
fn atanpif_gen_impl<Q: Fn(f64, f64, f64) -> f64>(x: f32, fma: Q) -> f32 {
    let bits = x.to_bits();
    let biased_exp = ((bits >> 23) & 0xff) as i32;

    if biased_exp > 127 + 24 {
        // |x| >= 2^25
        let half = f32::copysign(0.5, x);
        if biased_exp == 0xff {
            // All-ones exponent: a non-zero mantissa is NaN, a zero mantissa is infinity.
            return if (bits & 0x007f_ffff) != 0 {
                x + x
            } else {
                half
            };
        }
        // Warning: 0x1.45f306p-2f / x underflows for |x| >= 0x1.45f306p+124
        let correction = if x.abs() >= f32::from_bits(0x7da2f983) {
            // Fixed 2^-26 stand-in for the quotient that would otherwise underflow.
            f32::copysign(f32::from_bits(0x32800000), x)
        } else {
            f32::from_bits(0x3ea2f983) / x
        };
        return half - correction;
    }

    let xd = x as f64;
    if biased_exp < 127 - 13 {
        // |x| < 2^-13
        let scaled = xd * f64::from_bits(0x3fd45f306dc9c883); // x * (1/pi)
        if biased_exp < 127 - 25 {
            // |x| < 2^-25: the linear term alone is returned.
            return scaled as f32;
        }
        // Cubic correction: (1/3) * (x/pi) * x^2.
        let cubic = (f64::from_bits(0x3fd5555555555555) * scaled) * (xd * xd);
        return (scaled - cubic) as f32;
    }

    // Inputs (by magnitude bits) whose results are hard-coded instead of going through
    // the polynomial path.
    match bits & 0x7fff_ffff {
        0x3fa267dd => {
            return f32::copysign(f32::from_bits(0x3e933802), x)
                - f32::copysign(f32::from_bits(0x24000000), x);
        }
        0x3f693531 => {
            return f32::copysign(f32::from_bits(0x3e70d331), x)
                + f32::copysign(f32::from_bits(0x31800000), x);
        }
        0x3f800000 => return f32::copysign(f32::from_bits(0x3e800000), x),
        _ => {}
    }

    // For |x| >= 1 the polynomials are evaluated at 1/x.
    let use_reciprocal = biased_exp >= 127;
    let w = if use_reciprocal { 1. / xd } else { xd };
    let w2 = w * w;
    let w4 = w2 * w2;
    let w8 = w4 * w4;

    // Numerator coefficients (bit patterns of f64 values), lowest order first.
    const NUM: [u64; 6] = [
        0x3fd45f306dc9c882,
        0x3fe733b561bc23d5,
        0x3fe28d9805bdfbf2,
        0x3fc8c3ba966ae287,
        0x3f994a7f81ee634b,
        0x3f4a6bbf6127a6df,
    ];
    // Denominator coefficients (bit patterns of f64 values), lowest order first.
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0x4004e3b3ecc2518f,
        0x4003ef4a360ff063,
        0x3ff0f1dc55bad551,
        0x3fc8da0fecc018a4,
        0x3f88fa87803776bf,
        0x3f1dadf2ca0acb43,
    ];

    // Pairs of coefficients are combined first, then merged with powers of w^2.
    let n01 = fma(w2, f64::from_bits(NUM[1]), f64::from_bits(NUM[0]));
    let n23 = fma(w2, f64::from_bits(NUM[3]), f64::from_bits(NUM[2]));
    let n45 = fma(w2, f64::from_bits(NUM[5]), f64::from_bits(NUM[4]));
    let numerator = ((n01 + w4 * n23) + w8 * n45) * w;

    let d01 = fma(w2, f64::from_bits(DEN[1]), f64::from_bits(DEN[0]));
    let d23 = fma(w2, f64::from_bits(DEN[3]), f64::from_bits(DEN[2]));
    let d45 = fma(w2, f64::from_bits(DEN[5]), f64::from_bits(DEN[4]));
    let d_low = d01 + w4 * d23;
    let d_high = d45 + w4 * f64::from_bits(DEN[6]);
    let denominator = fma(w8, d_high, d_low);

    let quotient = numerator / denominator;
    let result = if use_reciprocal {
        f64::copysign(0.5, w) - quotient
    } else {
        quotient
    };
    result as f32
}
119 | | |
120 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
121 | | #[target_feature(enable = "avx", enable = "fma")] |
122 | 0 | unsafe fn atanpif_fma_impl(x: f32) -> f32 { |
123 | 0 | atanpif_gen_impl(x, f64::mul_add) |
124 | 0 | } |
125 | | |
126 | | /// Computes atan(x)/PI |
127 | | /// |
128 | | /// Max ULP 0.5 |
129 | | #[inline] |
130 | 0 | pub fn f_atanpif(x: f32) -> f32 { |
131 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
132 | | { |
133 | | atanpif_gen_impl(x, f_fmla) |
134 | | } |
135 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
136 | | { |
137 | | use std::sync::OnceLock; |
138 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
139 | 0 | let q = EXECUTOR.get_or_init(|| { |
140 | 0 | if std::arch::is_x86_feature_detected!("avx") |
141 | 0 | && std::arch::is_x86_feature_detected!("fma") |
142 | | { |
143 | 0 | atanpif_fma_impl |
144 | | } else { |
145 | 0 | fn def_atanpif(x: f32) -> f32 { |
146 | 0 | atanpif_gen_impl(x, f_fmla) |
147 | 0 | } |
148 | 0 | def_atanpif |
149 | | } |
150 | 0 | }); |
151 | 0 | unsafe { q(x) } |
152 | | } |
153 | 0 | } |
154 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks ordinary finite arguments against known values.
    #[test]
    fn test_atanpif() {
        assert_eq!(f_atanpif(0.0), 0.0);
        assert_eq!(f_atanpif(1.0), 0.25);
        assert_eq!(f_atanpif(1.5), 0.31283295);
        assert_eq!(f_atanpif(-1.0), -0.25);
        assert_eq!(f_atanpif(-1.5), -0.31283295);
    }

    /// Infinities map to +/-0.5, NaN is propagated and the sign of zero is kept.
    #[test]
    fn test_atanpif_special_values() {
        assert_eq!(f_atanpif(f32::INFINITY), 0.5);
        assert_eq!(f_atanpif(f32::NEG_INFINITY), -0.5);
        assert!(f_atanpif(f32::NAN).is_nan());

        let negative_zero = f_atanpif(-0.0);
        assert_eq!(negative_zero, 0.0);
        assert!(negative_zero.is_sign_negative());
    }

    /// Arguments at or above 2^25 in magnitude round to +/-0.5 in `f32`.
    #[test]
    fn test_atanpif_large_arguments() {
        assert_eq!(f_atanpif(1e38), 0.5);
        assert_eq!(f_atanpif(-1e38), -0.5);
    }

    /// Every branch of the kernel is sign-symmetric, so the function must be exactly odd.
    #[test]
    fn test_atanpif_is_odd() {
        for &v in &[1e-30f32, 1e-5, 0.3, 0.75, 2.0, 1e10, 1e38] {
            assert_eq!(f_atanpif(-v), -f_atanpif(v), "asymmetry at {v}");
        }
    }
}