/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/acospif.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::asinpif::ASINCOSF_PI_TABLE; |
30 | | use crate::common::{dd_fmla, f_fmla}; |
31 | | |
32 | | #[inline(always)] |
33 | | /// fma - fma |
34 | | /// dd_fma - mandatory fma fallback |
35 | 0 | fn acospif_gen_impl<Q: Fn(f64, f64, f64) -> f64, F: Fn(f64, f64, f64) -> f64>( |
36 | 0 | x: f32, |
37 | 0 | fma: Q, |
38 | 0 | dd_fma: F, |
39 | 0 | ) -> f32 { |
40 | 0 | let ax = x.abs(); |
41 | 0 | let az = ax as f64; |
42 | 0 | let z = x as f64; |
43 | 0 | let t: u32 = x.to_bits(); |
44 | 0 | let e: i32 = ((t >> 23) & 0xff) as i32; |
45 | 0 | if e >= 127 { |
46 | 0 | if x == 1.0 { |
47 | 0 | return 0.0; |
48 | 0 | } |
49 | 0 | if x == -1.0 { |
50 | 0 | return 1.0; |
51 | 0 | } |
52 | 0 | if e == 0xff && (t.wrapping_shl(9)) != 0 { |
53 | 0 | return x + x; |
54 | 0 | } // nan |
55 | 0 | return f32::NAN; |
56 | 0 | } |
57 | 0 | let s: i32 = 146i32.wrapping_sub(e); |
58 | 0 | let mut i = 0i32; |
59 | 0 | if s < 32 { |
60 | 0 | i = (((t & 0x007fffff) | 1 << 23) >> s) as i32; |
61 | 0 | } |
62 | 0 | let c = ASINCOSF_PI_TABLE[i as usize & 15]; |
63 | 0 | let z2 = z * z; |
64 | 0 | let z4 = z2 * z2; |
65 | 0 | if i == 0 { |
66 | 0 | let mut c0 = fma(z2, f64::from_bits(c[1]), f64::from_bits(c[0])); |
67 | 0 | let c2 = fma(z2, f64::from_bits(c[3]), f64::from_bits(c[2])); |
68 | 0 | let mut c4 = fma(z2, f64::from_bits(c[5]), f64::from_bits(c[4])); |
69 | 0 | let c6 = fma(z2, f64::from_bits(c[7]), f64::from_bits(c[6])); |
70 | 0 | c0 += c2 * z4; |
71 | 0 | c4 += c6 * z4; |
72 | | /* For |x| <= 0x1.0fd288p-127, c0 += c4*(z4*z4) would raise a spurious |
73 | | underflow exception, we use an FMA instead, where c4 * z4 does not |
74 | | underflow. */ |
75 | 0 | c0 = dd_fma(c4 * z4, z4, c0); |
76 | 0 | fma(-z, c0, 0.5) as f32 |
77 | | } else { |
78 | 0 | let f = (1. - az).sqrt(); |
79 | 0 | let mut c0 = fma(az, f64::from_bits(c[1]), f64::from_bits(c[0])); |
80 | 0 | let c2 = fma(az, f64::from_bits(c[3]), f64::from_bits(c[2])); |
81 | 0 | let mut c4 = fma(az, f64::from_bits(c[5]), f64::from_bits(c[4])); |
82 | 0 | let c6 = fma(az, f64::from_bits(c[7]), f64::from_bits(c[6])); |
83 | 0 | c0 += c2 * z2; |
84 | 0 | c4 += c6 * z2; |
85 | 0 | c0 += c4 * z4; |
86 | | static SIGN: [f64; 2] = [0., 1.]; |
87 | 0 | let r = SIGN[(t >> 31) as usize] + c0 * f64::copysign(f, x as f64); |
88 | 0 | r as f32 |
89 | | } |
90 | 0 | } Unexecuted instantiation: pxfm::acospif::acospif_gen_impl::<<f64>::mul_add, <f64>::mul_add> Unexecuted instantiation: pxfm::acospif::acospif_gen_impl::<pxfm::common::f_fmla, pxfm::common::dd_fmla> |
91 | | |
92 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
93 | | #[target_feature(enable = "avx", enable = "fma")] |
94 | 0 | unsafe fn acospif_fma_impl(x: f32) -> f32 { |
95 | 0 | acospif_gen_impl(x, f64::mul_add, f64::mul_add) |
96 | 0 | } |
97 | | |
98 | | /// Computes acos(x)/PI |
99 | | /// |
100 | | /// Max ULP 0.5 |
101 | | #[inline] |
102 | 0 | pub fn f_acospif(x: f32) -> f32 { |
103 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
104 | | { |
105 | | acospif_gen_impl(x, f_fmla, dd_fmla) |
106 | | } |
107 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
108 | | { |
109 | | use std::sync::OnceLock; |
110 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
111 | 0 | let q = EXECUTOR.get_or_init(|| { |
112 | 0 | if std::arch::is_x86_feature_detected!("avx") |
113 | 0 | && std::arch::is_x86_feature_detected!("fma") |
114 | | { |
115 | 0 | acospif_fma_impl |
116 | | } else { |
117 | 0 | fn def_acospif(x: f32) -> f32 { |
118 | 0 | acospif_gen_impl(x, f_fmla, dd_fmla) |
119 | 0 | } |
120 | 0 | def_acospif |
121 | | } |
122 | 0 | }); |
123 | 0 | unsafe { q(x) } |
124 | | } |
125 | 0 | } |
126 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Values inside the domain, including the upper endpoint.
    #[test]
    fn test_acospif() {
        assert_eq!(f_acospif(0.0), 0.5);
        assert_eq!(f_acospif(0.5), 0.33333334);
        assert_eq!(f_acospif(1.0), 0.0);
    }

    /// Branches the kernel handles explicitly before any polynomial work:
    /// the lower endpoint, NaN propagation, and out-of-domain arguments.
    #[test]
    fn test_acospif_special_cases() {
        assert_eq!(f_acospif(-1.0), 1.0);
        assert!(f_acospif(f32::NAN).is_nan());
        assert!(f_acospif(2.0).is_nan());
        assert!(f_acospif(-2.0).is_nan());
        assert!(f_acospif(f32::INFINITY).is_nan());
        assert!(f_acospif(f32::NEG_INFINITY).is_nan());
    }
}