/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/tangent/atanf.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{f_fmla, f_fmlaf};

#[inline(always)]
fn atanf_gen_impl<Q: Fn(f64, f64, f64) -> f64>(x: f32, fma: Q) -> f32 {
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
    let t = x.to_bits();
    let e = (t >> 23) & 0xff;
    let gt = e >= 127;
    let ta = t & 0x7fffffff;
    if ta >= 0x4c700518u32 {
        // |x| >= 6.29198e+07
        if ta > 0x7f800000u32 {
            return x + x;
        } // nan
        return f32::copysign(PI2 as f32, x); // inf or |x| >= 6.29198e+07
    }
    if e < 127 - 13 {
        // |x| < 2^-13
        if e < 127 - 25 {
            // |x| < 2^-25
            if t << 1 == 0 {
                return x;
            }
            let res = f_fmlaf(-x, x.abs(), x);
            return res;
        }
        return f_fmlaf(-f64::from_bits(0x3fd5555560000000) as f32 * x, x * x, x);
    }
    /* now |x| >= 0.00012207 */
    let mut z = x as f64;
    if gt {
        z = 1.0 / z;
    } /* gt is non-zero for |x| >= 1 */
    let z2 = z * z;
    let z4 = z2 * z2;
    let z8 = z4 * z4;
    /* polynomials generated using rminimax
       (https://gitlab.inria.fr/sfilip/rminimax) with the following command:
       ./ratapprox --function="atan(x)" --dom=[0.000122070,1] --num=[x,x^3,x^5,x^7,x^9,x^11,x^13] --den=[1,x^2,x^4,x^6,x^8,x^10,x^12] --output=atanf.sollya --log
       (see output atanf.sollya)
       The coefficient cd[0] was slightly reduced from the original value
       0.330005 to avoid an exceptional case for |x| = 0.069052
       and rounding to nearest.
    */
    const CN: [u64; 7] = [
        0x3fd51eccde075d67,
        0x3fea76bb5637f2f2,
        0x3fe81e0eed20de88,
        0x3fd376c8ca67d11d,
        0x3faaec7b69202ac6,
        0x3f69561899acc73e,
        0x3efbf9fa5b67e600,
    ];
    const CD: [u64; 7] = [
        0x3fd51eccde075d66,
        0x3fedfbdd7b392d28,
        0x3ff0000000000000,
        0x3fdfd22bf0e89b54,
        0x3fbd91ff8b576282,
        0x3f8653ea99fc9bb0,
        0x3f31e7fcc202340a,
    ];
    let mut cn0 = fma(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0]));
    let cn2 = fma(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2]));
    let mut cn4 = fma(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4]));
    let cn6 = f64::from_bits(CN[6]);
    cn0 = fma(z4, cn2, cn0);
    cn4 = fma(z4, cn6, cn4);
    cn0 = fma(z8, cn4, cn0);
    cn0 *= z;
    let mut cd0 = fma(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0]));
    let cd2 = fma(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2]));
    let mut cd4 = fma(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4]));
    let cd6 = f64::from_bits(CD[6]);
    cd0 = fma(z4, cd2, cd0);
    cd4 = fma(z4, cd6, cd4);
    cd0 = fma(z8, cd4, cd0);
    let r = cn0 / cd0;
    if !gt {
        return r as f32;
    } /* for |x| < 1, (float) r is correctly rounded */

    const PI_OVER2_H: f64 = f64::from_bits(0x3ff9000000000000);
    const PI_OVER2_L: f64 = f64::from_bits(0x3f80fdaa22168c23);
    /* now r approximates atan(1/x), we use atan(x) + atan(1/x) = sign(x)*pi/2,
    where PI_OVER2_H + PI_OVER2_L approximates pi/2.
    With sign(z)*L + (-r + sign(z)*H), it fails for x=0x1.98c252p+12 and
    rounding upward.
    With sign(z)*PI - r, where PI is a double approximation of pi to nearest,
    it fails for x=0x1.ddf9f6p+0 and rounding upward. */
    ((f64::copysign(PI_OVER2_L, z) - r) + f64::copysign(PI_OVER2_H, z)) as f32
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn atanf_fma_impl(x: f32) -> f32 {
    atanf_gen_impl(x, f64::mul_add)
}

/// Computes atan
///
/// Max found ULP 0.49999973
#[inline]
pub fn f_atanf(x: f32) -> f32 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        atanf_gen_impl(x, f_fmla)
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                atanf_fma_impl
            } else {
                fn def_atanf(x: f32) -> f32 {
                    atanf_gen_impl(x, f_fmla)
                }
                def_atanf
            }
        });
        unsafe { q(x) }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn f_atan_test() {
        assert!(
            (f_atanf(1.0) - std::f32::consts::PI / 4f32).abs() < 1e-6,
            "Invalid result {}",
            f_atanf(1f32)
        );
        assert!(
            (f_atanf(2f32) - 1.107148717794090503017065f32).abs() < 1e-6,
            "Invalid result {}",
            f_atanf(2f32)
        );
        assert!(
            (f_atanf(5f32) - 1.3734007669450158608612719264f32).abs() < 1e-6,
            "Invalid result {}",
            f_atanf(5f32)
        );
    }
}

Coverage Report

Created: 2025-12-20 06:48

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::common::{f_fmla, f_fmlaf};
30
31		#[inline(always)]
32	0	fn atanf_gen_impl<Q: Fn(f64, f64, f64) -> f64>(x: f32, fma: Q) -> f32 {
33		const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
34	0	let t = x.to_bits();
35	0	let e = (t >> 23) & 0xff;
36	0	let gt = e >= 127;
37	0	let ta = t & 0x7fffffff;
38	0	if ta >= 0x4c700518u32 {
39		// \|x\| >= 6.29198e+07
40	0	if ta > 0x7f800000u32 {
41	0	return x + x;
42	0	} // nan
43	0	return f32::copysign(PI2 as f32, x); // inf or \|x\| >= 6.29198e+07
44	0	}
45	0	if e < 127 - 13 {
46		// \|x\| < 2^-13
47	0	if e < 127 - 25 {
48		// \|x\| < 2^-25
49	0	if t << 1 == 0 {
50	0	return x;
51	0	}
52	0	let res = f_fmlaf(-x, x.abs(), x);
53	0	return res;
54	0	}
55	0	return f_fmlaf(-f64::from_bits(0x3fd5555560000000) as f32 * x, x * x, x);
56	0	}
57		/* now \|x\| >= 0.00012207 */
58	0	let mut z = x as f64;
59	0	if gt {
60	0	z = 1.0 / z;
61	0	} /* gt is non-zero for \|x\| >= 1 */
62	0	let z2 = z * z;
63	0	let z4 = z2 * z2;
64	0	let z8 = z4 * z4;
65		/* polynomials generated using rminimax
66		(https://gitlab.inria.fr/sfilip/rminimax) with the following command:
67		./ratapprox --function="atan(x)" --dom=[0.000122070,1] --num=[x,x^3,x^5,x^7,x^9,x^11,x^13] --den=[1,x^2,x^4,x^6,x^8,x^10,x^12] --output=atanf.sollya --log
68		(see output atanf.sollya)
69		The coefficient cd[0] was slightly reduced from the original value
70		0.330005 to avoid an exceptional case for \|x\| = 0.069052
71		and rounding to nearest.
72		*/
73		const CN: [u64; 7] = [
74		0x3fd51eccde075d67,
75		0x3fea76bb5637f2f2,
76		0x3fe81e0eed20de88,
77		0x3fd376c8ca67d11d,
78		0x3faaec7b69202ac6,
79		0x3f69561899acc73e,
80		0x3efbf9fa5b67e600,
81		];
82		const CD: [u64; 7] = [
83		0x3fd51eccde075d66,
84		0x3fedfbdd7b392d28,
85		0x3ff0000000000000,
86		0x3fdfd22bf0e89b54,
87		0x3fbd91ff8b576282,
88		0x3f8653ea99fc9bb0,
89		0x3f31e7fcc202340a,
90		];
91	0	let mut cn0 = fma(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0]));
92	0	let cn2 = fma(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2]));
93	0	let mut cn4 = fma(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4]));
94	0	let cn6 = f64::from_bits(CN[6]);
95	0	cn0 = fma(z4, cn2, cn0);
96	0	cn4 = fma(z4, cn6, cn4);
97	0	cn0 = fma(z8, cn4, cn0);
98	0	cn0 *= z;
99	0	let mut cd0 = fma(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0]));
100	0	let cd2 = fma(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2]));
101	0	let mut cd4 = fma(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4]));
102	0	let cd6 = f64::from_bits(CD[6]);
103	0	cd0 = fma(z4, cd2, cd0);
104	0	cd4 = fma(z4, cd6, cd4);
105	0	cd0 = fma(z8, cd4, cd0);
106	0	let r = cn0 / cd0;
107	0	if !gt {
108	0	return r as f32;
109	0	} /* for \|x\| < 1, (float) r is correctly rounded */
110
111		const PI_OVER2_H: f64 = f64::from_bits(0x3ff9000000000000);
112		const PI_OVER2_L: f64 = f64::from_bits(0x3f80fdaa22168c23);
113		/* now r approximates atan(1/x), we use atan(x) + atan(1/x) = sign(x)*pi/2,
114		where PI_OVER2_H + PI_OVER2_L approximates pi/2.
115		With sign(z)L + (-r + sign(z)H), it fails for x=0x1.98c252p+12 and
116		rounding upward.
117		With sign(z)*PI - r, where PI is a double approximation of pi to nearest,
118		it fails for x=0x1.ddf9f6p+0 and rounding upward. */
119	0	((f64::copysign(PI_OVER2_L, z) - r) + f64::copysign(PI_OVER2_H, z)) as f32
120	0	} Unexecuted instantiation: pxfm::tangent::atanf::atanf_gen_impl::<<f64>::mul_add> Unexecuted instantiation: pxfm::tangent::atanf::atanf_gen_impl::<pxfm::common::f_fmla>
121
122		#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
123		#[target_feature(enable = "avx", enable = "fma")]
124	0	unsafe fn atanf_fma_impl(x: f32) -> f32 {
125	0	atanf_gen_impl(x, f64::mul_add)
126	0	}
127
128		/// Computes atan
129		///
130		/// Max found ULP 0.49999973
131		#[inline]
132	0	pub fn f_atanf(x: f32) -> f32 {
133		#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
134		{
135		atanf_gen_impl(x, f_fmla)
136		}
137		#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
138		{
139		use std::sync::OnceLock;
140		static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
141	0	let q = EXECUTOR.get_or_init(\|\| {
142	0	if std::arch::is_x86_feature_detected!("avx")
143	0	&& std::arch::is_x86_feature_detected!("fma")
144		{
145	0	atanf_fma_impl
146		} else {
147	0	fn def_atanf(x: f32) -> f32 {
148	0	atanf_gen_impl(x, f_fmla)
149	0	}
150	0	def_atanf
151		}
152	0	});
153	0	unsafe { q(x) }
154		}
155	0	}
156
157		#[cfg(test)]
158		mod tests {
159		use super::*;
160
161		#[test]
162		fn f_atan_test() {
163		assert!(
164		(f_atanf(1.0) - std::f32::consts::PI / 4f32).abs() < 1e-6,
165		"Invalid result {}",
166		f_atanf(1f32)
167		);
168		assert!(
169		(f_atanf(2f32) - 1.107148717794090503017065f32).abs() < 1e-6,
170		"Invalid result {}",
171		f_atanf(2f32)
172		);
173		assert!(
174		(f_atanf(5f32) - 1.3734007669450158608612719264f32).abs() < 1e-6,
175		"Invalid result {}",
176		f_atanf(5f32)
177		);
178		}
179		}