/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/err/rerf.rs

Source
/*
 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::err::rerf_poly::RERF_HARD;
use crate::polyeval::f_polyeval7;

#[cold]
#[inline(never)]
fn rerf_poly_tiny_hard(x: f64, z2: DoubleDouble) -> f64 {
    // Polynomial for x/erf(x)
    // Generated by Sollya.
    // d = [0, 1/16];
    // f = x/erf(x);
    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107...|], d);
    // See ./notes/r_erf_tiny_hard.sollya
    const C: [(u64, u64); 10] = [
        (0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b),
        (0xbc6d7696fe4a7cd0, 0x3fd2e7fb0bcdf4f2),
        (0xbc0cb8b926064434, 0x3f842aa561ecc102),
        (0x3c1cd94c2f3e6f09, 0xbf75207c7ef80727),
        (0xbbb35c4effe3c87c, 0x3f2db4a8d7c32472),
        (0x3bbf1d1edd1e109a, 0x3f20faa7a99a4d3d),
        (0xbb9e05d21f4e1755, 0xbef3adb84631c39c),
        (0x3b6ee5dc31565280, 0xbec366647cacdcc9),
        (0x3b3698f8162c5fac, 0x3eaabb9db9f3b048),
        (0xbb026f5401fce891, 0xbe66cd40349520b6),
    ];
    let mut p = DoubleDouble::mul_add(
        z2,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[7]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[6]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[5]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[4]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[2]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[1]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[0]));
    p = DoubleDouble::from_exact_add(p.hi, p.lo);
    let z = DoubleDouble::div_dd_f64(p, x);
    z.to_f64()
}

#[inline]
fn rerf_poly_tiny(z: f64, x: f64) -> f64 {
    let z2 = DoubleDouble::from_exact_mult(z, z);

    // Polynomial for x/erf(x)
    // Generated by Sollya.
    // d = [0, 1/16];
    // f = x/erf(x);
    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107, 107, 107, D...|], d);
    // See ./notes/r_erf_tiny.sollya
    let p = f_polyeval7(
        z2.hi,
        f64::from_bits(0xbf75207c7ef80727),
        f64::from_bits(0x3f2db4a8d7c36a03),
        f64::from_bits(0x3f20faa7a8db7f27),
        f64::from_bits(0xbef3adae94983bb2),
        f64::from_bits(0xbec3b05fe5c49f32),
        f64::from_bits(0x3ed67902690892be),
        f64::from_bits(0xbf3090033375e5ee),
    );
    let mut r = DoubleDouble::quick_mul_f64_add(
        z2,
        p,
        DoubleDouble::from_bit_pair((0xbc0cb29fd910c494, 0x3f842aa561ecc102)),
    );
    r = DoubleDouble::quick_mul_add(
        z2,
        r,
        DoubleDouble::from_bit_pair((0xbc6d7696ff4f712a, 0x3fd2e7fb0bcdf4f2)),
    );
    r = DoubleDouble::quick_mul_add(
        z2,
        r,
        DoubleDouble::from_bit_pair((0xbc8618f13eb7ca11, 0x3fec5bf891b4ef6b)),
    );
    r = DoubleDouble::from_exact_add(r.hi, r.lo);
    r = DoubleDouble::div_dd_f64(r, x);
    let err = f_fmla(
        r.hi,
        f64::from_bits(0x3c10000000000000), // 2^-62
        f64::from_bits(0x3b90000000000000), // 2^-70
    );
    let ub = r.hi + (r.lo + err);
    let lb = r.hi + (r.lo - err);
    if ub == lb {
        return r.to_f64();
    }
    rerf_poly_tiny_hard(x, z2)
}

#[inline]
fn rerf_poly_hard(x: f64, z2: DoubleDouble, idx: usize) -> f64 {
    let c = &RERF_HARD[idx];
    let mut p = DoubleDouble::mul_add(
        z2,
        DoubleDouble::from_bit_pair(c[10]),
        DoubleDouble::from_bit_pair(c[9]),
    );
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[8]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[7]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[6]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[5]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[4]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[3]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[2]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[1]));
    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[0]));
    p = DoubleDouble::from_exact_add(p.hi, p.lo);
    let z = DoubleDouble::div_dd_f64(p, x);
    z.to_f64()
}

/// Computes 1/erf(x)
///
/// Max ulp 0.5001
pub fn f_rerf(x: f64) -> f64 {
    let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
    let t = z.to_bits();
    let ux = t;
    /* 1/erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */
    if ux > 0x4017afb48dc96626
    // |x| > 0x4017afb48dc96626
    {
        let os = f64::copysign(1.0, x);
        const MASK: u64 = 0x7ff0000000000000u64;
        if ux > MASK {
            return x + x; /* NaN */
        }
        if ux == MASK {
            return os; /* +/-Inf */
        }
        return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
    }

    /* now |x| <= 0x4017afb48dc96626 */
    if z < f64::from_bits(0x3c20000000000000) {
        // |x| < 0.0000000000000000004336808689942018
        /* for x=-0 the code below returns +0 which is wrong */
        if x == 0. {
            return if x.is_sign_negative() {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }

        if z.to_bits() <= 0x38b7f12369dedu64 {
            return if x.is_sign_negative() {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }

        /* double-double approximation of 2/sqrt(pi) to nearest */
        const SQRT_PI_OVER_2: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0xbc8618f13eb7ca89),
            f64::from_bits(0x3fec5bf891b4ef6b),
        );

        /* tiny x is Taylor Series: 1/erf(x) ~ sqrt(pi)/(2 * x) + O(x^3), where the ratio of the O(x^3)
        term to the main term is in x^2/3, thus less than 2^-123 */
        /* scale x by 2^106 to get out the subnormal range */
        let sx = x * f64::from_bits(0x4690000000000000);
        let mut prod = DoubleDouble::div_dd_f64(SQRT_PI_OVER_2, sx);
        // scale back by 2^106, since we're performed the division
        prod = DoubleDouble::quick_mult_f64(prod, f64::from_bits(0x4690000000000000));
        return prod.to_f64();
    }

    if z.to_bits() < 0x3fb0000000000000u64 {
        return rerf_poly_tiny(z, x);
    }

    const SIXTEEN: u64 = 4 << 52;
    let idx =
        unsafe { f64::from_bits(z.to_bits().wrapping_add(SIXTEEN)).to_int_unchecked::<usize>() };
    let z2 = DoubleDouble::from_exact_mult(z, z);
    rerf_poly_hard(x, z2, idx)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_erf() {
        assert_eq!(f_rerf(65.), 1.0);
        assert_eq!(f_rerf(3.), 1.0000220909849995);
        assert_eq!(f_rerf(-3.), -1.0000220909849995);
        assert_eq!(f_rerf(-0.03723630312089732), -23.811078627277197);
        assert_eq!(
            f_rerf(0.0000000000000000002336808689942018),
            3.7924667486354975e18
        );
        assert_eq!(f_rerf(2.000225067138672), 1.004695025872889);
        assert_eq!(f_rerf(0.), f64::INFINITY);
        assert_eq!(f_rerf(-0.), f64::NEG_INFINITY);
        assert!(f_rerf(f64::NAN).is_nan());
    }
}

Coverage Report

Created: 2026-01-19 07:25

Line	Count	Source
1		/*
2		* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3		* //
4		* // Redistribution and use in source and binary forms, with or without modification,
5		* // are permitted provided that the following conditions are met:
6		* //
7		* // 1. Redistributions of source code must retain the above copyright notice, this
8		* // list of conditions and the following disclaimer.
9		* //
10		* // 2. Redistributions in binary form must reproduce the above copyright notice,
11		* // this list of conditions and the following disclaimer in the documentation
12		* // and/or other materials provided with the distribution.
13		* //
14		* // 3. Neither the name of the copyright holder nor the names of its
15		* // contributors may be used to endorse or promote products derived from
16		* // this software without specific prior written permission.
17		* //
18		* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19		* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20		* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21		* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22		* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23		* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24		* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25		* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26		* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27		* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28		*/
29		use crate::common::f_fmla;
30		use crate::double_double::DoubleDouble;
31		use crate::err::rerf_poly::RERF_HARD;
32		use crate::polyeval::f_polyeval7;
33
34		#[cold]
35		#[inline(never)]
36	0	fn rerf_poly_tiny_hard(x: f64, z2: DoubleDouble) -> f64 {
37		// Polynomial for x/erf(x)
38		// Generated by Sollya.
39		// d = [0, 1/16];
40		// f = x/erf(x);
41		// Q = fpminimax(f, [\|0, 2, 4, 6, 8, 10, 12, 14, 16, 18\|], [\|107...\|], d);
42		// See ./notes/r_erf_tiny_hard.sollya
43		const C: [(u64, u64); 10] = [
44		(0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b),
45		(0xbc6d7696fe4a7cd0, 0x3fd2e7fb0bcdf4f2),
46		(0xbc0cb8b926064434, 0x3f842aa561ecc102),
47		(0x3c1cd94c2f3e6f09, 0xbf75207c7ef80727),
48		(0xbbb35c4effe3c87c, 0x3f2db4a8d7c32472),
49		(0x3bbf1d1edd1e109a, 0x3f20faa7a99a4d3d),
50		(0xbb9e05d21f4e1755, 0xbef3adb84631c39c),
51		(0x3b6ee5dc31565280, 0xbec366647cacdcc9),
52		(0x3b3698f8162c5fac, 0x3eaabb9db9f3b048),
53		(0xbb026f5401fce891, 0xbe66cd40349520b6),
54		];
55	0	let mut p = DoubleDouble::mul_add(
56	0	z2,
57	0	DoubleDouble::from_bit_pair(C[9]),
58	0	DoubleDouble::from_bit_pair(C[8]),
59		);
60	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[7]));
61	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[6]));
62	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[5]));
63	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[4]));
64	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[3]));
65	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[2]));
66	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[1]));
67	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[0]));
68	0	p = DoubleDouble::from_exact_add(p.hi, p.lo);
69	0	let z = DoubleDouble::div_dd_f64(p, x);
70	0	z.to_f64()
71	0	}
72
73		#[inline]
74	0	fn rerf_poly_tiny(z: f64, x: f64) -> f64 {
75	0	let z2 = DoubleDouble::from_exact_mult(z, z);
76
77		// Polynomial for x/erf(x)
78		// Generated by Sollya.
79		// d = [0, 1/16];
80		// f = x/erf(x);
81		// Q = fpminimax(f, [\|0, 2, 4, 6, 8, 10, 12, 14, 16, 18\|], [\|107, 107, 107, D...\|], d);
82		// See ./notes/r_erf_tiny.sollya
83	0	let p = f_polyeval7(
84	0	z2.hi,
85	0	f64::from_bits(0xbf75207c7ef80727),
86	0	f64::from_bits(0x3f2db4a8d7c36a03),
87	0	f64::from_bits(0x3f20faa7a8db7f27),
88	0	f64::from_bits(0xbef3adae94983bb2),
89	0	f64::from_bits(0xbec3b05fe5c49f32),
90	0	f64::from_bits(0x3ed67902690892be),
91	0	f64::from_bits(0xbf3090033375e5ee),
92		);
93	0	let mut r = DoubleDouble::quick_mul_f64_add(
94	0	z2,
95	0	p,
96	0	DoubleDouble::from_bit_pair((0xbc0cb29fd910c494, 0x3f842aa561ecc102)),
97		);
98	0	r = DoubleDouble::quick_mul_add(
99	0	z2,
100	0	r,
101	0	DoubleDouble::from_bit_pair((0xbc6d7696ff4f712a, 0x3fd2e7fb0bcdf4f2)),
102	0	);
103	0	r = DoubleDouble::quick_mul_add(
104	0	z2,
105	0	r,
106	0	DoubleDouble::from_bit_pair((0xbc8618f13eb7ca11, 0x3fec5bf891b4ef6b)),
107	0	);
108	0	r = DoubleDouble::from_exact_add(r.hi, r.lo);
109	0	r = DoubleDouble::div_dd_f64(r, x);
110	0	let err = f_fmla(
111	0	r.hi,
112	0	f64::from_bits(0x3c10000000000000), // 2^-62
113	0	f64::from_bits(0x3b90000000000000), // 2^-70
114		);
115	0	let ub = r.hi + (r.lo + err);
116	0	let lb = r.hi + (r.lo - err);
117	0	if ub == lb {
118	0	return r.to_f64();
119	0	}
120	0	rerf_poly_tiny_hard(x, z2)
121	0	}
122
123		#[inline]
124	0	fn rerf_poly_hard(x: f64, z2: DoubleDouble, idx: usize) -> f64 {
125	0	let c = &RERF_HARD[idx];
126	0	let mut p = DoubleDouble::mul_add(
127	0	z2,
128	0	DoubleDouble::from_bit_pair(c[10]),
129	0	DoubleDouble::from_bit_pair(c[9]),
130		);
131	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[8]));
132	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[7]));
133	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[6]));
134	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[5]));
135	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[4]));
136	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[3]));
137	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[2]));
138	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[1]));
139	0	p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[0]));
140	0	p = DoubleDouble::from_exact_add(p.hi, p.lo);
141	0	let z = DoubleDouble::div_dd_f64(p, x);
142	0	z.to_f64()
143	0	}
144
145		/// Computes 1/erf(x)
146		///
147		/// Max ulp 0.5001
148	0	pub fn f_rerf(x: f64) -> f64 {
149	0	let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
150	0	let t = z.to_bits();
151	0	let ux = t;
152		/* 1/erf(x) rounds to +/-1 for RNDN for \|x\| > 0x4017afb48dc96626 */
153	0	if ux > 0x4017afb48dc96626
154		// \|x\| > 0x4017afb48dc96626
155		{
156	0	let os = f64::copysign(1.0, x);
157		const MASK: u64 = 0x7ff0000000000000u64;
158	0	if ux > MASK {
159	0	return x + x; /* NaN */
160	0	}
161	0	if ux == MASK {
162	0	return os; /* +/-Inf */
163	0	}
164	0	return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
165	0	}
166
167		/* now \|x\| <= 0x4017afb48dc96626 */
168	0	if z < f64::from_bits(0x3c20000000000000) {
169		// \|x\| < 0.0000000000000000004336808689942018
170		/* for x=-0 the code below returns +0 which is wrong */
171	0	if x == 0. {
172	0	return if x.is_sign_negative() {
173	0	f64::NEG_INFINITY
174		} else {
175	0	f64::INFINITY
176		};
177	0	}
178
179	0	if z.to_bits() <= 0x38b7f12369dedu64 {
180	0	return if x.is_sign_negative() {
181	0	f64::NEG_INFINITY
182		} else {
183	0	f64::INFINITY
184		};
185	0	}
186
187		/* double-double approximation of 2/sqrt(pi) to nearest */
188		const SQRT_PI_OVER_2: DoubleDouble = DoubleDouble::new(
189		f64::from_bits(0xbc8618f13eb7ca89),
190		f64::from_bits(0x3fec5bf891b4ef6b),
191		);
192
193		/* tiny x is Taylor Series: 1/erf(x) ~ sqrt(pi)/(2 * x) + O(x^3), where the ratio of the O(x^3)
194		term to the main term is in x^2/3, thus less than 2^-123 */
195		/* scale x by 2^106 to get out the subnormal range */
196	0	let sx = x * f64::from_bits(0x4690000000000000);
197	0	let mut prod = DoubleDouble::div_dd_f64(SQRT_PI_OVER_2, sx);
198		// scale back by 2^106, since we're performed the division
199	0	prod = DoubleDouble::quick_mult_f64(prod, f64::from_bits(0x4690000000000000));
200	0	return prod.to_f64();
201	0	}
202
203	0	if z.to_bits() < 0x3fb0000000000000u64 {
204	0	return rerf_poly_tiny(z, x);
205	0	}
206
207		const SIXTEEN: u64 = 4 << 52;
208	0	let idx =
209	0	unsafe { f64::from_bits(z.to_bits().wrapping_add(SIXTEEN)).to_int_unchecked::<usize>() };
210	0	let z2 = DoubleDouble::from_exact_mult(z, z);
211	0	rerf_poly_hard(x, z2, idx)
212	0	}
213
214		#[cfg(test)]
215		mod tests {
216		use super::*;
217
218		#[test]
219		fn test_erf() {
220		assert_eq!(f_rerf(65.), 1.0);
221		assert_eq!(f_rerf(3.), 1.0000220909849995);
222		assert_eq!(f_rerf(-3.), -1.0000220909849995);
223		assert_eq!(f_rerf(-0.03723630312089732), -23.811078627277197);
224		assert_eq!(
225		f_rerf(0.0000000000000000002336808689942018),
226		3.7924667486354975e18
227		);
228		assert_eq!(f_rerf(2.000225067138672), 1.004695025872889);
229		assert_eq!(f_rerf(0.), f64::INFINITY);
230		assert_eq!(f_rerf(-0.), f64::NEG_INFINITY);
231		assert!(f_rerf(f64::NAN).is_nan());
232		}
233		}