/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/sincpi.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::polyeval::f_estrin_polyeval5; |
32 | | use crate::sincospi::reduce_pi_64; |
33 | | use crate::sincospi_tables::SINPI_K_PI_OVER_64; |
34 | | |
35 | | /** |
36 | | Sinpi on range [0.0, 0.03515625] |
37 | | |
38 | | Generated poly by Sollya: |
39 | | ```text |
40 | | d = [0, 0.03515625]; |
41 | | f_sincpi = sin(y*pi)/(y*pi); |
42 | | Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10, 12|], [|107...|], d, relative, floating); |
43 | | ``` |
44 | | See ./notes/sincpi_at_zero_dd.sollya |
45 | | **/ |
46 | | #[cold] |
47 | 0 | fn as_sincpi_zero(x: f64) -> f64 { |
48 | | const C: [(u64, u64); 7] = [ |
49 | | (0xb9d3080000000000, 0x3ff0000000000000), |
50 | | (0xbc81873d86314302, 0xbffa51a6625307d3), |
51 | | (0x3c84871b4ffeefae, 0x3fe9f9cb402bc46c), |
52 | | (0xbc5562d6ae037010, 0xbfc86a8e4720db66), |
53 | | (0xbc386c93f4549bac, 0x3f9ac6805cf31ffd), |
54 | | (0x3c0dbda368edfa40, 0xbf633816a3399d4e), |
55 | | (0xbbcf22ccc18f27a9, 0x3f23736e6a59edd9), |
56 | | ]; |
57 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
58 | 0 | let mut p = DoubleDouble::quick_mul_add( |
59 | 0 | x2, |
60 | 0 | DoubleDouble::from_bit_pair(C[6]), |
61 | 0 | DoubleDouble::from_bit_pair(C[5]), |
62 | | ); |
63 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4])); |
64 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3])); |
65 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2])); |
66 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1])); |
67 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0])); |
68 | 0 | p.to_f64() |
69 | 0 | } |
70 | | |
71 | | /// Computes sin(PI\*x)/(PI\*x) |
72 | | /// |
73 | | /// Produces normalized sinc. |
74 | | /// |
75 | | /// Max ULP 0.5 |
76 | 0 | pub fn f_sincpi(x: f64) -> f64 { |
77 | 0 | let ix = x.to_bits(); |
78 | 0 | let ax = ix & 0x7fff_ffff_ffff_ffff; |
79 | 0 | if ax == 0 { |
80 | 0 | return 1.; |
81 | 0 | } |
82 | 0 | let e: i32 = (ax >> 52) as i32; |
83 | 0 | let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); |
84 | 0 | let sgn: i64 = (ix as i64) >> 63; |
85 | 0 | let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); |
86 | 0 | let mut s: i32 = 1063i32.wrapping_sub(e); |
87 | 0 | if s < 0 { |
88 | 0 | if e == 0x7ff { |
89 | 0 | if (ix << 12) == 0 { |
90 | 0 | return f64::NAN; |
91 | 0 | } |
92 | 0 | return x + x; // case x=NaN |
93 | 0 | } |
94 | 0 | s = -s - 1; |
95 | 0 | if s > 10 { |
96 | 0 | return f64::copysign(0.0, x); |
97 | 0 | } |
98 | 0 | let iq: u64 = (m as u64).wrapping_shl(s as u32); |
99 | 0 | if (iq & 2047) == 0 { |
100 | 0 | return f64::copysign(0.0, x); |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | 0 | if ax <= 0x3fa2000000000000u64 { |
105 | | // |x| <= 0.03515625 |
106 | | |
107 | 0 | if ax < 0x3c90000000000000u64 { |
108 | | // |x| < f64::EPSILON |
109 | 0 | if ax <= 0x3b05798ee2308c3au64 { |
110 | | // |x| <= 2.2204460492503131e-24 |
111 | 0 | return 1.; |
112 | 0 | } |
113 | | // Small values approximated with Taylor poly |
114 | | // sincpi(x) ~ 1 - x^2*Pi^2/6 + O(x^4) |
115 | | #[cfg(any( |
116 | | all( |
117 | | any(target_arch = "x86", target_arch = "x86_64"), |
118 | | target_feature = "fma" |
119 | | ), |
120 | | target_arch = "aarch64" |
121 | | ))] |
122 | | { |
123 | | const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3); |
124 | | let p = f_fmla(x, M_SQR_PI_OVER_6 * x, 1.); |
125 | | return p; |
126 | | } |
127 | | #[cfg(not(any( |
128 | | all( |
129 | | any(target_arch = "x86", target_arch = "x86_64"), |
130 | | target_feature = "fma" |
131 | | ), |
132 | | target_arch = "aarch64" |
133 | | )))] |
134 | | { |
135 | | use crate::common::min_normal_f64; |
136 | 0 | return 1. - min_normal_f64(); |
137 | | } |
138 | 0 | } |
139 | | |
140 | | // Poly generated by Sollya: |
141 | | // d = [0, 0.03515625]; |
142 | | // f_sincpi = sin(y*pi)/(y*pi); |
143 | | // Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating); |
144 | | // See ./notes/sincpi_at_zero.sollya |
145 | | |
146 | 0 | let x2 = x * x; |
147 | | |
148 | 0 | let eps = x * f_fmla( |
149 | 0 | x2, |
150 | 0 | f64::from_bits(0x3d00000000000000), // 2^-47 |
151 | 0 | f64::from_bits(0x3bd0000000000000), // 2^-66 |
152 | 0 | ); |
153 | | |
154 | | const C: [u64; 5] = [ |
155 | | 0xbffa51a6625307d3, |
156 | | 0x3fe9f9cb402bbeaa, |
157 | | 0xbfc86a8e466bbb5b, |
158 | | 0x3f9ac66d887e2f38, |
159 | | 0xbf628473a38d289a, |
160 | | ]; |
161 | | |
162 | | const F: DoubleDouble = |
163 | | DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000)); |
164 | | |
165 | 0 | let p = f_estrin_polyeval5( |
166 | 0 | x2, |
167 | 0 | f64::from_bits(C[0]), |
168 | 0 | f64::from_bits(C[1]), |
169 | 0 | f64::from_bits(C[2]), |
170 | 0 | f64::from_bits(C[3]), |
171 | 0 | f64::from_bits(C[4]), |
172 | | ); |
173 | 0 | let v = DoubleDouble::from_exact_mult(p, x2); |
174 | 0 | let z = DoubleDouble::add(F, v); |
175 | | |
176 | 0 | let lb = z.hi + (z.lo - eps); |
177 | 0 | let ub = z.hi + (z.lo + eps); |
178 | 0 | if lb == ub { |
179 | 0 | return lb; |
180 | 0 | } |
181 | 0 | return as_sincpi_zero(x); |
182 | 0 | } |
183 | | |
184 | 0 | let si = e.wrapping_sub(1011); |
185 | 0 | if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 { |
186 | | // x is integer or half-integer |
187 | 0 | if (m0.wrapping_shl(si as u32)) == 0 { |
188 | 0 | return f64::copysign(0.0, x); // x is integer |
189 | 0 | } |
190 | 0 | let t = (m0.wrapping_shl((si - 1) as u32)) >> 63; |
191 | | // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2 |
192 | | #[cfg(any( |
193 | | all( |
194 | | any(target_arch = "x86", target_arch = "x86_64"), |
195 | | target_feature = "fma" |
196 | | ), |
197 | | target_arch = "aarch64" |
198 | | ))] |
199 | | { |
200 | | let num = if t == 0 { |
201 | | f64::copysign(1.0, x) |
202 | | } else { |
203 | | -f64::copysign(1.0, x) |
204 | | }; |
205 | | const PI: DoubleDouble = |
206 | | DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18)); |
207 | | let r = DoubleDouble::quick_mult_f64(PI, x); |
208 | | let v = DoubleDouble::from_f64_div_dd(num, r); |
209 | | return v.to_f64(); |
210 | | } |
211 | | |
212 | | #[cfg(not(any( |
213 | | all( |
214 | | any(target_arch = "x86", target_arch = "x86_64"), |
215 | | target_feature = "fma" |
216 | | ), |
217 | | target_arch = "aarch64" |
218 | | )))] |
219 | | { |
220 | | use crate::double_double::two_product_compatible; |
221 | 0 | if two_product_compatible(x) { |
222 | 0 | let num = if t == 0 { |
223 | 0 | f64::copysign(1.0, x) |
224 | | } else { |
225 | 0 | -f64::copysign(1.0, x) |
226 | | }; |
227 | | const PI: DoubleDouble = |
228 | | DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18)); |
229 | 0 | let r = DoubleDouble::quick_mult_f64(PI, x); |
230 | 0 | let v = DoubleDouble::from_f64_div_dd(num, r); |
231 | 0 | return v.to_f64(); |
232 | | } else { |
233 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
234 | 0 | let num = DyadicFloat128::new_from_f64(if t == 0 { |
235 | 0 | f64::copysign(1.0, x) |
236 | | } else { |
237 | 0 | -f64::copysign(1.0, x) |
238 | | }); |
239 | | const PI: DyadicFloat128 = DyadicFloat128 { |
240 | | sign: DyadicSign::Pos, |
241 | | exponent: -126, |
242 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
243 | | }; |
244 | 0 | let dx = DyadicFloat128::new_from_f64(x); |
245 | 0 | let r = (PI * dx).reciprocal(); |
246 | 0 | return (num * r).fast_as_f64(); |
247 | | } |
248 | | } |
249 | 0 | } |
250 | | |
251 | 0 | let (y, k) = reduce_pi_64(x); |
252 | | |
253 | | // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64). |
254 | 0 | let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]); |
255 | 0 | let cos_k = DoubleDouble::from_bit_pair( |
256 | 0 | SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize], |
257 | | ); |
258 | | |
259 | 0 | let r_sincos = crate::sincospi::sincospi_eval(y); |
260 | | |
261 | | const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18)); |
262 | 0 | let scale = DoubleDouble::quick_mult_f64(PI, x); |
263 | | |
264 | 0 | let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos); |
265 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin); |
266 | | |
267 | | // sin_k_cos_y is always >> cos_k_sin_y |
268 | 0 | let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi); |
269 | 0 | rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; |
270 | 0 | rr = DoubleDouble::div(rr, scale); |
271 | | |
272 | 0 | let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR); |
273 | 0 | let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR); |
274 | | |
275 | 0 | if ub == lb { |
276 | 0 | return rr.to_f64(); |
277 | 0 | } |
278 | 0 | sincpi_dd(y, sin_k, cos_k, scale) |
279 | 0 | } |
280 | | |
281 | | #[cold] |
282 | 0 | fn sincpi_dd(x: f64, sin_k: DoubleDouble, cos_k: DoubleDouble, scale: DoubleDouble) -> f64 { |
283 | 0 | let r_sincos = crate::sincospi::sincospi_eval_dd(x); |
284 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin); |
285 | 0 | let mut rr = DoubleDouble::mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y); |
286 | 0 | rr = DoubleDouble::div(rr, scale); |
287 | 0 | rr.to_f64() |
288 | 0 | } |
289 | | |
290 | | #[cfg(test)] |
291 | | mod tests { |
292 | | use super::*; |
293 | | |
294 | | #[test] |
295 | | fn test_sincpi_zero() { |
296 | | assert_eq!(f_sincpi(2.2204460492503131e-24), 1.0); |
297 | | assert_eq!(f_sincpi(f64::EPSILON), 1.0); |
298 | | assert_eq!(f_sincpi(0.007080019335262543), 0.9999175469662566); |
299 | | assert_eq!(f_sincpi(0.05468860710998057), 0.9950875152844803); |
300 | | assert_eq!(f_sincpi(0.5231231231), 0.6068750737806441); |
301 | | assert_eq!(f_sincpi(1.), 0.); |
302 | | assert_eq!(f_sincpi(-1.), 0.); |
303 | | assert_eq!(f_sincpi(-2.), 0.); |
304 | | assert_eq!(f_sincpi(-3.), 0.); |
305 | | assert!(f_sincpi(f64::INFINITY).is_nan()); |
306 | | assert!(f_sincpi(f64::NEG_INFINITY).is_nan()); |
307 | | assert!(f_sincpi(f64::NAN).is_nan()); |
308 | | } |
309 | | } |