/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/sin.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dyad_fmla, f_fmla, min_normal_f64}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
32 | | use crate::rounding::CpuRound; |
33 | | use crate::sin_helper::sincos_eval_dd; |
34 | | use crate::sin_table::SIN_K_PI_OVER_128; |
35 | | use crate::sincos_dyadic::SIN_K_PI_OVER_128_F128; |
36 | | use crate::sincos_reduce::LargeArgumentReduction; |
37 | | |
38 | | // For 2^-7 < |x| < 2^16, return k and u such that: |
39 | | // k = round(x * 128/pi) |
40 | | // x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo |
41 | | // Error bound: |
42 | | // |(x - k * pi/128) - (u_hi + u_lo)| <= max(ulp(ulp(u_hi)), 2^-119) |
43 | | // <= 2^-111. |
44 | | #[inline] |
45 | 0 | pub(crate) fn range_reduction_small(x: f64) -> (DoubleDouble, u64) { |
46 | | const MPI_OVER_128: [u64; 3] = [0xbf9921fb54400000, 0xbd70b4611a600000, 0xbb43198a2e037073]; |
47 | | const ONE_TWENTY_EIGHT_OVER_PI_D: f64 = f64::from_bits(0x40445f306dc9c883); |
48 | 0 | let prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D; |
49 | 0 | let kd = prod_hi.cpu_round(); |
50 | | |
51 | | // Let y = x - k * (pi/128) |
52 | | // Then |y| < pi / 256 |
53 | | // With extra rounding errors, we can bound |y| < 1.6 * 2^-7. |
54 | 0 | let y_hi = f_fmla(kd, f64::from_bits(MPI_OVER_128[0]), x); // Exact |
55 | | // |u.hi| < 1.6*2^-7 |
56 | 0 | let u_hi = f_fmla(kd, f64::from_bits(MPI_OVER_128[1]), y_hi); |
57 | | |
58 | 0 | let u0 = y_hi - u_hi; // Exact |
59 | | // |u.lo| <= max(ulp(u.hi), |kd * MPI_OVER_128[2]|) |
60 | 0 | let u1 = f_fmla(kd, f64::from_bits(MPI_OVER_128[1]), u0); // Exact |
61 | 0 | let u_lo = f_fmla(kd, f64::from_bits(MPI_OVER_128[2]), u1); |
62 | | // Error bound: |
63 | | // |x - k * pi/128| - (u.hi + u.lo) <= ulp(u.lo) |
64 | | // <= ulp(max(ulp(u.hi), kd*MPI_OVER_128[2])) |
65 | | // <= 2^(-7 - 104) = 2^-111. |
66 | 0 | (DoubleDouble::new(u_lo, u_hi), unsafe { |
67 | 0 | kd.to_int_unchecked::<i64>() as u64 // indeterminate values is always filtered out before this call, as well only lowest bits are used |
68 | 0 | }) |
69 | 0 | } |
70 | | |
71 | | #[inline] |
72 | 0 | pub(crate) fn get_sin_k_rational(kk: u64) -> DyadicFloat128 { |
73 | 0 | let idx = if (kk & 64) != 0 { |
74 | 0 | 64 - (kk & 63) |
75 | | } else { |
76 | 0 | kk & 63 |
77 | | }; |
78 | 0 | let mut ans = SIN_K_PI_OVER_128_F128[idx as usize]; |
79 | 0 | if (kk & 128) != 0 { |
80 | 0 | ans.sign = DyadicSign::Neg; |
81 | 0 | } |
82 | 0 | ans |
83 | 0 | } |
84 | | |
85 | | pub(crate) struct SinCos { |
86 | | pub(crate) v_sin: DoubleDouble, |
87 | | pub(crate) v_cos: DoubleDouble, |
88 | | pub(crate) err: f64, |
89 | | } |
90 | | |
91 | | #[inline] |
92 | 0 | pub(crate) fn sincos_eval(u: DoubleDouble) -> SinCos { |
93 | | // Evaluate sin(y) = sin(x - k * (pi/128)) |
94 | | // We use the degree-7 Taylor approximation: |
95 | | // sin(y) ~ y - y^3/3! + y^5/5! - y^7/7! |
96 | | // Then the error is bounded by: |
97 | | // |sin(y) - (y - y^3/3! + y^5/5! - y^7/7!)| < |y|^9/9! < 2^-54/9! < 2^-72. |
98 | | // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms |
99 | | // < ulp(u_hi^3) gives us: |
100 | | // y - y^3/3! + y^5/5! - y^7/7! = ... |
101 | | // ~ u_hi + u_hi^3 * (-1/6 + u_hi^2 * (1/120 - u_hi^2 * 1/5040)) + |
102 | | // + u_lo (1 + u_hi^2 * (-1/2 + u_hi^2 / 24)) |
103 | 0 | let u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58. |
104 | | // p1 ~ 1/120 + u_hi^2 / 5040. |
105 | 0 | let p1 = f_fmla( |
106 | 0 | u_hi_sq, |
107 | 0 | f64::from_bits(0xbf2a01a01a01a01a), |
108 | 0 | f64::from_bits(0x3f81111111111111), |
109 | | ); |
110 | | // q1 ~ -1/2 + u_hi^2 / 24. |
111 | 0 | let q1 = f_fmla( |
112 | 0 | u_hi_sq, |
113 | 0 | f64::from_bits(0x3fa5555555555555), |
114 | 0 | f64::from_bits(0xbfe0000000000000), |
115 | | ); |
116 | 0 | let u_hi_3 = u_hi_sq * u.hi; |
117 | | // p2 ~ -1/6 + u_hi^2 (1/120 - u_hi^2 * 1/5040) |
118 | 0 | let p2 = f_fmla(u_hi_sq, p1, f64::from_bits(0xbfc5555555555555)); |
119 | | // q2 ~ 1 + u_hi^2 (-1/2 + u_hi^2 / 24) |
120 | 0 | let q2 = f_fmla(u_hi_sq, q1, 1.0); |
121 | 0 | let sin_lo = f_fmla(u_hi_3, p2, u.lo * q2); |
122 | | // Overall, |sin(y) - (u_hi + sin_lo)| < 2*ulp(u_hi^3) < 2^-69. |
123 | | |
124 | | // Evaluate cos(y) = cos(x - k * (pi/128)) |
125 | | // We use the degree-8 Taylor approximation: |
126 | | // cos(y) ~ 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! |
127 | | // Then the error is bounded by: |
128 | | // |cos(y) - (...)| < |y|^10/10! < 2^-81 |
129 | | // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms |
130 | | // < ulp(u_hi^3) gives us: |
131 | | // 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! = ... |
132 | | // ~ 1 - u_hi^2/2 + u_hi^4(1/24 + u_hi^2 (-1/720 + u_hi^2/40320)) + |
133 | | // + u_hi u_lo (-1 + u_hi^2/6) |
134 | | // We compute 1 - u_hi^2 accurately: |
135 | | // v_hi + v_lo ~ 1 - u_hi^2/2 |
136 | | // with error <= 2^-105. |
137 | 0 | let u_hi_neg_half = (-0.5) * u.hi; |
138 | | |
139 | | let (mut v_lo, v_hi); |
140 | | |
141 | | #[cfg(any( |
142 | | all( |
143 | | any(target_arch = "x86", target_arch = "x86_64"), |
144 | | target_feature = "fma" |
145 | | ), |
146 | | target_arch = "aarch64" |
147 | | ))] |
148 | | { |
149 | | v_hi = f_fmla(u.hi, u_hi_neg_half, 1.0); |
150 | | v_lo = 1.0 - v_hi; // Exact |
151 | | v_lo = f_fmla(u.hi, u_hi_neg_half, v_lo); |
152 | | } |
153 | | |
154 | | #[cfg(not(any( |
155 | | all( |
156 | | any(target_arch = "x86", target_arch = "x86_64"), |
157 | | target_feature = "fma" |
158 | | ), |
159 | | target_arch = "aarch64" |
160 | | )))] |
161 | 0 | { |
162 | 0 | let u_hi_sq_neg_half = DoubleDouble::from_exact_mult(u.hi, u_hi_neg_half); |
163 | 0 | let v = DoubleDouble::from_exact_add(1.0, u_hi_sq_neg_half.hi); |
164 | 0 | v_lo = v.lo; |
165 | 0 | v_lo += u_hi_sq_neg_half.lo; |
166 | 0 | v_hi = v.hi; |
167 | 0 | } |
168 | | |
169 | | // r1 ~ -1/720 + u_hi^2 / 40320 |
170 | 0 | let r1 = f_fmla( |
171 | 0 | u_hi_sq, |
172 | 0 | f64::from_bits(0x3efa01a01a01a01a), |
173 | 0 | f64::from_bits(0xbf56c16c16c16c17), |
174 | | ); |
175 | | // s1 ~ -1 + u_hi^2 / 6 |
176 | 0 | let s1 = f_fmla(u_hi_sq, f64::from_bits(0x3fc5555555555555), -1.0); |
177 | 0 | let u_hi_4 = u_hi_sq * u_hi_sq; |
178 | 0 | let u_hi_u_lo = u.hi * u.lo; |
179 | | // r2 ~ 1/24 + u_hi^2 (-1/720 + u_hi^2 / 40320) |
180 | 0 | let r2 = f_fmla(u_hi_sq, r1, f64::from_bits(0x3fa5555555555555)); |
181 | | // s2 ~ v_lo + u_hi * u_lo * (-1 + u_hi^2 / 6) |
182 | 0 | let s2 = f_fmla(u_hi_u_lo, s1, v_lo); |
183 | 0 | let cos_lo = f_fmla(u_hi_4, r2, s2); |
184 | | // Overall, |cos(y) - (v_hi + cos_lo)| < 2*ulp(u_hi^4) < 2^-75. |
185 | | |
186 | 0 | let sin_u = DoubleDouble::from_exact_add(u.hi, sin_lo); |
187 | 0 | let cos_u = DoubleDouble::from_exact_add(v_hi, cos_lo); |
188 | | |
189 | 0 | let err = f_fmla( |
190 | 0 | u_hi_3, |
191 | 0 | f64::from_bits(0x3cc0000000000000), |
192 | 0 | f64::from_bits(0x3960000000000000), |
193 | | ); |
194 | | |
195 | 0 | SinCos { |
196 | 0 | v_sin: sin_u, |
197 | 0 | v_cos: cos_u, |
198 | 0 | err, |
199 | 0 | } |
200 | 0 | } |
201 | | |
202 | | #[cold] |
203 | | #[inline(never)] |
204 | 0 | fn sin_accurate(y: DoubleDouble, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 { |
205 | 0 | let r_sincos = sincos_eval_dd(y); |
206 | | |
207 | | // k is an integer and -pi / 256 <= y <= pi / 256. |
208 | | // Then sin(x) = sin((k * pi/128 + y) |
209 | | // = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128) |
210 | | |
211 | 0 | let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k); |
212 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k); |
213 | | |
214 | 0 | let rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y); |
215 | 0 | rr.to_f64() |
216 | 0 | } |
217 | | |
218 | | #[cold] |
219 | 0 | pub(crate) fn sin_accurate_near_zero(x: f64) -> f64 { |
220 | | // Generated by Sollya: |
221 | | // d = [0, 0.0490874]; |
222 | | // f_sin = sin(y)/y; |
223 | | // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10, 12|], [|107...|], d, relative, floating); |
224 | | const C: [(u64, u64); 7] = [ |
225 | | (0x0000000000000000, 0x3ff0000000000000), |
226 | | (0xbc65555555555217, 0xbfc5555555555555), |
227 | | (0x3c011110f7d49e8c, 0x3f81111111111111), |
228 | | (0xbb65e5b30986fc80, 0xbf2a01a01a01a01a), |
229 | | (0xbb689e86245bd566, 0x3ec71de3a556c6e5), |
230 | | (0xbaccb3d55ccfca58, 0xbe5ae64567954935), |
231 | | (0x3a6333ef7a00ce40, 0x3de6120ca00ce3a2), |
232 | | ]; |
233 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
234 | 0 | let mut p = DoubleDouble::quick_mul_add( |
235 | 0 | x2, |
236 | 0 | DoubleDouble::from_bit_pair(C[6]), |
237 | 0 | DoubleDouble::from_bit_pair(C[5]), |
238 | | ); |
239 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4])); |
240 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3])); |
241 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2])); |
242 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1])); |
243 | 0 | p = DoubleDouble::quick_mul_add_f64(x2, p, f64::from_bits(0x3ff0000000000000)); |
244 | 0 | p = DoubleDouble::quick_mult_f64(p, x); |
245 | 0 | p.to_f64() |
246 | 0 | } |
247 | | |
248 | | /// Sine for double precision |
249 | | /// |
250 | | /// ULP 0.5 |
251 | 0 | pub fn f_sin(x: f64) -> f64 { |
252 | 0 | let x_e = (x.to_bits() >> 52) & 0x7ff; |
253 | | const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64; |
254 | | |
255 | | let y: DoubleDouble; |
256 | | let k; |
257 | | |
258 | 0 | let mut argument_reduction = LargeArgumentReduction::default(); |
259 | | |
260 | 0 | if x_e < E_BIAS + 16 { |
261 | | // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) |
262 | 0 | let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
263 | 0 | if ax <= 0x3fa921fbd34a9550 { |
264 | | // |x| <= 0.0490874 |
265 | 0 | if x_e < E_BIAS - 26 { |
266 | | // |x| < 2^-26 |
267 | 0 | if x == 0.0 { |
268 | | // Signed zeros. |
269 | 0 | return x; |
270 | 0 | } |
271 | | |
272 | | // For |x| < 2^-26, |sin(x) - x| < ulp(x)/2. |
273 | 0 | return dyad_fmla(x, f64::from_bits(0xbc90000000000000), x); |
274 | 0 | } |
275 | | |
276 | | // Polynomial for sin(x)/x |
277 | | // Generated by Sollya: |
278 | | // d = [0, 0.0490874]; |
279 | | // f_sin = sin(y)/y; |
280 | | // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|1, D...|], d, relative, floating); |
281 | | |
282 | 0 | let x2 = x * x; |
283 | 0 | let x4 = x2 * x2; |
284 | | const C: [u64; 4] = [ |
285 | | 0xbfc5555555555555, |
286 | | 0x3f8111111110e45a, |
287 | | 0xbf2a019ffd7fdaaf, |
288 | | 0x3ec71819b9bf01ef, |
289 | | ]; |
290 | 0 | let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0])); |
291 | 0 | let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2])); |
292 | 0 | let w0 = f_fmla(x4, p23, p01); |
293 | 0 | let w1 = x2 * w0 * x; |
294 | 0 | let r = DoubleDouble::from_exact_add(x, w1); |
295 | 0 | let err = f_fmla( |
296 | 0 | x2, |
297 | 0 | f64::from_bits(0x3cb0000000000000), // 2^-52 |
298 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
299 | | ); |
300 | 0 | let ub = r.hi + (r.lo + err); |
301 | 0 | let lb = r.hi + (r.lo - err); |
302 | 0 | if ub == lb { |
303 | 0 | return ub; |
304 | 0 | } |
305 | 0 | return sin_accurate_near_zero(x); |
306 | 0 | } |
307 | | |
308 | | // Small range reduction. |
309 | 0 | (y, k) = range_reduction_small(x); |
310 | | } else { |
311 | | // Inf or NaN |
312 | 0 | if x_e > 2 * E_BIAS { |
313 | | // sin(+-Inf) = NaN |
314 | 0 | return x + f64::NAN; |
315 | 0 | } |
316 | | |
317 | | // Large range reduction. |
318 | 0 | (k, y) = argument_reduction.reduce(x); |
319 | | } |
320 | | |
321 | 0 | let r_sincos = sincos_eval(y); |
322 | | |
323 | | // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). |
324 | 0 | let sk = SIN_K_PI_OVER_128[(k & 255) as usize]; |
325 | 0 | let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize]; |
326 | | |
327 | 0 | let sin_k = DoubleDouble::from_bit_pair(sk); |
328 | 0 | let cos_k = DoubleDouble::from_bit_pair(ck); |
329 | | |
330 | 0 | let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k); |
331 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k); |
332 | | |
333 | | // sin_k_cos_y is always >> cos_k_sin_y |
334 | 0 | let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi); |
335 | 0 | rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; |
336 | | |
337 | 0 | let rlp = rr.lo + r_sincos.err; |
338 | 0 | let rlm = rr.lo - r_sincos.err; |
339 | | |
340 | 0 | let r_upper = rr.hi + rlp; // (rr.lo + ERR); |
341 | 0 | let r_lower = rr.hi + rlm; // (rr.lo - ERR); |
342 | | |
343 | | // Ziv's accuracy test |
344 | 0 | if r_upper == r_lower { |
345 | 0 | return rr.to_f64(); |
346 | 0 | } |
347 | | |
348 | 0 | sin_accurate(y, sin_k, cos_k) |
349 | 0 | } |
350 | | |
351 | | #[cold] |
352 | | #[inline(never)] |
353 | 0 | fn cos_accurate(y: DoubleDouble, msin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 { |
354 | 0 | let r_sincos = sincos_eval_dd(y); |
355 | | |
356 | | // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). |
357 | | // So k is an integer and -pi / 256 <= y <= pi / 256. |
358 | | // Then sin(x) = sin((k * pi/128 + y) |
359 | | // = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128) |
360 | | |
361 | 0 | let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k); |
362 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k); |
363 | | |
364 | 0 | let rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y); |
365 | 0 | rr.to_f64() |
366 | 0 | } |
367 | | |
368 | | #[cold] |
369 | 0 | pub(crate) fn cos_accurate_near_zero(x: f64) -> f64 { |
370 | | // Generated by Sollya: |
371 | | // d = [0, 0.0490874]; |
372 | | // f_sin = sin(y)/y; |
373 | | // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating); |
374 | | const C: [(u64, u64); 6] = [ |
375 | | (0xba2fa1c000000000, 0x3ff0000000000000), |
376 | | (0x3b1cd6ead3800000, 0xbfe0000000000000), |
377 | | (0x3c45112931063916, 0x3fa5555555555555), |
378 | | (0xbba1c1e3ab3b09e0, 0xbf56c16c16c16b2b), |
379 | | (0x3b937bc2f4ea7db6, 0x3efa01a019495fca), |
380 | | (0x3b307fd5e1b021b3, 0xbe927e0d57d894f8), |
381 | | ]; |
382 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
383 | 0 | let mut p = DoubleDouble::quick_mul_add( |
384 | 0 | x2, |
385 | 0 | DoubleDouble::from_bit_pair(C[5]), |
386 | 0 | DoubleDouble::from_bit_pair(C[4]), |
387 | | ); |
388 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3])); |
389 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2])); |
390 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1])); |
391 | 0 | p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0])); |
392 | 0 | p.to_f64() |
393 | 0 | } |
394 | | |
395 | | /// Cosine for double precision |
396 | | /// |
397 | | /// ULP 0.5 |
398 | 0 | pub fn f_cos(x: f64) -> f64 { |
399 | 0 | let x_e = (x.to_bits() >> 52) & 0x7ff; |
400 | | const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64; |
401 | | |
402 | | let y: DoubleDouble; |
403 | | let k; |
404 | | |
405 | 0 | let mut argument_reduction = LargeArgumentReduction::default(); |
406 | | |
407 | 0 | if x_e < E_BIAS + 16 { |
408 | | // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) |
409 | 0 | let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff; |
410 | 0 | if ax <= 0x3fa921fbd34a9550 { |
411 | | // |x| <= 0.0490874 |
412 | 0 | if x_e < E_BIAS - 27 { |
413 | | // |x| < 2^-27 |
414 | 0 | if x == 0.0 { |
415 | | // Signed zeros. |
416 | 0 | return 1.0; |
417 | 0 | } |
418 | | // For |x| < 2^-26, |sin(x) - x| < ulp(x)/2. |
419 | 0 | return 1.0 - min_normal_f64(); |
420 | 0 | } |
421 | | |
422 | | // Polynomial for cos(x) |
423 | | // Generated by Sollya: |
424 | | // d = [0, 0.0490874]; |
425 | | // f_cos = cos(y); |
426 | | // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|1, D...|], d, relative, floating); |
427 | | |
428 | 0 | let x2 = x * x; |
429 | 0 | let x4 = x2 * x2; |
430 | | const C: [u64; 4] = [ |
431 | | 0xbfe0000000000000, |
432 | | 0x3fa55555555554a4, |
433 | | 0xbf56c16c1619b84a, |
434 | | 0x3efa013d3d01cf7f, |
435 | | ]; |
436 | 0 | let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0])); |
437 | 0 | let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2])); |
438 | 0 | let w0 = f_fmla(x4, p23, p01); |
439 | 0 | let w1 = x2 * w0; |
440 | 0 | let r = DoubleDouble::from_exact_add(1., w1); |
441 | 0 | let err = f_fmla( |
442 | 0 | x2, |
443 | 0 | f64::from_bits(0x3cb0000000000000), // 2^-52 |
444 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
445 | | ); |
446 | | |
447 | 0 | let ub = r.hi + (r.lo + err); |
448 | 0 | let lb = r.hi + (r.lo - err); |
449 | 0 | if ub == lb { |
450 | 0 | return ub; |
451 | 0 | } |
452 | 0 | return cos_accurate_near_zero(x); |
453 | 0 | } else { |
454 | 0 | // // Small range reduction. |
455 | 0 | (y, k) = range_reduction_small(x); |
456 | 0 | } |
457 | | } else { |
458 | | // Inf or NaN |
459 | 0 | if x_e > 2 * E_BIAS { |
460 | | // cos(+-Inf) = NaN |
461 | 0 | return x + f64::NAN; |
462 | 0 | } |
463 | | |
464 | | // Large range reduction. |
465 | 0 | (k, y) = argument_reduction.reduce(x); |
466 | | } |
467 | 0 | let r_sincos = sincos_eval(y); |
468 | | |
469 | | // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). |
470 | | // So k is an integer and -pi / 256 <= y <= pi / 256. |
471 | | // Then cos(x) = cos((k * pi/128 + y) |
472 | | // = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128) |
473 | 0 | let sk = SIN_K_PI_OVER_128[(k.wrapping_add(128) & 255) as usize]; |
474 | 0 | let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize]; |
475 | 0 | let msin_k = DoubleDouble::from_bit_pair(sk); |
476 | 0 | let cos_k = DoubleDouble::from_bit_pair(ck); |
477 | | |
478 | 0 | let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k); |
479 | 0 | let cos_k_msin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k); |
480 | | |
481 | | // cos_k_cos_y is always >> cos_k_msin_y |
482 | 0 | let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi); |
483 | 0 | rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo; |
484 | 0 | let rlp = rr.lo + r_sincos.err; |
485 | 0 | let rlm = rr.lo - r_sincos.err; |
486 | | |
487 | 0 | let r_upper = rr.hi + rlp; // (rr.lo + ERR); |
488 | 0 | let r_lower = rr.hi + rlm; // (rr.lo - ERR); |
489 | | |
490 | | // Ziv's accuracy test |
491 | 0 | if r_upper == r_lower { |
492 | 0 | return rr.to_f64(); |
493 | 0 | } |
494 | 0 | cos_accurate(y, msin_k, cos_k) |
495 | 0 | } |
496 | | |
497 | | #[cfg(test)] |
498 | | mod tests { |
499 | | use super::*; |
500 | | |
501 | | #[test] |
502 | | fn cos_test() { |
503 | | assert_eq!(f_cos(0.0), 1.0); |
504 | | assert_eq!(f_cos(0.0024312312), 0.9999970445588818); |
505 | | assert_eq!(f_cos(1.0), 0.5403023058681398); |
506 | | assert_eq!(f_cos(-0.5), 0.8775825618903728); |
507 | | assert!(f_cos(f64::INFINITY).is_nan()); |
508 | | assert!(f_cos(f64::NEG_INFINITY).is_nan()); |
509 | | assert!(f_cos(f64::NAN).is_nan()); |
510 | | } |
511 | | |
512 | | #[test] |
513 | | fn sin_test() { |
514 | | assert_eq!(f_sin(0.00000002149119332890786), 0.00000002149119332890786); |
515 | | assert_eq!(f_sin(2.8477476437362989E-306), 2.8477476437362989E-306); |
516 | | assert_eq!(f_sin(0.0024312312), 0.002431228804879309); |
517 | | assert_eq!(f_sin(0.0), 0.0); |
518 | | assert_eq!(f_sin(1.0), 0.8414709848078965); |
519 | | assert_eq!(f_sin(-0.5), -0.479425538604203); |
520 | | assert!(f_sin(f64::INFINITY).is_nan()); |
521 | | assert!(f_sin(f64::NEG_INFINITY).is_nan()); |
522 | | assert!(f_sin(f64::NAN).is_nan()); |
523 | | } |
524 | | } |