Coverage Report

Created: 2026-01-22 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/sincospi.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::{dd_fmla, dyad_fmla, f_fmla, is_odd_integer};
30
use crate::double_double::DoubleDouble;
31
use crate::polyeval::{f_polyeval3, f_polyeval4};
32
use crate::rounding::CpuRound;
33
use crate::sin::SinCos;
34
use crate::sincospi_tables::SINPI_K_PI_OVER_64;
35
36
/**
37
Cospi(x) on [0; 0.000244140625]
38
39
Generated by Sollya:
40
```text
41
d = [0, 0.000244140625];
42
f_cos = cos(y*pi);
43
Q = fpminimax(f_cos, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
44
```
45
46
See ./notes/cospi_zero_dd.sollya
47
**/
48
#[cold]
49
#[inline(always)]
50
0
fn as_cospi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
51
    const C: [(u64, u64); 5] = [
52
        (0xbcb692b71366cc04, 0xc013bd3cc9be45de),
53
        (0xbcb32b33fb803bd5, 0x40103c1f081b5ac4),
54
        (0xbc9f5b752e98b088, 0xbff55d3c7e3cbff9),
55
        (0x3c30023d540b9350, 0x3fce1f506446cb66),
56
        (0x3c1a5d47937787d2, 0xbf8a9b062a36ba1c),
57
    ];
58
0
    let x2 = backend.exact_mult(x, x);
59
0
    let mut p = backend.quick_mul_add(
60
0
        x2,
61
0
        DoubleDouble::from_bit_pair(C[3]),
62
0
        DoubleDouble::from_bit_pair(C[3]),
63
    );
64
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
65
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
66
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
67
0
    p = backend.mul_add_f64(x2, p, 1.);
68
0
    p.to_f64()
69
0
}
Unexecuted instantiation: pxfm::sincospi::as_cospi_zero::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::as_cospi_zero::<pxfm::sincospi::GenSinCosPiBackend>
70
71
/**
72
Sinpi on range [0.0, 0.03515625]
73
74
Generated poly by Sollya:
75
```text
76
d = [0, 0.03515625];
77
78
f_sin = sin(y*pi)/y;
79
Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
80
```
81
See ./notes/sinpi_zero_dd.sollya
82
**/
83
#[cold]
84
#[inline(always)]
85
0
fn as_sinpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
86
    const C: [(u64, u64); 6] = [
87
        (0x3ca1a626311d9056, 0x400921fb54442d18),
88
        (0x3cb055f12c462211, 0xc014abbce625be53),
89
        (0xbc9789ea63534250, 0x400466bc6775aae1),
90
        (0xbc78b86de6962184, 0xbfe32d2cce62874e),
91
        (0x3c4eddf7cd887302, 0x3fb507833e2b781f),
92
        (0x3bf180c9d4af2894, 0xbf7e2ea4e143707e),
93
    ];
94
0
    let x2 = backend.exact_mult(x, x);
95
0
    let mut p = backend.quick_mul_add(
96
0
        x2,
97
0
        DoubleDouble::from_bit_pair(C[5]),
98
0
        DoubleDouble::from_bit_pair(C[4]),
99
    );
100
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
101
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
102
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
103
0
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
104
0
    p = backend.quick_mult_f64(p, x);
105
0
    p.to_f64()
106
0
}
Unexecuted instantiation: pxfm::sincospi::as_sinpi_zero::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::as_sinpi_zero::<pxfm::sincospi::GenSinCosPiBackend>
107
108
// Return k and y, where
109
// k = round(x * 64 / pi) and y = (x * 64 / pi) - k.
110
#[inline]
111
0
pub(crate) fn reduce_pi_64(x: f64) -> (f64, i64) {
112
0
    let kd = (x * 64.).cpu_round();
113
0
    let y = dd_fmla(kd, -1. / 64., x);
114
0
    (y, unsafe {
115
0
        kd.to_int_unchecked::<i64>() // indeterminate values is always filtered out before this call, as well only lowest bits are used
116
0
    })
117
0
}
118
119
// Return k and y, where
120
// k = round(x * 64 / pi) and y = (x * 64 / pi) - k.
121
#[inline(always)]
122
#[allow(unused)]
123
0
pub(crate) fn reduce_pi_64_fma(x: f64) -> (f64, i64) {
124
0
    let kd = (x * 64.).round();
125
0
    let y = f64::mul_add(kd, -1. / 64., x);
126
0
    (y, unsafe {
127
0
        kd.to_int_unchecked::<i64>() // indeterminate values is always filtered out before this call, as well only lowest bits are used
128
0
    })
129
0
}
130
131
pub(crate) trait SinCosPiBackend {
132
    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
133
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64;
134
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64;
135
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64;
136
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64);
137
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble;
138
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
139
    fn odd_integer(&self, x: f64) -> bool;
140
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
141
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble;
142
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
143
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
144
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble;
145
}
146
147
pub(crate) struct GenSinCosPiBackend {}
148
149
impl SinCosPiBackend for GenSinCosPiBackend {
150
    #[inline(always)]
151
0
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
152
0
        f_fmla(x, y, z)
153
0
    }
154
    #[inline(always)]
155
0
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
156
0
        dd_fmla(x, y, z)
157
0
    }
158
    #[inline(always)]
159
0
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
160
0
        dyad_fmla(x, y, z)
161
0
    }
162
    #[inline(always)]
163
0
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
164
        use crate::polyeval::f_polyeval3;
165
0
        f_polyeval3(x, a0, a1, a2)
166
0
    }
167
    #[inline(always)]
168
0
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
169
0
        reduce_pi_64(x)
170
0
    }
171
    #[inline(always)]
172
0
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
173
0
        DoubleDouble::quick_mult_f64(x, y)
174
0
    }
175
    #[inline(always)]
176
0
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
177
0
        DoubleDouble::quick_mult(x, y)
178
0
    }
179
180
    #[inline(always)]
181
0
    fn odd_integer(&self, x: f64) -> bool {
182
0
        is_odd_integer(x)
183
0
    }
184
185
    #[inline(always)]
186
0
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
187
0
        DoubleDouble::div(x, y)
188
0
    }
189
190
    #[inline(always)]
191
0
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
192
0
        DoubleDouble::mul_add_f64(a, b, c)
193
0
    }
194
195
    #[inline(always)]
196
0
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
197
0
        DoubleDouble::quick_mul_add(a, b, c)
198
0
    }
199
200
    #[inline(always)]
201
0
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
202
0
        DoubleDouble::mul_add(a, b, c)
203
0
    }
204
205
    #[inline(always)]
206
0
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
207
0
        DoubleDouble::from_exact_mult(x, y)
208
0
    }
209
}
210
211
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
212
pub(crate) struct FmaSinCosPiBackend {}
213
214
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
215
impl SinCosPiBackend for FmaSinCosPiBackend {
216
    #[inline(always)]
217
0
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
218
0
        f64::mul_add(x, y, z)
219
0
    }
220
    #[inline(always)]
221
0
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
222
0
        f64::mul_add(x, y, z)
223
0
    }
224
    #[inline(always)]
225
0
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
226
0
        f64::mul_add(x, y, z)
227
0
    }
228
    #[inline(always)]
229
0
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
230
        use crate::polyeval::d_polyeval3;
231
0
        d_polyeval3(x, a0, a1, a2)
232
0
    }
233
    #[inline(always)]
234
0
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
235
0
        reduce_pi_64_fma(x)
236
0
    }
237
    #[inline(always)]
238
0
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
239
0
        DoubleDouble::quick_mult_f64_fma(x, y)
240
0
    }
241
    #[inline(always)]
242
0
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
243
0
        DoubleDouble::quick_mult_fma(x, y)
244
0
    }
245
246
    #[inline(always)]
247
0
    fn odd_integer(&self, x: f64) -> bool {
248
        use crate::common::is_odd_integer_fast;
249
0
        is_odd_integer_fast(x)
250
0
    }
251
252
    #[inline(always)]
253
0
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
254
0
        DoubleDouble::div_fma(x, y)
255
0
    }
256
257
    #[inline(always)]
258
0
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
259
0
        DoubleDouble::mul_add_f64_fma(a, b, c)
260
0
    }
261
262
    #[inline(always)]
263
0
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
264
0
        DoubleDouble::quick_mul_add_fma(a, b, c)
265
0
    }
266
267
    #[inline(always)]
268
0
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
269
0
        DoubleDouble::mul_add_fma(a, b, c)
270
0
    }
271
272
    #[inline(always)]
273
0
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
274
0
        DoubleDouble::from_exact_mult_fma(x, y)
275
0
    }
276
}
277
278
#[inline(always)]
279
0
pub(crate) fn sincospi_eval<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
280
0
    let x2 = x * x;
281
    /*
282
        sinpi(pi*x) poly generated by Sollya:
283
        d = [0, 0.0078128];
284
        f_sin = sin(y*pi)/y;
285
        Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|107, D...|], d, relative, floating);
286
        See ./notes/sinpi.sollya
287
    */
288
0
    let sin_lop = backend.polyeval3(
289
0
        x2,
290
0
        f64::from_bits(0xc014abbce625be4d),
291
0
        f64::from_bits(0x400466bc6767f259),
292
0
        f64::from_bits(0xbfe32d176b0b3baf),
293
0
    ) * x2;
294
    // We're splitting polynomial in two parts, since first term dominates
295
    // we compute: (a0_lo + a0_hi) * x + x * (a1 * x^2 + a2 + x^4) ...
296
0
    let sin_lo = backend.dd_fma(f64::from_bits(0x3ca1a5c04563817a), x, sin_lop * x);
297
0
    let sin_hi = x * f64::from_bits(0x400921fb54442d18);
298
299
    /*
300
       cospi(pi*x) poly generated by Sollya:
301
       d = [0, 0.015625];
302
       f_cos = cos(y*pi);
303
       Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, D...|], d, relative, floating);
304
       See ./notes/cospi.sollya
305
    */
306
0
    let p = backend.polyeval3(
307
0
        x2,
308
0
        f64::from_bits(0xc013bd3cc9be45cf),
309
0
        f64::from_bits(0x40103c1f08085ad1),
310
0
        f64::from_bits(0xbff55d1e43463fc3),
311
    );
312
313
    // We're splitting polynomial in two parts, since first term dominates
314
    // we compute: (a0_lo + a0_hi) + (a1 * x^2 + a2 + x^4)...
315
0
    let cos_lo = backend.dd_fma(p, x2, f64::from_bits(0xbbdf72adefec0800));
316
0
    let cos_hi = f64::from_bits(0x3ff0000000000000);
317
318
0
    let err = backend.fma(
319
0
        x2,
320
0
        f64::from_bits(0x3cb0000000000000), // 2^-52
321
0
        f64::from_bits(0x3c40000000000000), // 2^-59
322
    );
323
0
    SinCos {
324
0
        v_sin: DoubleDouble::from_exact_add(sin_hi, sin_lo),
325
0
        v_cos: DoubleDouble::from_exact_add(cos_hi, cos_lo),
326
0
        err,
327
0
    }
328
0
}
Unexecuted instantiation: pxfm::sincospi::sincospi_eval::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::sincospi_eval::<pxfm::sincospi::GenSinCosPiBackend>
329
330
#[inline(always)]
331
0
pub(crate) fn sincospi_eval_dd<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
332
0
    let x2 = backend.exact_mult(x, x);
333
    // Sin coeffs
334
    // poly sin(pi*x) generated by Sollya:
335
    // d = [0, 0.0078128];
336
    // f_sin = sin(y*pi)/y;
337
    // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating);
338
    // see ./notes/sinpi_dd.sollya
339
    const SC: [(u64, u64); 5] = [
340
        (0x3ca1a626330ccf19, 0x400921fb54442d18),
341
        (0x3cb05540f6323de9, 0xc014abbce625be53),
342
        (0xbc9050fdd1229756, 0x400466bc6775aadf),
343
        (0xbc780d406f3472e8, 0xbfe32d2cce5a7bf1),
344
        (0x3c4cfcf8b6b817f2, 0x3fb5077069d8a182),
345
    ];
346
347
0
    let mut sin_y = backend.quick_mul_add(
348
0
        x2,
349
0
        DoubleDouble::from_bit_pair(SC[4]),
350
0
        DoubleDouble::from_bit_pair(SC[3]),
351
    );
352
0
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[2]));
353
0
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[1]));
354
0
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[0]));
355
0
    sin_y = backend.quick_mult_f64(sin_y, x);
356
357
    // Cos coeffs
358
    // d = [0, 0.0078128];
359
    // f_cos = cos(y*pi);
360
    // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating);
361
    // See ./notes/cospi_dd.sollya
362
    const CC: [(u64, u64); 5] = [
363
        (0xbaaa70a580000000, 0x3ff0000000000000),
364
        (0xbcb69211d8dd1237, 0xc013bd3cc9be45de),
365
        (0xbcbd96cfd637eeb7, 0x40103c1f081b5abf),
366
        (0x3c994d75c577f029, 0xbff55d3c7e2e4ba5),
367
        (0xbc5c542d998a4e48, 0x3fce1f2f5f747411),
368
    ];
369
370
0
    let mut cos_y = backend.quick_mul_add(
371
0
        x2,
372
0
        DoubleDouble::from_bit_pair(CC[4]),
373
0
        DoubleDouble::from_bit_pair(CC[3]),
374
    );
375
0
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[2]));
376
0
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[1]));
377
0
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[0]));
378
0
    SinCos {
379
0
        v_sin: sin_y,
380
0
        v_cos: cos_y,
381
0
        err: 0.,
382
0
    }
383
0
}
Unexecuted instantiation: pxfm::sincospi::sincospi_eval_dd::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::sincospi_eval_dd::<pxfm::sincospi::GenSinCosPiBackend>
384
385
#[cold]
386
#[inline(always)]
387
0
fn sinpi_dd<B: SinCosPiBackend>(
388
0
    x: f64,
389
0
    sin_k: DoubleDouble,
390
0
    cos_k: DoubleDouble,
391
0
    backend: &B,
392
0
) -> f64 {
393
0
    let r_sincos = sincospi_eval_dd(x, backend);
394
0
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
395
0
    let rr = backend.mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
396
0
    rr.to_f64()
397
0
}
Unexecuted instantiation: pxfm::sincospi::sinpi_dd::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::sinpi_dd::<pxfm::sincospi::GenSinCosPiBackend>
398
399
#[cold]
400
#[inline(always)]
401
0
fn sincospi_dd<B: SinCosPiBackend>(
402
0
    x: f64,
403
0
    sin_sin_k: DoubleDouble,
404
0
    sin_cos_k: DoubleDouble,
405
0
    cos_sin_k: DoubleDouble,
406
0
    cos_cos_k: DoubleDouble,
407
0
    backend: &B,
408
0
) -> (f64, f64) {
409
0
    let r_sincos = sincospi_eval_dd(x, backend);
410
411
0
    let cos_k_sin_y = backend.quick_mult(sin_cos_k, r_sincos.v_sin);
412
0
    let rr_sin = backend.mul_add(sin_sin_k, r_sincos.v_cos, cos_k_sin_y);
413
414
0
    let cos_k_sin_y = backend.quick_mult(cos_cos_k, r_sincos.v_sin);
415
0
    let rr_cos = backend.mul_add(cos_sin_k, r_sincos.v_cos, cos_k_sin_y);
416
417
0
    (rr_sin.to_f64(), rr_cos.to_f64())
418
0
}
Unexecuted instantiation: pxfm::sincospi::sincospi_dd::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::sincospi_dd::<pxfm::sincospi::GenSinCosPiBackend>
419
420
// [sincospi_eval] gives precision around 2^-66 what is not enough for DD case this method gives 2^-84
421
#[inline]
422
0
fn sincospi_eval_extended(x: f64) -> SinCos {
423
0
    let x2 = DoubleDouble::from_exact_mult(x, x);
424
    /*
425
        sinpi(pi*x) poly generated by Sollya:
426
        d = [0, 0.0078128];
427
        f_sin = sin(y*pi)/y;
428
        Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
429
        See ./notes/sinpi.sollya
430
    */
431
0
    let sin_lop = f_polyeval3(
432
0
        x2.hi,
433
0
        f64::from_bits(0x400466bc67763662),
434
0
        f64::from_bits(0xbfe32d2cce5aad86),
435
0
        f64::from_bits(0x3fb5077099a1f35b),
436
    );
437
0
    let mut v_sin = DoubleDouble::mul_f64_add(
438
0
        x2,
439
0
        sin_lop,
440
0
        DoubleDouble::from_bit_pair((0x3cb0553d6ee5e8ec, 0xc014abbce625be53)),
441
    );
442
0
    v_sin = DoubleDouble::mul_add(
443
0
        x2,
444
0
        v_sin,
445
0
        DoubleDouble::from_bit_pair((0x3ca1a626330dd130, 0x400921fb54442d18)),
446
0
    );
447
0
    v_sin = DoubleDouble::quick_mult_f64(v_sin, x);
448
449
    /*
450
       cospi(pi*x) poly generated by Sollya:
451
       d = [0, 0.015625];
452
       f_cos = cos(y*pi);
453
       Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
454
       See ./notes/cospi_fast_dd.sollya
455
    */
456
0
    let p = f_polyeval3(
457
0
        x2.hi,
458
0
        f64::from_bits(0x40103c1f081b5abf),
459
0
        f64::from_bits(0xbff55d3c7e2edd89),
460
0
        f64::from_bits(0x3fce1f2fd9d79484),
461
    );
462
463
0
    let mut v_cos = DoubleDouble::mul_f64_add(
464
0
        x2,
465
0
        p,
466
0
        DoubleDouble::from_bit_pair((0xbcb69236a9b3ed73, 0xc013bd3cc9be45de)),
467
    );
468
0
    v_cos = DoubleDouble::mul_add_f64(x2, v_cos, f64::from_bits(0x3ff0000000000000));
469
470
0
    SinCos {
471
0
        v_sin: DoubleDouble::from_exact_add(v_sin.hi, v_sin.lo),
472
0
        v_cos: DoubleDouble::from_exact_add(v_cos.hi, v_cos.lo),
473
0
        err: 0.,
474
0
    }
475
0
}
476
477
0
pub(crate) fn f_fast_sinpi_dd(x: f64) -> DoubleDouble {
478
0
    let ix = x.to_bits();
479
0
    let ax = ix & 0x7fff_ffff_ffff_ffff;
480
0
    if ax == 0 {
481
0
        return DoubleDouble::new(0., 0.);
482
0
    }
483
0
    let e: i32 = (ax >> 52) as i32;
484
0
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
485
0
    let sgn: i64 = (ix as i64) >> 63;
486
0
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
487
0
    let mut s: i32 = 1063i32.wrapping_sub(e);
488
0
    if s < 0 {
489
0
        s = -s - 1;
490
0
        if s > 10 {
491
0
            return DoubleDouble::new(0., f64::copysign(0.0, x));
492
0
        }
493
0
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
494
0
        if (iq & 2047) == 0 {
495
0
            return DoubleDouble::new(0., f64::copysign(0.0, x));
496
0
        }
497
0
    }
498
499
0
    if ax <= 0x3fa2000000000000u64 {
500
        // |x| <= 0.03515625
501
        const PI: DoubleDouble = DoubleDouble::new(
502
            f64::from_bits(0x3ca1a62633145c07),
503
            f64::from_bits(0x400921fb54442d18),
504
        );
505
506
0
        if ax < 0x3c90000000000000 {
507
            // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those
508
            // of the function pi*x, and if x is a worst case, then 2*x is another
509
            // one in the next binade. For this reason, worst cases are only included
510
            // for the binade [2^-1022, 2^-1021). For larger binades,
511
            // up to [2^-54,2^-53), worst cases should be deduced by multiplying
512
            // by some power of 2.
513
0
            if ax < 0x0350000000000000 {
514
0
                let t = x * f64::from_bits(0x4690000000000000);
515
0
                let z = DoubleDouble::quick_mult_f64(PI, t);
516
0
                let r = z.to_f64();
517
0
                let rs = r * f64::from_bits(0x3950000000000000);
518
0
                let rt = rs * f64::from_bits(0x4690000000000000);
519
0
                return DoubleDouble::new(
520
                    0.,
521
0
                    dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs),
522
                );
523
0
            }
524
0
            let z = DoubleDouble::quick_mult_f64(PI, x);
525
0
            return z;
526
0
        }
527
528
        /*
529
           Poly generated by Sollya:
530
           d = [0, 0.03515625];
531
           f_sin = sin(y*pi)/y;
532
           Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, 107, D...|], d, relative, floating);
533
534
           See ./notes/sinpi_zero_fast_dd.sollya
535
        */
536
        const C: [u64; 4] = [
537
            0xbfe32d2cce62bd85,
538
            0x3fb50783487eb73d,
539
            0xbf7e3074f120ad1f,
540
            0x3f3e8d9011340e5a,
541
        ];
542
543
0
        let x2 = DoubleDouble::from_exact_mult(x, x);
544
545
        const C_PI: DoubleDouble =
546
            DoubleDouble::from_bit_pair((0x3ca1a626331457a4, 0x400921fb54442d18));
547
548
0
        let p = f_polyeval4(
549
0
            x2.hi,
550
0
            f64::from_bits(C[0]),
551
0
            f64::from_bits(C[1]),
552
0
            f64::from_bits(C[2]),
553
0
            f64::from_bits(C[3]),
554
        );
555
0
        let mut r = DoubleDouble::mul_f64_add(
556
0
            x2,
557
0
            p,
558
0
            DoubleDouble::from_bit_pair((0xbc96dd7ae221e58c, 0x400466bc6775aae2)),
559
        );
560
0
        r = DoubleDouble::mul_add(
561
0
            x2,
562
0
            r,
563
0
            DoubleDouble::from_bit_pair((0x3cb05511c8a6c478, 0xc014abbce625be53)),
564
0
        );
565
0
        r = DoubleDouble::mul_add(r, x2, C_PI);
566
0
        r = DoubleDouble::quick_mult_f64(r, x);
567
0
        let k = DoubleDouble::from_exact_add(r.hi, r.lo);
568
0
        return k;
569
0
    }
570
571
0
    let si = e.wrapping_sub(1011);
572
0
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
573
        // x is integer or half-integer
574
0
        if (m0.wrapping_shl(si as u32)) == 0 {
575
0
            return DoubleDouble::new(0., f64::copysign(0.0, x)); // x is integer
576
0
        }
577
0
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
578
        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
579
0
        return DoubleDouble::new(
580
            0.,
581
0
            if t == 0 {
582
0
                f64::copysign(1.0, x)
583
            } else {
584
0
                -f64::copysign(1.0, x)
585
            },
586
        );
587
0
    }
588
589
0
    let (y, k) = reduce_pi_64(x);
590
591
    // // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
592
0
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
593
0
    let cos_k = DoubleDouble::from_bit_pair(
594
0
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
595
    );
596
597
0
    let r_sincos = sincospi_eval_extended(y);
598
599
0
    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
600
0
    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
601
602
    // sin_k_cos_y is always >> cos_k_sin_y
603
0
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
604
0
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
605
0
    DoubleDouble::from_exact_add(rr.hi, rr.lo)
606
0
}
607
608
#[inline(always)]
609
0
fn sinpi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
610
0
    let ix = x.to_bits();
611
0
    let ax = ix & 0x7fff_ffff_ffff_ffff;
612
0
    if ax == 0 {
613
0
        return x;
614
0
    }
615
0
    let e: i32 = (ax >> 52) as i32;
616
0
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
617
0
    let sgn: i64 = (ix as i64) >> 63;
618
0
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
619
0
    let mut s: i32 = 1063i32.wrapping_sub(e);
620
0
    if s < 0 {
621
0
        if e == 0x7ff {
622
0
            if (ix << 12) == 0 {
623
0
                return f64::NAN;
624
0
            }
625
0
            return x + x; // case x=NaN
626
0
        }
627
0
        s = -s - 1;
628
0
        if s > 10 {
629
0
            return f64::copysign(0.0, x);
630
0
        }
631
0
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
632
0
        if (iq & 2047) == 0 {
633
0
            return f64::copysign(0.0, x);
634
0
        }
635
0
    }
636
637
0
    if ax <= 0x3fa2000000000000u64 {
638
        // |x| <= 0.03515625
639
        const PI: DoubleDouble = DoubleDouble::new(
640
            f64::from_bits(0x3ca1a62633145c07),
641
            f64::from_bits(0x400921fb54442d18),
642
        );
643
644
0
        if ax < 0x3c90000000000000 {
645
            // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those
646
            // of the function pi*x, and if x is a worst case, then 2*x is another
647
            // one in the next binade. For this reason, worst cases are only included
648
            // for the binade [2^-1022, 2^-1021). For larger binades,
649
            // up to [2^-54,2^-53), worst cases should be deduced by multiplying
650
            // by some power of 2.
651
0
            if ax < 0x0350000000000000 {
652
0
                let t = x * f64::from_bits(0x4690000000000000);
653
0
                let z = backend.quick_mult_f64(PI, t);
654
0
                let r = z.to_f64();
655
0
                let rs = r * f64::from_bits(0x3950000000000000);
656
0
                let rt = rs * f64::from_bits(0x4690000000000000);
657
0
                return backend.dyad_fma(
658
0
                    (z.hi - rt) + z.lo,
659
0
                    f64::from_bits(0x3950000000000000),
660
0
                    rs,
661
                );
662
0
            }
663
0
            let z = backend.quick_mult_f64(PI, x);
664
0
            return z.to_f64();
665
0
        }
666
667
        /*
668
           Poly generated by Sollya:
669
           d = [0, 0.03515625];
670
           f_sin = sin(y*pi)/y;
671
           Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
672
673
           See ./notes/sinpi_zero.sollya
674
        */
675
676
0
        let x2 = x * x;
677
0
        let x3 = x2 * x;
678
0
        let x4 = x2 * x2;
679
680
0
        let eps = x * backend.fma(
681
0
            x2,
682
0
            f64::from_bits(0x3d00000000000000), // 2^-47
683
0
            f64::from_bits(0x3bd0000000000000), // 2^-66
684
0
        );
685
686
        const C: [u64; 4] = [
687
            0xc014abbce625be51,
688
            0x400466bc67754b46,
689
            0xbfe32d2cc12a51f4,
690
            0x3fb5060540058476,
691
        ];
692
693
        const C_PI: DoubleDouble =
694
            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
695
696
0
        let mut z = backend.quick_mult_f64(C_PI, x);
697
698
0
        let zl0 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
699
0
        let zl1 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
700
701
0
        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
702
0
        let lb = z.hi + (z.lo - eps);
703
0
        let ub = z.hi + (z.lo + eps);
704
0
        if lb == ub {
705
0
            return lb;
706
0
        }
707
0
        return as_sinpi_zero(x, &backend);
708
0
    }
709
710
0
    let si = e.wrapping_sub(1011);
711
0
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
712
        // x is integer or half-integer
713
0
        if (m0.wrapping_shl(si as u32)) == 0 {
714
0
            return f64::copysign(0.0, x); // x is integer
715
0
        }
716
0
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
717
        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
718
0
        return if t == 0 {
719
0
            f64::copysign(1.0, x)
720
        } else {
721
0
            -f64::copysign(1.0, x)
722
        };
723
0
    }
724
725
0
    let (y, k) = backend.arg_reduce_pi_64(x);
726
727
    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
728
0
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
729
0
    let cos_k = DoubleDouble::from_bit_pair(
730
0
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
731
    );
732
733
0
    let r_sincos = sincospi_eval(y, &backend);
734
735
0
    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
736
0
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
737
738
    // sin_k_cos_y is always >> cos_k_sin_y
739
0
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
740
0
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
741
742
0
    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
743
0
    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
744
745
0
    if ub == lb {
746
0
        return rr.to_f64();
747
0
    }
748
0
    sinpi_dd(y, sin_k, cos_k, &backend)
749
0
}
Unexecuted instantiation: pxfm::sincospi::sinpi_gen_impl::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::sinpi_gen_impl::<pxfm::sincospi::GenSinCosPiBackend>
750
751
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
752
#[target_feature(enable = "avx", enable = "fma")]
753
0
unsafe fn sinpi_fma_impl(x: f64) -> f64 {
754
0
    sinpi_gen_impl(x, FmaSinCosPiBackend {})
755
0
}
756
757
/// Computes sin(PI*x)
758
///
759
/// Max ULP 0.5
760
0
pub fn f_sinpi(x: f64) -> f64 {
761
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
762
    {
763
        sinpi_gen_impl(x, GenSinCosPiBackend {})
764
    }
765
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
766
    {
767
        use std::sync::OnceLock;
768
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
769
0
        let q = EXECUTOR.get_or_init(|| {
770
0
            if std::arch::is_x86_feature_detected!("avx")
771
0
                && std::arch::is_x86_feature_detected!("fma")
772
            {
773
0
                sinpi_fma_impl
774
            } else {
775
0
                fn def_sinpi(x: f64) -> f64 {
776
0
                    sinpi_gen_impl(x, GenSinCosPiBackend {})
777
0
                }
778
0
                def_sinpi
779
            }
780
0
        });
781
0
        unsafe { q(x) }
782
    }
783
0
}
784
785
#[inline(always)]
786
0
fn cospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
787
0
    let ix = x.to_bits();
788
0
    let ax = ix & 0x7fff_ffff_ffff_ffff;
789
0
    if ax == 0 {
790
0
        return 1.0;
791
0
    }
792
0
    let e: i32 = (ax >> 52) as i32;
793
    // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022)
794
0
    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
795
0
    let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s)
796
0
    if s < 0 {
797
        // |x| >= 2^41
798
0
        if e == 0x7ff {
799
            // NaN or Inf
800
0
            if ix.wrapping_shl(12) == 0 {
801
0
                return f64::NAN;
802
0
            }
803
0
            return x + x; // NaN
804
0
        }
805
0
        s = -s - 1; // now 2^(41+s) <= |x| < 2^(42+s)
806
0
        if s > 11 {
807
0
            return 1.0;
808
0
        } // |x| >= 2^53
809
0
        let iq: u64 = (m as u64).wrapping_shl(s as u32).wrapping_add(1024);
810
0
        if (iq & 2047) == 0 {
811
0
            return 0.0;
812
0
        }
813
0
    }
814
0
    if ax <= 0x3f30000000000000u64 {
815
        // |x| <= 2^-12, |x| <= 0.000244140625
816
0
        if ax <= 0x3e2ccf6429be6621u64 {
817
0
            return 1.0 - f64::from_bits(0x3c80000000000000);
818
0
        }
819
0
        let x2 = x * x;
820
0
        let x4 = x2 * x2;
821
0
        let eps = x2 * f64::from_bits(0x3cfa000000000000);
822
823
        /*
824
            Generated by Sollya:
825
            d = [0, 0.000244140625];
826
            f_cos = cos(y*pi);
827
            Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
828
829
            See ./notes/cospi.sollya
830
        */
831
832
        const C: [u64; 4] = [
833
            0xc013bd3cc9be45de,
834
            0x40103c1f081b5ac4,
835
            0xbff55d3c7ff79b60,
836
            0x3fd24c7b6f7d0690,
837
        ];
838
839
0
        let p0 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
840
0
        let p1 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
841
842
0
        let p = x2 * backend.fma(x4, p0, p1);
843
0
        let lb = (p - eps) + 1.;
844
0
        let ub = (p + eps) + 1.;
845
0
        if lb == ub {
846
0
            return lb;
847
0
        }
848
0
        return as_cospi_zero(x, &backend);
849
0
    }
850
851
0
    let si: i32 = e.wrapping_sub(1011);
852
0
    if si >= 0 && ((m as u64).wrapping_shl(si as u32) ^ 0x8000000000000000u64) == 0 {
853
0
        return 0.0;
854
0
    }
855
856
0
    let (y, k) = backend.arg_reduce_pi_64(x);
857
858
    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
859
0
    let msin_k = DoubleDouble::from_bit_pair(
860
0
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize],
861
    );
862
0
    let cos_k = DoubleDouble::from_bit_pair(
863
0
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
864
    );
865
866
0
    let r_sincos = sincospi_eval(y, &backend);
867
868
0
    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
869
0
    let cos_k_msin_y = backend.quick_mult(r_sincos.v_sin, msin_k);
870
871
    // cos_k_cos_y is always >> cos_k_msin_y
872
0
    let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
873
0
    rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;
874
875
0
    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
876
0
    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
877
878
0
    if ub == lb {
879
0
        return rr.to_f64();
880
0
    }
881
0
    sinpi_dd(y, cos_k, msin_k, &backend)
882
0
}
Unexecuted instantiation: pxfm::sincospi::cospi_gen_impl::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::cospi_gen_impl::<pxfm::sincospi::GenSinCosPiBackend>
883
884
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
885
#[target_feature(enable = "avx", enable = "fma")]
886
0
unsafe fn cospi_fma_impl(x: f64) -> f64 {
887
0
    cospi_gen_impl(x, FmaSinCosPiBackend {})
888
0
}
889
890
/// Computes cos(PI*x)
891
///
892
/// Max found ULP 0.5
893
0
pub fn f_cospi(x: f64) -> f64 {
894
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
895
    {
896
        cospi_gen_impl(x, GenSinCosPiBackend {})
897
    }
898
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
899
    {
900
        use std::sync::OnceLock;
901
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
902
0
        let q = EXECUTOR.get_or_init(|| {
903
0
            if std::arch::is_x86_feature_detected!("avx")
904
0
                && std::arch::is_x86_feature_detected!("fma")
905
            {
906
0
                cospi_fma_impl
907
            } else {
908
0
                fn def_cospi(x: f64) -> f64 {
909
0
                    cospi_gen_impl(x, GenSinCosPiBackend {})
910
0
                }
911
0
                def_cospi
912
            }
913
0
        });
914
0
        unsafe { q(x) }
915
    }
916
0
}
917
918
#[inline(always)]
919
0
fn sincospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> (f64, f64) {
920
0
    let ix = x.to_bits();
921
0
    let ax = ix & 0x7fff_ffff_ffff_ffff;
922
0
    if ax == 0 {
923
0
        return (x, 1.0);
924
0
    }
925
0
    let e: i32 = (ax >> 52) as i32;
926
    // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022)
927
0
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
928
0
    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
929
0
    let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s)
930
0
    if s < 0 {
931
        // |x| >= 2^41
932
0
        if e == 0x7ff {
933
            // NaN or Inf
934
0
            if ix.wrapping_shl(12) == 0 {
935
0
                return (f64::NAN, f64::NAN);
936
0
            }
937
0
            return (x + x, x + x); // NaN
938
0
        }
939
0
        s = -s - 1;
940
0
        if s > 10 {
941
            static CF: [f64; 2] = [1., -1.];
942
0
            let is_odd = backend.odd_integer(f64::from_bits(ax));
943
0
            let cos_x = CF[is_odd as usize];
944
0
            return (f64::copysign(0.0, x), cos_x);
945
0
        } // |x| >= 2^53
946
0
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
947
948
        // sinpi = 0 when multiple of 2048
949
0
        let sin_zero = (iq & 2047) == 0;
950
951
        // cospi = 0 when offset-by-half multiple of 2048
952
0
        let cos_zero = ((m as u64).wrapping_shl(s as u32).wrapping_add(1024) & 2047) == 0;
953
954
0
        if sin_zero && cos_zero {
955
0
            // both zero (only possible if NaN or something degenerate)
956
0
        } else if sin_zero {
957
            static CF: [f64; 2] = [1., -1.];
958
0
            let is_odd = backend.odd_integer(f64::from_bits(ax));
959
0
            let cos_x = CF[is_odd as usize];
960
0
            return (0.0, cos_x); // sin = 0, cos = ±1
961
0
        } else if cos_zero {
962
            // x = k / 2 * PI
963
0
            let si = e.wrapping_sub(1011);
964
0
            let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
965
            // making sin decision based on quadrant
966
0
            return if t == 0 {
967
0
                (f64::copysign(1.0, x), 0.0)
968
            } else {
969
0
                (-f64::copysign(1.0, x), 0.0)
970
            }; // sin = ±1, cos = 0
971
0
        }
972
0
    }
973
974
0
    if ax <= 0x3f30000000000000u64 {
975
        // |x| <= 2^-12, |x| <= 0.000244140625
976
0
        if ax <= 0x3c90000000000000u64 {
977
            const PI: DoubleDouble = DoubleDouble::new(
978
                f64::from_bits(0x3ca1a62633145c07),
979
                f64::from_bits(0x400921fb54442d18),
980
            );
981
0
            let sin_x = if ax < 0x0350000000000000 {
982
0
                let t = x * f64::from_bits(0x4690000000000000);
983
0
                let z = backend.quick_mult_f64(PI, t);
984
0
                let r = z.to_f64();
985
0
                let rs = r * f64::from_bits(0x3950000000000000);
986
0
                let rt = rs * f64::from_bits(0x4690000000000000);
987
0
                backend.dyad_fma((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs)
988
            } else {
989
0
                let z = backend.quick_mult_f64(PI, x);
990
0
                z.to_f64()
991
            };
992
0
            return (sin_x, 1.0 - f64::from_bits(0x3c80000000000000));
993
0
        }
994
0
        let x2 = x * x;
995
0
        let x4 = x2 * x2;
996
0
        let cos_eps = x2 * f64::from_bits(0x3cfa000000000000);
997
998
        /*
999
            Generated by Sollya:
1000
            d = [0, 0.000244140625];
1001
            f_cos = cos(y*pi);
1002
            Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
1003
1004
            See ./notes/cospi.sollya
1005
        */
1006
1007
        const COS_C: [u64; 4] = [
1008
            0xc013bd3cc9be45de,
1009
            0x40103c1f081b5ac4,
1010
            0xbff55d3c7ff79b60,
1011
            0x3fd24c7b6f7d0690,
1012
        ];
1013
1014
0
        let p0 = backend.fma(x2, f64::from_bits(COS_C[3]), f64::from_bits(COS_C[2]));
1015
0
        let p1 = backend.fma(x2, f64::from_bits(COS_C[1]), f64::from_bits(COS_C[0]));
1016
1017
0
        let p = x2 * backend.fma(x4, p0, p1);
1018
0
        let cos_lb = (p - cos_eps) + 1.;
1019
0
        let cos_ub = (p + cos_eps) + 1.;
1020
0
        let cos_x = if cos_lb == cos_ub {
1021
0
            cos_lb
1022
        } else {
1023
0
            as_cospi_zero(x, &backend)
1024
        };
1025
1026
        /*
1027
            Poly generated by Sollya:
1028
            d = [0, 0.03515625];
1029
            f_sin = sin(y*pi)/y;
1030
            Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
1031
1032
            See ./notes/sinpi_zero.sollya
1033
        */
1034
1035
        const SIN_C: [u64; 4] = [
1036
            0xc014abbce625be51,
1037
            0x400466bc67754b46,
1038
            0xbfe32d2cc12a51f4,
1039
            0x3fb5060540058476,
1040
        ];
1041
1042
        const C_PI: DoubleDouble =
1043
            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
1044
1045
0
        let mut z = backend.quick_mult_f64(C_PI, x);
1046
1047
0
        let x3 = x2 * x;
1048
1049
0
        let zl0 = backend.fma(x2, f64::from_bits(SIN_C[1]), f64::from_bits(SIN_C[0]));
1050
0
        let zl1 = backend.fma(x2, f64::from_bits(SIN_C[3]), f64::from_bits(SIN_C[2]));
1051
1052
0
        let sin_eps = x * backend.fma(
1053
0
            x2,
1054
0
            f64::from_bits(0x3d00000000000000), // 2^-47
1055
0
            f64::from_bits(0x3bd0000000000000), // 2^-66
1056
0
        );
1057
1058
0
        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
1059
0
        let sin_lb = z.hi + (z.lo - sin_eps);
1060
0
        let sin_ub = z.hi + (z.lo + sin_eps);
1061
0
        let sin_x = if sin_lb == sin_ub {
1062
0
            sin_lb
1063
        } else {
1064
0
            as_sinpi_zero(x, &backend)
1065
        };
1066
0
        return (sin_x, cos_x);
1067
0
    }
1068
1069
0
    let si = e.wrapping_sub(1011);
1070
0
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
1071
        // x is integer or half-integer
1072
0
        if (m0.wrapping_shl(si as u32)) == 0 {
1073
            static CF: [f64; 2] = [1., -1.];
1074
0
            let is_odd = backend.odd_integer(f64::from_bits(ax));
1075
0
            let cos_x = CF[is_odd as usize];
1076
0
            return (f64::copysign(0.0, x), cos_x); // x is integer
1077
0
        }
1078
        // x is half-integer
1079
0
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
1080
        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
1081
0
        return if t == 0 {
1082
0
            (f64::copysign(1.0, x), 0.0)
1083
        } else {
1084
0
            (-f64::copysign(1.0, x), 0.0)
1085
        };
1086
0
    }
1087
1088
0
    let (y, k) = backend.arg_reduce_pi_64(x);
1089
1090
    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
1091
0
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
1092
0
    let cos_k = DoubleDouble::from_bit_pair(
1093
0
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
1094
    );
1095
0
    let msin_k = -sin_k;
1096
1097
0
    let r_sincos = sincospi_eval(y, &backend);
1098
1099
0
    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
1100
0
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
1101
1102
0
    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
1103
0
    let msin_k_sin_y = backend.quick_mult(r_sincos.v_sin, msin_k);
1104
1105
    // sin_k_cos_y is always >> cos_k_sin_y
1106
0
    let mut rr_sin = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
1107
0
    rr_sin.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
1108
1109
0
    let sin_ub = rr_sin.hi + (rr_sin.lo + r_sincos.err); // (rr.lo + ERR);
1110
0
    let sin_lb = rr_sin.hi + (rr_sin.lo - r_sincos.err); // (rr.lo - ERR);
1111
1112
0
    let mut rr_cos = DoubleDouble::from_exact_add(cos_k_cos_y.hi, msin_k_sin_y.hi);
1113
0
    rr_cos.lo += cos_k_cos_y.lo + msin_k_sin_y.lo;
1114
1115
0
    let cos_ub = rr_cos.hi + (rr_cos.lo + r_sincos.err); // (rr.lo + ERR);
1116
0
    let cos_lb = rr_cos.hi + (rr_cos.lo - r_sincos.err); // (rr.lo - ERR);
1117
1118
0
    if sin_ub == sin_lb && cos_lb == cos_ub {
1119
0
        return (rr_sin.to_f64(), rr_cos.to_f64());
1120
0
    }
1121
1122
0
    sincospi_dd(y, sin_k, cos_k, cos_k, msin_k, &backend)
1123
0
}
Unexecuted instantiation: pxfm::sincospi::sincospi_gen_impl::<pxfm::sincospi::FmaSinCosPiBackend>
Unexecuted instantiation: pxfm::sincospi::sincospi_gen_impl::<pxfm::sincospi::GenSinCosPiBackend>
1124
1125
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1126
#[target_feature(enable = "avx", enable = "fma")]
1127
0
unsafe fn sincospi_fma_impl(x: f64) -> (f64, f64) {
1128
0
    sincospi_gen_impl(x, FmaSinCosPiBackend {})
1129
0
}
1130
1131
/// Computes sin(PI*x) and cos(PI*x)
1132
///
1133
/// Max found ULP 0.5
1134
0
pub fn f_sincospi(x: f64) -> (f64, f64) {
1135
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1136
    {
1137
        sincospi_gen_impl(x, GenSinCosPiBackend {})
1138
    }
1139
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1140
    {
1141
        use std::sync::OnceLock;
1142
        static EXECUTOR: OnceLock<unsafe fn(f64) -> (f64, f64)> = OnceLock::new();
1143
0
        let q = EXECUTOR.get_or_init(|| {
1144
0
            if std::arch::is_x86_feature_detected!("avx")
1145
0
                && std::arch::is_x86_feature_detected!("fma")
1146
            {
1147
0
                sincospi_fma_impl
1148
            } else {
1149
0
                fn def_sincospi(x: f64) -> (f64, f64) {
1150
0
                    sincospi_gen_impl(x, GenSinCosPiBackend {})
1151
0
                }
1152
0
                def_sincospi
1153
            }
1154
0
        });
1155
0
        unsafe { q(x) }
1156
    }
1157
0
}
1158
1159
#[cfg(test)]
1160
mod tests {
1161
    use super::*;
1162
1163
    #[test]
1164
    fn test_sinpi() {
1165
        assert_eq!(f_sinpi(262143.50006870925), -0.9999999767029883);
1166
        assert_eq!(f_sinpi(7124076477593855.), 0.);
1167
        assert_eq!(f_sinpi(-11235582092889474000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), -0.);
1168
        assert_eq!(f_sinpi(-2.7430620343968443e303), -0.0);
1169
        assert_eq!(f_sinpi(0.00003195557007273919), 0.00010039138401316004);
1170
        assert_eq!(f_sinpi(-0.038357843137253766), -0.12021328061499763);
1171
        assert_eq!(f_sinpi(1.0156097449358867), -0.04901980680173724);
1172
        assert_eq!(f_sinpi(74.8593852519989), 0.42752597787896457);
1173
        assert_eq!(f_sinpi(0.500091552734375), 0.9999999586369661);
1174
        assert_eq!(f_sinpi(0.5307886532952182), 0.9953257438106751);
1175
        assert_eq!(f_sinpi(3.1415926535897936), -0.43030121700009316);
1176
        assert_eq!(f_sinpi(-0.5305172747685276), -0.9954077178320563);
1177
        assert_eq!(f_sinpi(-0.03723630312089732), -0.1167146713267927);
1178
        assert_eq!(
1179
            f_sinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022946074000077123),
1180
            0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007208721750737005
1181
        );
1182
        assert_eq!(
1183
            f_sinpi(0.000000000000000000000000000000000000007413093439574428),
1184
            2.3288919890141717e-38
1185
        );
1186
        assert_eq!(f_sinpi(0.0031909299901270445), 0.0100244343161398578);
1187
        assert_eq!(f_sinpi(0.11909245901270445), 0.36547215190661003);
1188
        assert_eq!(f_sinpi(0.99909245901270445), 0.0028511202357662186);
1189
        assert!(f_sinpi(f64::INFINITY).is_nan());
1190
        assert!(f_sinpi(f64::NEG_INFINITY).is_nan());
1191
        assert!(f_sinpi(f64::NAN).is_nan());
1192
    }
1193
1194
    #[test]
1195
    fn test_sincospi() {
1196
        let v0 = f_sincospi(1.0156097449358867);
1197
        assert_eq!(v0.0, f_sinpi(1.0156097449358867));
1198
        assert_eq!(v0.1, f_cospi(1.0156097449358867));
1199
1200
        let v1 = f_sincospi(4503599627370496.);
1201
        assert_eq!(v1.0, f_sinpi(4503599627370496.));
1202
        assert_eq!(v1.1, f_cospi(4503599627370496.));
1203
1204
        let v1 = f_sincospi(-108.);
1205
        assert_eq!(v1.0, f_sinpi(-108.));
1206
        assert_eq!(v1.1, f_cospi(-108.));
1207
1208
        let v1 = f_sincospi(3.);
1209
        assert_eq!(v1.0, f_sinpi(3.));
1210
        assert_eq!(v1.1, f_cospi(3.));
1211
1212
        let v1 = f_sincospi(13.5);
1213
        assert_eq!(v1.0, f_sinpi(13.5));
1214
        assert_eq!(v1.1, f_cospi(13.5));
1215
1216
        let v1 = f_sincospi(7124076477593855.);
1217
        assert_eq!(v1.0, f_sinpi(7124076477593855.));
1218
        assert_eq!(v1.1, f_cospi(7124076477593855.));
1219
1220
        let v1 = f_sincospi(2533419148247186.5);
1221
        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
1222
        assert_eq!(v1.1, f_cospi(2533419148247186.5));
1223
1224
        let v1 = f_sincospi(2.2250653705240375E-308);
1225
        assert_eq!(v1.0, f_sinpi(2.2250653705240375E-308));
1226
        assert_eq!(v1.1, f_cospi(2.2250653705240375E-308));
1227
1228
        let v1 = f_sincospi(2533420818956351.);
1229
        assert_eq!(v1.0, f_sinpi(2533420818956351.));
1230
        assert_eq!(v1.1, f_cospi(2533420818956351.));
1231
1232
        let v1 = f_sincospi(2533822406803233.5);
1233
        assert_eq!(v1.0, f_sinpi(2533822406803233.5));
1234
        assert_eq!(v1.1, f_cospi(2533822406803233.5));
1235
1236
        let v1 = f_sincospi(-3040685725640478.5);
1237
        assert_eq!(v1.0, f_sinpi(-3040685725640478.5));
1238
        assert_eq!(v1.1, f_cospi(-3040685725640478.5));
1239
1240
        let v1 = f_sincospi(2533419148247186.5);
1241
        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
1242
        assert_eq!(v1.1, f_cospi(2533419148247186.5));
1243
1244
        let v1 = f_sincospi(2533420819267583.5);
1245
        assert_eq!(v1.0, f_sinpi(2533420819267583.5));
1246
        assert_eq!(v1.1, f_cospi(2533420819267583.5));
1247
1248
        let v1 = f_sincospi(6979704728846336.);
1249
        assert_eq!(v1.0, f_sinpi(6979704728846336.));
1250
        assert_eq!(v1.1, f_cospi(6979704728846336.));
1251
1252
        let v1 = f_sincospi(7124076477593855.);
1253
        assert_eq!(v1.0, f_sinpi(7124076477593855.));
1254
        assert_eq!(v1.1, f_cospi(7124076477593855.));
1255
1256
        let v1 = f_sincospi(-0.00000000002728839192371484);
1257
        assert_eq!(v1.0, f_sinpi(-0.00000000002728839192371484));
1258
        assert_eq!(v1.1, f_cospi(-0.00000000002728839192371484));
1259
1260
        let v1 = f_sincospi(0.00002465398569495569);
1261
        assert_eq!(v1.0, f_sinpi(0.00002465398569495569));
1262
        assert_eq!(v1.1, f_cospi(0.00002465398569495569));
1263
    }
1264
1265
    #[test]
1266
    fn test_cospi() {
1267
        assert_eq!(0.9999497540959953, f_cospi(0.0031909299901270445));
1268
        assert_eq!(0.9308216542079669, f_cospi(0.11909299901270445));
1269
        assert_eq!(-0.1536194873288318, f_cospi(0.54909299901270445));
1270
        assert!(f_cospi(f64::INFINITY).is_nan());
1271
        assert!(f_cospi(f64::NEG_INFINITY).is_nan());
1272
        assert!(f_cospi(f64::NAN).is_nan());
1273
    }
1274
}