/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/cosm1.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::polyeval::f_polyeval4; |
32 | | use crate::sin::{range_reduction_small, sincos_eval}; |
33 | | use crate::sin_helper::sincos_eval_dd; |
34 | | use crate::sin_table::SIN_K_PI_OVER_128; |
35 | | use crate::sincos_reduce::LargeArgumentReduction; |
36 | | |
37 | | #[cold] |
38 | | #[inline(never)] |
39 | 0 | fn cosm1_accurate(y: DoubleDouble, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 { |
40 | 0 | let r_sincos = sincos_eval_dd(y); |
41 | | |
42 | | // k is an integer and -pi / 256 <= y <= pi / 256. |
43 | | // Then sin(x) = sin((k * pi/128 + y) |
44 | | // = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128) |
45 | | |
46 | 0 | let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k); |
47 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k); |
48 | | |
49 | 0 | let mut rr = DoubleDouble::full_dd_add(sin_k_cos_y, cos_k_sin_y); |
50 | | |
51 | | // Computing cos(x) - 1 as follows: |
52 | | // cos(x) - 1 = -2*sin^2(x/2) |
53 | 0 | rr = DoubleDouble::from_exact_add(rr.hi, rr.lo); |
54 | 0 | rr = DoubleDouble::quick_mult(rr, rr); |
55 | 0 | rr = DoubleDouble::quick_mult_f64(rr, -2.); |
56 | | |
57 | 0 | rr.to_f64() |
58 | 0 | } |
59 | | |
60 | | #[cold] |
61 | 0 | fn cosm1_tiny_hard(x: f64) -> f64 { |
62 | | // Generated by Sollya: |
63 | | // d = [2^-27, 2^-7]; |
64 | | // f_cosm1 = cos(x) - 1; |
65 | | // Q = fpminimax(f_cosm1, [|2,4,6,8|], [|0, 107...|], d); |
66 | | // See ./notes/cosm1_hard.sollya |
67 | | const C: [(u64, u64); 3] = [ |
68 | | (0x3c453997dc8ae20d, 0x3fa5555555555555), |
69 | | (0x3bf6100c76a1827a, 0xbf56c16c16c15749), |
70 | | (0x3b918f45acdd1fb2, 0x3efa019ddf5a583a), |
71 | | ]; |
72 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
73 | 0 | let mut p = DoubleDouble::mul_add( |
74 | 0 | x2, |
75 | 0 | DoubleDouble::from_bit_pair(C[2]), |
76 | 0 | DoubleDouble::from_bit_pair(C[1]), |
77 | | ); |
78 | 0 | p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[0])); |
79 | 0 | p = DoubleDouble::mul_add_f64(x2, p, f64::from_bits(0xbfe0000000000000)); |
80 | 0 | p = DoubleDouble::quick_mult(p, x2); |
81 | 0 | p.to_f64() |
82 | 0 | } |
83 | | |
84 | | /// Computes cos(x) - 1 |
85 | 0 | pub fn f_cosm1(x: f64) -> f64 { |
86 | 0 | let x_e = (x.to_bits() >> 52) & 0x7ff; |
87 | | const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64; |
88 | | |
89 | | let y: DoubleDouble; |
90 | | let k; |
91 | | |
92 | 0 | let mut argument_reduction = LargeArgumentReduction::default(); |
93 | | |
94 | | // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) |
95 | 0 | if x_e < E_BIAS + 16 { |
96 | | // |x| < 2^-7 |
97 | 0 | if x_e < E_BIAS - 7 { |
98 | | // |x| < 2^-26 |
99 | 0 | if x_e < E_BIAS - 27 { |
100 | | // Signed zeros. |
101 | 0 | if x == 0.0 { |
102 | 0 | return 0.0; |
103 | 0 | } |
104 | | // Taylor expansion for small cos(x) - 1 ~ -x^2/2 + x^4/24 + O(x^6) |
105 | 0 | let x_sqr = x * x; |
106 | | const A0: f64 = -1. / 2.; |
107 | | const A1: f64 = 1. / 24.; |
108 | 0 | let r0 = f_fmla(x_sqr, A1, A0); |
109 | 0 | return r0 * x_sqr; |
110 | 0 | } |
111 | | |
112 | | // Generated by Sollya: |
113 | | // d = [2^-27, 2^-7]; |
114 | | // f_cosm1 = (cos(x) - 1); |
115 | | // Q = fpminimax(f_cosm1, [|2,4,6,8|], [|0, D...|], d); |
116 | | // See ./notes/cosm1.sollya |
117 | | |
118 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
119 | 0 | let p = f_polyeval4( |
120 | 0 | x2.hi, |
121 | 0 | f64::from_bits(0xbfe0000000000000), |
122 | 0 | f64::from_bits(0x3fa5555555555555), |
123 | 0 | f64::from_bits(0xbf56c16c16b9c2b7), |
124 | 0 | f64::from_bits(0x3efa014d03f38855), |
125 | | ); |
126 | | |
127 | 0 | let r = DoubleDouble::quick_mult_f64(x2, p); |
128 | | |
129 | 0 | let eps = x * f_fmla( |
130 | 0 | x2.hi, |
131 | 0 | f64::from_bits(0x3d00000000000000), // 2^-47 |
132 | 0 | f64::from_bits(0x3be0000000000000), // 2^-65 |
133 | 0 | ); |
134 | | |
135 | 0 | let ub = r.hi + (r.lo + eps); |
136 | 0 | let lb = r.hi + (r.lo - eps); |
137 | 0 | if ub == lb { |
138 | 0 | return r.to_f64(); |
139 | 0 | } |
140 | 0 | return cosm1_tiny_hard(x); |
141 | 0 | } else { |
142 | 0 | // // Small range reduction. |
143 | 0 | (y, k) = range_reduction_small(x * 0.5); |
144 | 0 | } |
145 | | } else { |
146 | | // Inf or NaN |
147 | 0 | if x_e > 2 * E_BIAS { |
148 | | // cos(+-Inf) = NaN |
149 | 0 | return x + f64::NAN; |
150 | 0 | } |
151 | | |
152 | | // Large range reduction. |
153 | | // k = argument_reduction.high_part(x); |
154 | 0 | (k, y) = argument_reduction.reduce(x * 0.5); |
155 | | } |
156 | | |
157 | | // Computing cos(x) - 1 as follows: |
158 | | // cos(x) - 1 = -2*sin^2(x/2) |
159 | | |
160 | 0 | let r_sincos = sincos_eval(y); |
161 | | |
162 | | // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). |
163 | 0 | let sk = SIN_K_PI_OVER_128[(k & 255) as usize]; |
164 | 0 | let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize]; |
165 | | |
166 | 0 | let sin_k = DoubleDouble::from_bit_pair(sk); |
167 | 0 | let cos_k = DoubleDouble::from_bit_pair(ck); |
168 | | |
169 | 0 | let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k); |
170 | 0 | let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k); |
171 | | |
172 | | // sin_k_cos_y is always >> cos_k_sin_y |
173 | 0 | let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi); |
174 | 0 | rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; |
175 | | |
176 | 0 | rr = DoubleDouble::from_exact_add(rr.hi, rr.lo); |
177 | 0 | rr = DoubleDouble::quick_mult(rr, rr); |
178 | 0 | rr = DoubleDouble::quick_mult_f64(rr, -2.); |
179 | | |
180 | 0 | let rlp = rr.lo + r_sincos.err; |
181 | 0 | let rlm = rr.lo - r_sincos.err; |
182 | | |
183 | 0 | let r_upper = rr.hi + rlp; // (rr.lo + ERR); |
184 | 0 | let r_lower = rr.hi + rlm; // (rr.lo - ERR); |
185 | | |
186 | | // Ziv's accuracy test |
187 | 0 | if r_upper == r_lower { |
188 | 0 | return rr.to_f64(); |
189 | 0 | } |
190 | | |
191 | 0 | cosm1_accurate(y, sin_k, cos_k) |
192 | 0 | } |
193 | | |
194 | | #[cfg(test)] |
195 | | mod tests { |
196 | | use super::*; |
197 | | |
198 | | #[test] |
199 | | fn f_cosm1f_test() { |
200 | | assert_eq!(f_cosm1(0.0017700195313803402), -0.000001566484161754997); |
201 | | assert_eq!( |
202 | | f_cosm1(0.0000000011641532182693484), |
203 | | -0.0000000000000000006776263578034406 |
204 | | ); |
205 | | assert_eq!(f_cosm1(0.006164513528517324), -0.000019000553351160402); |
206 | | assert_eq!(f_cosm1(6.2831853071795862), -2.999519565323715e-32); |
207 | | assert_eq!(f_cosm1(0.00015928394), -1.2685686744140693e-8); |
208 | | assert_eq!(f_cosm1(0.0), 0.0); |
209 | | assert_eq!(f_cosm1(0.0), 0.0); |
210 | | assert_eq!(f_cosm1(std::f64::consts::PI), -2.); |
211 | | assert_eq!(f_cosm1(0.5), -0.12241743810962728); |
212 | | assert_eq!(f_cosm1(0.7), -0.23515781271551153); |
213 | | assert_eq!(f_cosm1(1.7), -1.1288444942955247); |
214 | | assert!(f_cosm1(f64::INFINITY).is_nan()); |
215 | | assert!(f_cosm1(f64::NEG_INFINITY).is_nan()); |
216 | | assert!(f_cosm1(f64::NAN).is_nan()); |
217 | | assert_eq!(f_cosm1(0.0002480338), -3.0760382813519806e-8); |
218 | | } |
219 | | } |