/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.29/src/sincos_reduce.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bits::set_exponent_f64; |
30 | | use crate::common::{dd_fmla, f_fmla}; |
31 | | use crate::double_double::DoubleDouble; |
32 | | use crate::dyadic_float::{DyadicFloat128, DyadicSign}; |
33 | | use crate::rounding::CpuRound; |
34 | | use crate::sincos_reduce_tables::ONE_TWENTY_EIGHT_OVER_PI; |
35 | | |
36 | | #[derive(Debug)] |
37 | | pub(crate) struct AngleReduced { |
38 | | pub(crate) angle: DoubleDouble, |
39 | | } |
40 | | |
41 | | #[derive(Default)] |
42 | | pub(crate) struct LargeArgumentReduction { |
43 | | x_reduced: f64, |
44 | | idx: u64, |
45 | | y_hi: f64, |
46 | | y_lo: f64, |
47 | | // Low part of x * ONE_TWENTY_EIGHT_OVER_PI[idx][1]. |
48 | | y_mid: DoubleDouble, |
49 | | } |
50 | | |
51 | | // For large range |x| >= 2^16, we perform the range reduction computations as: |
52 | | // u = x - k * pi/128 = (pi/128) * (x * (128/pi) - k). |
53 | | // We use the exponent of x to find 4 double-chunks of 128/pi: |
54 | | // c_hi, c_mid, c_lo, c_lo_2 such that: |
55 | | // 1) ulp(round(x * c_hi, D, RN)) >= 2^8 = 256, |
56 | | // 2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then |
57 | | // min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53. |
58 | | // This will allow us to drop the high part ph_hi and the addition: |
59 | | // (ph_lo + pm_hi) mod 1 |
60 | | // can be exactly representable in a double precision. |
61 | | // This will allow us to do split the computations as: |
62 | | // (x * 256/pi) ~ x * (c_hi + c_mid + c_lo + c_lo_2) (mod 256) |
63 | | // ~ (ph_lo + pm_hi) + (pm_lo + x * c_lo) + x * c_lo_2. |
64 | | // Then, |
65 | | // round(x * 128/pi) = round(ph_lo + pm_hi) (mod 256) |
66 | | // And the high part of fractional part of (x * 128/pi) can simply be: |
67 | | // {x * 128/pi}_hi = {ph_lo + pm_hi}. |
68 | | // To prevent overflow when x is very large, we simply scale up |
69 | | // (c_hi, c_mid, c_lo, c_lo_2) by a fixed power of 2 (based on the index) and |
70 | | // scale down x by the same amount. |
71 | | impl LargeArgumentReduction { |
72 | | #[cold] |
73 | 0 | pub(crate) fn accurate(&self) -> DyadicFloat128 { |
74 | | // Sage math: |
75 | | // R = RealField(128) |
76 | | // π = R.pi() |
77 | | // |
78 | | // def format_hex(value): |
79 | | // l = hex(value)[2:] |
80 | | // n = 4 |
81 | | // x = [l[i:i + n] for i in range(0, len(l), n)] |
82 | | // return "0x" + "_".join(x) + "_u128" |
83 | | // |
84 | | // def print_dyadic(value): |
85 | | // (s, m, e) = RealField(128)(value).sign_mantissa_exponent(); |
86 | | // print("DyadicFloat128 {") |
87 | | // print(f" sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},") |
88 | | // print(f" exponent: {e},") |
89 | | // print(f" mantissa: {format_hex(m)},") |
90 | | // print("};") |
91 | | // |
92 | | // print_dyadic(π/128) |
93 | | const PI_OVER_128_F128: DyadicFloat128 = DyadicFloat128 { |
94 | | sign: DyadicSign::Pos, |
95 | | exponent: -133, |
96 | | mantissa: 0xc90f_daa2_2168_c234_c4c6_628b_80dc_1cd1_u128, |
97 | | }; |
98 | | |
99 | | // y_lo = x * c_lo + pm.lo |
100 | 0 | let one_pi_rot = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize]; |
101 | 0 | let y_lo_0 = DyadicFloat128::new_from_f64(self.x_reduced * f64::from_bits(one_pi_rot.3)); |
102 | 0 | let y_lo_1 = DyadicFloat128::new_from_f64(self.y_lo) + y_lo_0; |
103 | 0 | let y_mid_f128 = DyadicFloat128::new_from_f64(self.y_mid.lo) + y_lo_1; |
104 | 0 | let y_hi_f128 = |
105 | 0 | DyadicFloat128::new_from_f64(self.y_hi) + DyadicFloat128::new_from_f64(self.y_mid.hi); |
106 | 0 | let y = y_hi_f128 + y_mid_f128; |
107 | | |
108 | 0 | y * PI_OVER_128_F128 |
109 | 0 | } |
110 | | |
111 | 0 | pub(crate) fn reduce(&mut self, x: f64) -> (u64, DoubleDouble) { |
112 | | const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64; |
113 | 0 | let mut xbits = x.to_bits(); |
114 | 0 | let x_e = ((x.to_bits() >> 52) & 0x7ff) as i64; |
115 | 0 | let x_e_m62 = x_e.wrapping_sub(E_BIAS as i64 + 62); |
116 | 0 | self.idx = (x_e_m62 >> 4).wrapping_add(3) as u64; |
117 | | // Scale x down by 2^(-(16 * (idx - 3)) |
118 | 0 | xbits = set_exponent_f64( |
119 | 0 | xbits, |
120 | 0 | (x_e_m62 & 15) |
121 | 0 | .wrapping_add(E_BIAS as i64) |
122 | 0 | .wrapping_add(62i64) as u64, |
123 | 0 | ); |
124 | | // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78 |
125 | 0 | self.x_reduced = f64::from_bits(xbits); |
126 | | // x * c_hi = ph.hi + ph.lo exactly. |
127 | 0 | let one_pi = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize]; |
128 | 0 | let ph = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.0)); |
129 | | // x * c_mid = pm.hi + pm.lo exactly. |
130 | 0 | let pm = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.1)); |
131 | | // x * c_lo = pl.hi + pl.lo exactly. |
132 | 0 | let pl = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.2)); |
133 | | // Extract integral parts and fractional parts of (ph.lo + pm.hi). |
134 | 0 | let sum_hi = ph.lo + pm.hi; |
135 | 0 | let kd = sum_hi.cpu_round(); |
136 | | |
137 | | // x * 128/pi mod 1 ~ y_hi + y_mid + y_lo |
138 | 0 | self.y_hi = (ph.lo - kd) + pm.hi; // Exact |
139 | 0 | self.y_mid = DoubleDouble::from_exact_add(pm.lo, pl.hi); |
140 | 0 | self.y_lo = pl.lo; |
141 | | |
142 | | // y_l = x * c_lo_2 + pl.lo |
143 | 0 | let y_l = dd_fmla(self.x_reduced, f64::from_bits(one_pi.3), self.y_lo); |
144 | 0 | let mut y = DoubleDouble::from_exact_add(self.y_hi, self.y_mid.hi); |
145 | 0 | y.lo += self.y_mid.lo + y_l; |
146 | | |
147 | | // Digits of pi/128, generated by SageMath with: |
148 | | // import struct |
149 | | // from sage.all import * |
150 | | // |
151 | | // def double_to_hex(f): |
152 | | // return "0x" + format(struct.unpack('<Q', struct.pack('<d', f))[0], '016x') |
153 | | // |
154 | | // R = RealField(128) |
155 | | // π = R.pi() |
156 | | // |
157 | | // RN = RealField(53) |
158 | | // |
159 | | // hi = RN(π/128) |
160 | | // lo = RN(π/128 - R(hi)) |
161 | | // |
162 | | // print("lo: " + double_to_hex(lo)) |
163 | | // print("hi: " + double_to_hex(hi)) |
164 | | const PI_OVER_128_DD: DoubleDouble = DoubleDouble::new( |
165 | | f64::from_bits(0x3c31a62633145c07), |
166 | | f64::from_bits(0x3f9921fb54442d18), |
167 | | ); |
168 | | |
169 | | // Error bound: with {a} denote the fractional part of a, i.e.: |
170 | | // {a} = a - round(a) |
171 | | // Then, |
172 | | // | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105 |
173 | | // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110 |
174 | 0 | let u = DoubleDouble::quick_mult(y, PI_OVER_128_DD); |
175 | | |
176 | 0 | (kd as i64 as u64, u) |
177 | 0 | } |
178 | | } |
179 | | |
180 | | // Generated by SageMath |
181 | | // nwords = 20 |
182 | | // prec = nwords * 64 + 150 |
183 | | // R = RealField(prec) |
184 | | // invpi = R(1) / (R(2) * R.pi()) |
185 | | // |
186 | | // scale = R(2)**64 |
187 | | // |
188 | | // words = [] |
189 | | // x = invpi |
190 | | // for i in range(nwords): |
191 | | // y = floor(x * scale) |
192 | | // words.append(int(y)) |
193 | | // x = x * scale - y |
194 | | // |
195 | | // for w in words: |
196 | | // print("0x{:016x},".format(w)) |
197 | | static INVPI_2_64: [u64; 20] = [ |
198 | | 0x28be60db9391054a, |
199 | | 0x7f09d5f47d4d3770, |
200 | | 0x36d8a5664f10e410, |
201 | | 0x7f9458eaf7aef158, |
202 | | 0x6dc91b8e909374b8, |
203 | | 0x1924bba82746487, |
204 | | 0x3f877ac72c4a69cf, |
205 | | 0xba208d7d4baed121, |
206 | | 0x3a671c09ad17df90, |
207 | | 0x4e64758e60d4ce7d, |
208 | | 0x272117e2ef7e4a0e, |
209 | | 0xc7fe25fff7816603, |
210 | | 0xfbcbc462d6829b47, |
211 | | 0xdb4d9fb3c9f2c26d, |
212 | | 0xd3d18fd9a797fa8b, |
213 | | 0x5d49eeb1faf97c5e, |
214 | | 0xcf41ce7de294a4ba, |
215 | | 0x9afed7ec47e35742, |
216 | | 0x1580cc11bf1edaea, |
217 | | 0xfc33ef0826bd0d87, |
218 | | ]; |
219 | | |
220 | | #[inline] |
221 | 0 | fn create_dd(c1: u64, c0: u64) -> DoubleDouble { |
222 | 0 | let mut c1 = c1; |
223 | 0 | let mut c0 = c0; |
224 | 0 | if c1 != 0 { |
225 | 0 | let e = c1.leading_zeros(); |
226 | 0 | if e != 0 { |
227 | 0 | c1 = (c1 << e) | (c0 >> (64 - e)); |
228 | 0 | c0 = c0.wrapping_shl(e); |
229 | 0 | } |
230 | 0 | let f = 0x3fe - e; |
231 | 0 | let t_u = ((f as u64) << 52) | ((c1 << 1) >> 12); |
232 | 0 | let hi = f64::from_bits(t_u); |
233 | 0 | c0 = (c1 << 53) | (c0 >> 11); |
234 | 0 | let l = if c0 != 0 { |
235 | 0 | let g = c0.leading_zeros(); |
236 | 0 | if (g) != 0 { |
237 | 0 | c0 = c0.wrapping_shl(g); |
238 | 0 | } |
239 | 0 | let t_u = (((f - 53 - g) as u64) << 52) | ((c0 << 1) >> 12); |
240 | 0 | f64::from_bits(t_u) |
241 | | } else { |
242 | 0 | 0. |
243 | | }; |
244 | 0 | DoubleDouble::new(l, hi) |
245 | 0 | } else if c0 != 0 { |
246 | 0 | let e = c0.leading_zeros(); |
247 | 0 | let f = 0x3fe - 64 - e; |
248 | 0 | c0 = c0.wrapping_shl(e + 1); // most significant bit shifted out |
249 | | |
250 | | /* put the upper 52 bits of c0 into h */ |
251 | 0 | let t_u = ((f as u64) << 52) | (c0 >> 12); |
252 | 0 | let hi = f64::from_bits(t_u); |
253 | | /* put the lower 12 bits of c0 into l */ |
254 | 0 | c0 = c0.wrapping_shl(52); |
255 | 0 | let l = if c0 != 0 { |
256 | 0 | let g = c0.leading_zeros(); |
257 | 0 | c0 = c0.wrapping_shl(g + 1); |
258 | 0 | let t_u = (((f - 64 - g) as u64) << 52) | (c0 >> 12); |
259 | 0 | f64::from_bits(t_u) |
260 | | } else { |
261 | 0 | 0. |
262 | | }; |
263 | 0 | DoubleDouble::new(l, hi) |
264 | | } else { |
265 | 0 | DoubleDouble::default() |
266 | | } |
267 | 0 | } |
268 | | |
269 | | #[inline] |
270 | 0 | fn frac_2pi(x: f64) -> DoubleDouble { |
271 | 0 | if x <= f64::from_bits(0x401921fb54442d17) |
272 | | // x < 2*pi |
273 | | { |
274 | | /* | CH+CL - 1/(2pi) | < 2^-110.523 */ |
275 | | const C: DoubleDouble = DoubleDouble::new( |
276 | | f64::from_bits(0xbc66b01ec5417056), |
277 | | f64::from_bits(0x3fc45f306dc9c883), |
278 | | ); |
279 | 0 | let mut z = DoubleDouble::quick_mult_f64(C, x); |
280 | 0 | z.lo = f_fmla(C.lo, x, z.lo); |
281 | 0 | z |
282 | | } else |
283 | | // x > 0x1.921fb54442d17p+2 |
284 | | { |
285 | 0 | let t = x.to_bits(); |
286 | 0 | let mut e = ((t >> 52) & 0x7ff) as i32; /* 1025 <= e <= 2046 */ |
287 | 0 | let m = (1u64 << 52) | (t & 0xfffffffffffffu64); |
288 | | let mut c0: u64; |
289 | | let mut c1: u64; |
290 | | let mut c2: u64; |
291 | | // x = m/2^53 * 2^(e-1022) |
292 | 0 | if e <= 1074 |
293 | | // 1025 <= e <= 1074: 2^2 <= x < 2^52 |
294 | 0 | { |
295 | 0 | let mut u = m as u128 * INVPI_2_64[1] as u128; |
296 | 0 | c0 = u as u64; |
297 | 0 | c1 = (u >> 64) as u64; |
298 | 0 | u = m as u128 * INVPI_2_64[0] as u128; |
299 | 0 | c1 = c1.wrapping_add(u as u64); |
300 | 0 | c2 = (u >> 64) as u64 + (c1 < (u as u64)) as u64; |
301 | 0 | e = 1075 - e; // 1 <= e <= 50 |
302 | 0 | } else |
303 | | // 1075 <= e <= 2046, 2^52 <= x < 2^1024 |
304 | 0 | { |
305 | 0 | let i = (e - 1138 + 63) / 64; // i = ceil((e-1138)/64), 0 <= i <= 15 |
306 | 0 | let mut u = m as u128 * INVPI_2_64[i as usize + 2] as u128; |
307 | 0 | c0 = u as u64; |
308 | 0 | c1 = (u >> 64) as u64; |
309 | 0 | u = m as u128 * INVPI_2_64[i as usize + 1] as u128; |
310 | 0 | c1 = c1.wrapping_add(u as u64); |
311 | 0 | c2 = (u >> 64) as u64 + ((c1) < (u as u64)) as u64; |
312 | 0 | u = m as u128 * INVPI_2_64[i as usize] as u128; |
313 | 0 | c2 = c2.wrapping_add(u as u64); |
314 | 0 | e = 1139 + (i << 6) - e; // 1 <= e <= 64 |
315 | 0 | } |
316 | 0 | if e == 64 { |
317 | 0 | c0 = c1; |
318 | 0 | c1 = c2; |
319 | 0 | } else { |
320 | 0 | c0 = (c1 << (64 - e)) | c0 >> e; |
321 | 0 | c1 = (c2 << (64 - e)) | c1 >> e; |
322 | 0 | } |
323 | 0 | create_dd(c1, c0) |
324 | | } |
325 | 0 | } |
326 | | |
327 | | /// Returns x mod 2*PI |
328 | | #[inline] |
329 | 0 | pub(crate) fn rem2pi_any(x: f64) -> AngleReduced { |
330 | | const TWO_PI: DoubleDouble = DoubleDouble::new( |
331 | | f64::from_bits(0x3cb1a62633145c07), |
332 | | f64::from_bits(0x401921fb54442d18), |
333 | | ); |
334 | 0 | let a = frac_2pi(x); |
335 | 0 | let z = DoubleDouble::quick_mult(a, TWO_PI); |
336 | 0 | AngleReduced { angle: z } |
337 | 0 | } |
338 | | |
339 | | /** |
340 | | Generated by SageMath: |
341 | | ```python |
342 | | def triple_double_split(x, n): |
343 | | VR = RealField(1000) |
344 | | R32 = RealField(n) |
345 | | hi = R32(x) |
346 | | rem1 = x - VR(hi) |
347 | | med = R32(rem1) |
348 | | rem2 = rem1 - VR(med) |
349 | | lo = R32(rem2) |
350 | | rem2 = rem1 - VR(med) |
351 | | lo = R32(rem2) |
352 | | return (hi, med, lo) |
353 | | |
354 | | hi, med, lo = triple_double_split((RealField(600).pi() * RealField(600)(2)), 51) |
355 | | |
356 | | print(double_to_hex(hi)) |
357 | | print(double_to_hex(med)) |
358 | | print(double_to_hex(lo)) |
359 | | ``` |
360 | | **/ |
361 | | #[inline] |
362 | 0 | fn rem2pif_small(x: f32) -> f64 { |
363 | | const ONE_OVER_PI_2: f64 = f64::from_bits(0x3fc45f306dc9c883); |
364 | | const TWO_PI: [u64; 3] = [0x401921fb54440000, 0x3da68c234c4c0000, 0x3b498a2e03707345]; |
365 | 0 | let dx = x as f64; |
366 | 0 | let kd = (dx * ONE_OVER_PI_2).cpu_round(); |
367 | 0 | let mut y = dd_fmla(-kd, f64::from_bits(TWO_PI[0]), dx); |
368 | 0 | y = dd_fmla(-kd, f64::from_bits(TWO_PI[1]), y); |
369 | 0 | y = dd_fmla(-kd, f64::from_bits(TWO_PI[2]), y); |
370 | 0 | y |
371 | 0 | } |
372 | | |
373 | | #[inline] |
374 | 0 | pub(crate) fn rem2pif_any(x: f32) -> f64 { |
375 | 0 | let x_abs = x.abs(); |
376 | 0 | if x_abs.to_bits() < 0x5600_0000u32 { |
377 | 0 | rem2pif_small(x_abs) |
378 | | } else { |
379 | 0 | let p = rem2pi_any(x_abs as f64); |
380 | 0 | p.angle.to_f64() |
381 | | } |
382 | 0 | } |
383 | | |
384 | 0 | pub(crate) fn frac2pi_d128(x: DyadicFloat128) -> DyadicFloat128 { |
385 | 0 | let e = x.biased_exponent(); |
386 | | |
387 | 0 | let mut fe = x; |
388 | | |
389 | 0 | if e <= 1 |
390 | | // |X| < 2 |
391 | | { |
392 | | /* multiply by T[0]/2^64 + T[1]/2^128, where |
393 | | |T[0]/2^64 + T[1]/2^128 - 1/(2pi)| < 2^-130.22 */ |
394 | 0 | let mut x_hi = (x.mantissa >> 64) as u64; |
395 | | let mut x_lo: u64; |
396 | 0 | let mut u: u128 = x_hi as u128 * INVPI_2_64[1] as u128; |
397 | 0 | let tiny: u64 = u as u64; |
398 | 0 | x_lo = (u >> 64) as u64; |
399 | 0 | u = x_hi as u128 * INVPI_2_64[0] as u128; |
400 | 0 | x_lo = x_lo.wrapping_add(u as u64); |
401 | 0 | x_hi = (u >> 64) as u64 + (x_lo < u as u64) as u64; |
402 | | /* hi + lo/2^64 + tiny/2^128 = hi_in * (T[0]/2^64 + T[1]/2^128) thus |
403 | | |hi + lo/2^64 + tiny/2^128 - hi_in/(2*pi)| < hi_in * 2^-130.22 |
404 | | Since X is normalized at input, hi_in >= 2^63, and since T[0] >= 2^61, |
405 | | we have hi >= 2^(63+61-64) = 2^60, thus the normalize() below |
406 | | perform a left shift by at most 3 bits */ |
407 | 0 | let mut e = x.exponent; |
408 | 0 | fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128); |
409 | 0 | fe.normalize(); |
410 | 0 | e -= fe.exponent; |
411 | | // put the upper e bits of tiny into X->lo |
412 | 0 | if (e) != 0 { |
413 | 0 | x_hi = (fe.mantissa >> 64) as u64; |
414 | 0 | x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64; |
415 | 0 | x_lo |= tiny >> (64 - e); |
416 | 0 | fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128); |
417 | 0 | } |
418 | | /* The error is bounded by 2^-130.22 (relative) + ulp(lo) (absolute). |
419 | | Since now X->hi >= 2^63, the absolute error of ulp(lo) converts into |
420 | | a relative error of less than 2^-127. |
421 | | This yields a maximal relative error of: |
422 | | (1 + 2^-130.22) * (1 + 2^-127) - 1 < 2^-126.852. |
423 | | */ |
424 | 0 | return fe; |
425 | 0 | } |
426 | | |
427 | | // now 2 <= e <= 1024 |
428 | | |
429 | | /* The upper 64-bit word X->hi corresponds to hi/2^64*2^e, if multiplied by |
430 | | T[i]/2^((i+1)*64) it yields hi*T[i]/2^128 * 2^(e-i*64). |
431 | | If e-64i <= -128, it contributes to less than 2^-128; |
432 | | if e-64i >= 128, it yields an integer, which is 0 modulo 1. |
433 | | We thus only consider the values of i such that -127 <= e-64i <= 127, |
434 | | i.e., (-127+e)/64 <= i <= (127+e)/64. |
435 | | Up to 4 consecutive values of T[i] can contribute (only 3 when e is a |
436 | | multiple of 64). */ |
437 | 0 | let i = (if e < 127 { 0 } else { (e - 127 + 64 - 1) / 64 }) as usize; // ceil((e-127)/64) |
438 | | // 0 <= i <= 15 |
439 | 0 | let mut c: [u64; 5] = [0u64; 5]; |
440 | | |
441 | 0 | let mut x_hi = (x.mantissa >> 64) as u64; |
442 | | let mut x_lo: u64; |
443 | | |
444 | 0 | let mut u: u128 = x_hi as u128 * INVPI_2_64[i + 3] as u128; // i+3 <= 18 |
445 | 0 | c[0] = u as u64; |
446 | 0 | c[1] = (u >> 64) as u64; |
447 | 0 | u = x_hi as u128 * INVPI_2_64[i + 2] as u128; |
448 | 0 | c[1] = c[1].wrapping_add(u as u64); |
449 | 0 | c[2] = (u >> 64) as u64 + (c[1] < u as u64) as u64; |
450 | 0 | u = x_hi as u128 * INVPI_2_64[i + 1] as u128; |
451 | 0 | c[2] = c[2].wrapping_add(u as u64); |
452 | 0 | c[3] = (u >> 64) as u64 + (c[2] < u as u64) as u64; |
453 | 0 | u = x_hi as u128 * INVPI_2_64[i] as u128; |
454 | 0 | c[3] = c[3].wrapping_add(u as u64); |
455 | 0 | c[4] = (u >> 64) as u64 + (c[3] < u as u64) as u64; |
456 | | |
457 | 0 | let f = e as i32 - 64 * i as i32; // hi*T[i]/2^128 is multiplied by 2^f |
458 | | |
459 | | /* {c, 5} = hi*(T[i]+T[i+1]/2^64+T[i+2]/2^128+T[i+3]/2^192) */ |
460 | | /* now shift c[0..4] by f bits to the left */ |
461 | | let tiny; |
462 | 0 | if f < 64 { |
463 | 0 | x_hi = (c[4] << f) | (c[3] >> (64 - f)); |
464 | 0 | x_lo = (c[3] << f) | (c[2] >> (64 - f)); |
465 | 0 | tiny = (c[2] << f) | (c[1] >> (64 - f)); |
466 | 0 | /* the ignored part was less than 1 in c[1], |
467 | 0 | thus less than 2^(f-64) <= 1/2 in tiny */ |
468 | 0 | } else if f == 64 { |
469 | 0 | x_hi = c[3]; |
470 | 0 | x_lo = c[2]; |
471 | 0 | tiny = c[1]; |
472 | 0 | /* the ignored part was less than 1 in c[1], |
473 | 0 | thus less than 1 in tiny */ |
474 | 0 | } else |
475 | | /* 65 <= f <= 127: this case can only occur when e >= 65 */ |
476 | | { |
477 | 0 | let g = f - 64; /* 1 <= g <= 63 */ |
478 | | /* we compute an extra term */ |
479 | 0 | u = x_hi as u128 * INVPI_2_64[i + 4] as u128; // i+4 <= 19 |
480 | 0 | u >>= 64; |
481 | 0 | c[0] = c[0].wrapping_add(u as u64); |
482 | 0 | c[1] += (c[0] < u as u64) as u64; |
483 | 0 | c[2] += ((c[0] < u as u64) && c[1] == 0) as u64; |
484 | 0 | c[3] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0) as u64; |
485 | 0 | c[4] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0 && c[3] == 0) as u64; |
486 | 0 | x_hi = (c[3] << g) | (c[2] >> (64 - g)); |
487 | 0 | x_lo = (c[2] << g) | (c[1] >> (64 - g)); |
488 | 0 | tiny = (c[1] << g) | (c[0] >> (64 - g)); |
489 | | /* the ignored part was less than 1 in c[0], |
490 | | thus less than 1/2 in tiny */ |
491 | | } |
492 | 0 | let mut fe = x; |
493 | 0 | fe.exponent = -127; |
494 | 0 | fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128); |
495 | 0 | fe.normalize(); |
496 | 0 | let ze = fe.biased_exponent(); |
497 | 0 | if ze < 0 { |
498 | 0 | x_hi = (fe.mantissa >> 64) as u64; |
499 | 0 | x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64; |
500 | 0 | x_lo |= tiny >> (64 + ze); |
501 | 0 | fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128); |
502 | 0 | } |
503 | 0 | fe |
504 | 0 | } |
505 | | |
506 | 0 | pub(crate) fn rem2pi_f128(x: DyadicFloat128) -> DyadicFloat128 { |
507 | | /* |
508 | | Generated by SageMath: |
509 | | ```python |
510 | | def double_to_hex(f): |
511 | | # Converts Python float (f64) to hex string |
512 | | packed = struct.pack('>d', float(f)) |
513 | | return '0x' + packed.hex() |
514 | | |
515 | | def format_dyadic_hex(value): |
516 | | l = hex(value)[2:] |
517 | | n = 8 |
518 | | x = [l[i:i + n] for i in range(0, len(l), n)] |
519 | | return "0x" + "_".join(x) + "_u128" |
520 | | |
521 | | def print_dyadic(value): |
522 | | (s, m, e) = RealField(128)(value).sign_mantissa_exponent(); |
523 | | print("DyadicFloat128 {") |
524 | | print(f" sign: DyadicSign::{'Pos' if s >= 0 else 'Neg'},") |
525 | | print(f" exponent: {e},") |
526 | | print(f" mantissa: {format_dyadic_hex(m)},") |
527 | | print("},") |
528 | | |
529 | | print_dyadic(RealField(300).pi() * 2) |
530 | | ``` |
531 | | */ |
532 | | const TWO_PI: DyadicFloat128 = DyadicFloat128 { |
533 | | sign: DyadicSign::Pos, |
534 | | exponent: -125, |
535 | | mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128, |
536 | | }; |
537 | 0 | let frac2pi = frac2pi_d128(x); |
538 | 0 | TWO_PI * frac2pi |
539 | 0 | } |
540 | | // |
541 | | // #[cfg(test)] |
542 | | // mod tests { |
543 | | // use crate::dyadic_float::DyadicFloat128; |
544 | | // use crate::sincos_reduce::{frac_2pi, frac2pi_d128}; |
545 | | // |
546 | | // #[test] |
547 | | // fn test_reduce() { |
548 | | // let x = 1.8481363e36f32; |
549 | | // let reduced = frac_2pi(x as f64); |
550 | | // let to_reduced2 = DyadicFloat128::new_from_f64(x as f64); |
551 | | // let reduced2 = frac2pi_d128(to_reduced2); |
552 | | // println!("{:?}", reduced); |
553 | | // println!("{:?}", reduced2.fast_as_f64()); |
554 | | // } |
555 | | // } |