/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/compound/compoundf.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dd_fmla, is_integerf}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::rounding::CpuRoundTiesEven; |
32 | | use std::hint::black_box; |
33 | | |
34 | | #[cold] |
35 | | #[inline(never)] |
36 | 0 | fn as_compoundf_special(x: f32, y: f32) -> f32 { |
37 | 0 | let nx = x.to_bits(); |
38 | 0 | let ny = y.to_bits(); |
39 | 0 | let ax: u32 = nx.wrapping_shl(1); |
40 | 0 | let ay: u32 = ny.wrapping_shl(1); |
41 | | |
42 | 0 | if ax == 0 || ay == 0 { |
43 | | // x or y is 0 |
44 | 0 | if ax == 0 { |
45 | | // compound(0,y) = 1 except for y = sNaN |
46 | 0 | return if y.is_nan() { x + y } else { 1.0 }; |
47 | 0 | } |
48 | | |
49 | 0 | if ay == 0 { |
50 | | // compound (x, 0) |
51 | 0 | if x.is_nan() { |
52 | 0 | return x + y; |
53 | 0 | } // x = sNaN |
54 | 0 | return if x < -1.0 { |
55 | 0 | f32::NAN // rule (g) |
56 | | } else { |
57 | 0 | 1.0 |
58 | | }; // rule (a) |
59 | 0 | } |
60 | 0 | } |
61 | | |
62 | 0 | let mone = (-1.0f32).to_bits(); |
63 | 0 | if ay >= 0xffu32 << 24 { |
64 | | // y=Inf/NaN |
65 | | // the case x=0 was already checked above |
66 | 0 | if ax > 0xffu32 << 24 { |
67 | 0 | return x + y; |
68 | 0 | } // x=NaN |
69 | 0 | if ay == 0xffu32 << 24 { |
70 | | // y = +/-Inf |
71 | 0 | if nx > mone { |
72 | 0 | return f32::NAN; |
73 | 0 | } // rule (g) |
74 | 0 | let sy = ny >> 31; // sign bit of y |
75 | 0 | if nx == mone { |
76 | 0 | return if sy == 0 { |
77 | 0 | 0.0 // Rule (c) |
78 | | } else { |
79 | 0 | f32::INFINITY // Rule (b) |
80 | | }; |
81 | 0 | } |
82 | 0 | if x < 0.0 { |
83 | 0 | return if sy == 0 { 0.0 } else { f32::INFINITY }; |
84 | 0 | } |
85 | 0 | if x > 0.0 { |
86 | 0 | return if sy != 0 { 0.0 } else { f32::INFINITY }; |
87 | 0 | } |
88 | 0 | return 1.0; |
89 | 0 | } |
90 | 0 | return x + y; // case y=NaN |
91 | 0 | } |
92 | | |
93 | 0 | if nx >= mone || nx >= 0xffu32 << 23 { |
94 | | // x is Inf, NaN or <= -1 |
95 | 0 | if ax == 0xffu32 << 24 { |
96 | | // x is +Inf or -Inf |
97 | 0 | if (nx >> 31) != 0 { |
98 | 0 | return f32::NAN; |
99 | 0 | } // x = -Inf, rule (g) |
100 | | // (1 + Inf)^y = +Inf for y > 0, +0 for y < 0 |
101 | 0 | return if (ny >> 31) != 0 { 1.0 / x } else { x }; |
102 | 0 | } |
103 | 0 | if ax > 0xffu32 << 24 { |
104 | 0 | return x + y; |
105 | 0 | } // x is NaN |
106 | 0 | if nx > mone { |
107 | 0 | return f32::NAN; // x < -1.0: rule (g) |
108 | 0 | } |
109 | | // now x = -1 |
110 | 0 | return if (ny >> 31) != 0 { |
111 | | // y < 0 |
112 | 0 | f32::INFINITY |
113 | | } else { |
114 | | // y > 0 |
115 | 0 | 0.0 |
116 | | }; |
117 | 0 | } |
118 | 0 | 0.0 |
119 | 0 | } |
120 | | |
121 | | #[inline] |
122 | 0 | pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 { |
123 | | // we include P[0] = 0 so that P[i] corresponds to degree i |
124 | | // this degree-8 polynomial generated by Sollya (cf p1.sollya) |
125 | | // has relative error < 2^-50.98 |
126 | | const P: [u64; 8] = [ |
127 | | 0x0000000000000000, |
128 | | 0x3ff71547652b82fe, |
129 | | 0xbfe71547652b8d11, |
130 | | 0x3fdec709dc3a5014, |
131 | | 0xbfd715475b144983, |
132 | | 0x3fd2776c3fda300e, |
133 | | 0xbfcec990162358ce, |
134 | | 0x3fca645337c29e27, |
135 | | ]; |
136 | | |
137 | 0 | let z2 = z * z; |
138 | 0 | let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5])); |
139 | 0 | let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3])); |
140 | 0 | let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1])); |
141 | 0 | let z4 = z2 * z2; |
142 | 0 | c5 = dd_fmla(f64::from_bits(P[7]), z2, c5); |
143 | 0 | c1 = dd_fmla(c3, z2, c1); |
144 | 0 | c1 = dd_fmla(c5, z4, c1); |
145 | 0 | z * c1 |
146 | 0 | } |
147 | | |
148 | | // for 0<=i<46, inv[i] approximates 1/t for 1/2+(i+13)/64 <= t < 1/2+(i+14)/64 |
149 | | pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [ |
150 | | 0x3ff6800000000000, |
151 | | 0x3ff6000000000000, |
152 | | 0x3ff5800000000000, |
153 | | 0x3ff5000000000000, |
154 | | 0x3ff4c00000000000, |
155 | | 0x3ff4400000000000, |
156 | | 0x3ff4000000000000, |
157 | | 0x3ff3800000000000, |
158 | | 0x3ff3400000000000, |
159 | | 0x3ff2c00000000000, |
160 | | 0x3ff2800000000000, |
161 | | 0x3ff2000000000000, |
162 | | 0x3ff1c00000000000, |
163 | | 0x3ff1800000000000, |
164 | | 0x3ff1400000000000, |
165 | | 0x3ff1000000000000, |
166 | | 0x3ff0c00000000000, |
167 | | 0x3ff0800000000000, |
168 | | 0x3ff0000000000000, |
169 | | 0x3ff0000000000000, |
170 | | 0x3fef400000000000, |
171 | | 0x3feec00000000000, |
172 | | 0x3fee400000000000, |
173 | | 0x3fee000000000000, |
174 | | 0x3fed800000000000, |
175 | | 0x3fed000000000000, |
176 | | 0x3feca00000000000, |
177 | | 0x3fec400000000000, |
178 | | 0x3febe00000000000, |
179 | | 0x3feb800000000000, |
180 | | 0x3feb200000000000, |
181 | | 0x3feac00000000000, |
182 | | 0x3fea800000000000, |
183 | | 0x3fea200000000000, |
184 | | 0x3fe9c00000000000, |
185 | | 0x3fe9800000000000, |
186 | | 0x3fe9200000000000, |
187 | | 0x3fe8c00000000000, |
188 | | 0x3fe8800000000000, |
189 | | 0x3fe8400000000000, |
190 | | 0x3fe8000000000000, |
191 | | 0x3fe7c00000000000, |
192 | | 0x3fe7600000000000, |
193 | | 0x3fe7200000000000, |
194 | | 0x3fe6e00000000000, |
195 | | 0x3fe6a00000000000, |
196 | | ]; |
197 | | |
198 | | /* log2inv[i][0]+log2inv[i][1] is a double-double approximation of |
199 | | -log2(inv[i]), with log2inv[i][0] having absolute error < 2^-54.462, |
200 | | and log2inv[i][0]+log2inv[i][1] absolute error < 2^-109.101 */ |
201 | | pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [ |
202 | | (0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf), |
203 | | (0x3c1c141e66faaaad, 0xbfdd6753e032ea0f), |
204 | | (0x3c76fae441c09d76, 0xbfdb47ebf73882a1), |
205 | | (0x3c72d352bea51e59, 0xbfd91bba891f1709), |
206 | | (0xbc69575b04fa6fbd, 0xbfd800a563161c54), |
207 | | (0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688), |
208 | | (0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b), |
209 | | (0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a), |
210 | | (0x3c6a7b47d2c352d9, 0xbfd11307dad30b76), |
211 | | (0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970), |
212 | | (0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94), |
213 | | (0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688), |
214 | | (0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a), |
215 | | (0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4), |
216 | | (0xbc58ecb169b9465f, 0xbfbbc84240adabba), |
217 | | (0xbc5f3314e0985116, 0xbfb663f6fac91316), |
218 | | (0x3c530c22d15199b8, 0xbfb0eb389fa29f9b), |
219 | | (0xbc389b03784b5be1, 0xbfa6bad3758efd87), |
220 | | (0x0000000000000000, 0x0000000000000000), |
221 | | (0x0000000000000000, 0x0000000000000000), |
222 | | (0x3c3491f06c085bc2, 0x3fa184b8e4c56af8), |
223 | | (0x3c0155660710eb2a, 0x3fad6ebd1f1febfe), |
224 | | (0x3c2c141e66faaaad, 0x3fb4c560fe68af88), |
225 | | (0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c), |
226 | | (0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56), |
227 | | (0xbc5696e2866c718e, 0x3fc22dadc2ab3497), |
228 | | (0xbc61562d61af73f8, 0x3fc494f863b8df35), |
229 | | (0xbc60798d1aa21694, 0x3fc7046031c79f85), |
230 | | (0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1), |
231 | | (0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc), |
232 | | (0xbc6086fce864a1f6, 0x3fce857d3d361368), |
233 | | (0xbc53d56efe4338fe, 0x3fd08bce0d95fa38), |
234 | | (0x3c7c8d43e017579b, 0x3fd169c05363f158), |
235 | | (0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec), |
236 | | (0xbc7c658d602e66b0, 0x3fd4106017c3eca3), |
237 | | (0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598), |
238 | | (0x3c7ac9080333c605, 0x3fd6552b49986277), |
239 | | (0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad), |
240 | | (0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32), |
241 | | (0x3c7bca36fd02def0, 0x3fd99b072a96c6b2), |
242 | | (0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e), |
243 | | (0xbc501d98c3531027, 0x3fdb877c57b1b070), |
244 | | (0x3c78a38b4175d665, 0x3fdcffae611ad12b), |
245 | | (0x3c438c8946414c6a, 0x3fddfdd89d586e2b), |
246 | | (0x3c76d261f1753e0b, 0x3fdefec61b011f85), |
247 | | (0xbc87398fe685f171, 0x3fe0014332be0033), |
248 | | ]; |
249 | | |
250 | | /* for |z| <= 2^-6, returns an approximation of 2^z |
251 | | with absolute error < 2^-43.540 */ |
252 | | #[inline] |
253 | 0 | fn compoundf_expf_poly(z: f64) -> f64 { |
254 | | /* Q is a degree-4 polynomial generated by Sollya (cf q1.sollya) |
255 | | with absolute error < 2^-43.549 */ |
256 | | const Q: [u64; 5] = [ |
257 | | 0x3ff0000000000000, |
258 | | 0x3fe62e42fef6d01a, |
259 | | 0x3fcebfbdff7feeba, |
260 | | 0x3fac6b167e579bee, |
261 | | 0x3f83b2b3428d06de, |
262 | | ]; |
263 | 0 | let z2 = z * z; |
264 | 0 | let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3])); |
265 | 0 | let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0])); |
266 | 0 | let c2 = dd_fmla(c3, z, f64::from_bits(Q[2])); |
267 | 0 | dd_fmla(c2, z2, c0) |
268 | 0 | } |
269 | | |
270 | 0 | pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 { |
271 | | /* for x > 0, 1+x is exact when 2^-29 <= x < 2^53 |
272 | | for x < 0, 1+x is exact when -1 < x <= 2^-30 */ |
273 | | |
274 | | // double u = (x >= 0x1p53) ? x : 1.0 + x; |
275 | 0 | let u = 1.0 + x; |
276 | | /* For x < 0x1p53, x + 1 is exact thus u = x+1. |
277 | | For x >= 2^53, we estimate log2(x) instead of log2(1+x), |
278 | | since log2(1+x) = log2(x) + log2(1+1/x), |
279 | | log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative |
280 | | error is bounded by 2^-52.471/53 < 2^-58.198 */ |
281 | | |
282 | 0 | let mut v = u.to_bits(); |
283 | 0 | let m: u64 = v & 0xfffffffffffffu64; |
284 | 0 | let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64; |
285 | | // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128 |
286 | 0 | v = v.wrapping_sub((e << 52) as u64); |
287 | 0 | let t = f64::from_bits(v); |
288 | | // u = 2^e*t with 1/sqrt(2) < t < sqrt(2) |
289 | | // thus log2(u) = e + log2(t) |
290 | 0 | v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4) |
291 | 0 | let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45 |
292 | 0 | let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]); |
293 | 0 | let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64 |
294 | | // we approximates log2(t) by -log2(r) + log2(r*t) |
295 | 0 | let p = log2p1_polyeval_1(z); |
296 | | // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459 |
297 | 0 | e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p) |
298 | 0 | } |
299 | | |
300 | | pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [ |
301 | | 0xbfe0000000000000, |
302 | | 0xbfde000000000000, |
303 | | 0xbfdc000000000000, |
304 | | 0xbfda000000000000, |
305 | | 0xbfd8000000000000, |
306 | | 0xbfd6000000000000, |
307 | | 0xbfd4000000000000, |
308 | | 0xbfd2000000000000, |
309 | | 0xbfd0000000000000, |
310 | | 0xbfcc000000000000, |
311 | | 0xbfc8000000000000, |
312 | | 0xbfc4000000000000, |
313 | | 0xbfc0000000000000, |
314 | | 0xbfb8000000000000, |
315 | | 0xbfb0000000000000, |
316 | | 0xbfa0000000000000, |
317 | | 0x0000000000000000, |
318 | | 0x3fa0000000000000, |
319 | | 0x3fb0000000000000, |
320 | | 0x3fb8000000000000, |
321 | | 0x3fc0000000000000, |
322 | | 0x3fc4000000000000, |
323 | | 0x3fc8000000000000, |
324 | | 0x3fcc000000000000, |
325 | | 0x3fd0000000000000, |
326 | | 0x3fd2000000000000, |
327 | | 0x3fd4000000000000, |
328 | | 0x3fd6000000000000, |
329 | | 0x3fd8000000000000, |
330 | | 0x3fda000000000000, |
331 | | 0x3fdc000000000000, |
332 | | 0x3fde000000000000, |
333 | | 0x3fe0000000000000, |
334 | | ]; |
335 | | |
336 | | /* exp2_U[i] is a double-double approximation h+l of 2^exp2_T[i] |
337 | | so that h approximates 2^exp2_T[i] with absolute error < 2^-53.092, |
338 | | and h+l approximates 2^exp2_T[i] with absolute error < 2^-107.385 */ |
339 | | pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [ |
340 | | (0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd), |
341 | | (0xbc716e4786887a99, 0x3fe71f75e8ec5f74), |
342 | | (0xbc741577ee04992f, 0x3fe7a11473eb0187), |
343 | | (0xbc8d4c1dd41532d8, 0x3fe82589994cce13), |
344 | | (0x3c86e9f156864b27, 0x3fe8ace5422aa0db), |
345 | | (0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5), |
346 | | (0x3c6c7c46b071f2be, 0x3fe9c49182a3f090), |
347 | | (0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d), |
348 | | (0x3c87a1cd345dcc81, 0x3feae89f995ad3ad), |
349 | | (0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47), |
350 | | (0x3c711065895048dd, 0x3fec199bdd85529c), |
351 | | (0x3c6503cbd1e949db, 0x3fecb720dcef9069), |
352 | | (0x3c72ed02d75b3707, 0x3fed5818dcfba487), |
353 | | (0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f), |
354 | | (0xbc8e9c23179c2893, 0x3feea4afa2a490da), |
355 | | (0x3c89d3e12dd8a18b, 0x3fef50765b6e4540), |
356 | | (0x0000000000000000, 0x3ff0000000000000), |
357 | | (0x3c8d73e2a475b465, 0x3ff059b0d3158574), |
358 | | (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f), |
359 | | (0xbc96c51039449b3a, 0x3ff11301d0125b51), |
360 | | (0xbc819041b9d78a76, 0x3ff172b83c7d517b), |
361 | | (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa), |
362 | | (0x3c99b07eb6c70573, 0x3ff2387a6e756238), |
363 | | (0x3c8612e8afad1255, 0x3ff29e9df51fdee1), |
364 | | (0x3c86f46ad23182e4, 0x3ff306fe0a31b715), |
365 | | (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb), |
366 | | (0x3c8ada0911f09ebc, 0x3ff3dea64c123422), |
367 | | (0x3c489b7a04ef80d0, 0x3ff44e086061892d), |
368 | | (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27), |
369 | | (0xbc807abe1db13cad, 0x3ff5342b569d4f82), |
370 | | (0x3c96324c054647ad, 0x3ff5ab07dd485429), |
371 | | (0xbc9383c17e40b497, 0x3ff6247eb03a5585), |
372 | | (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd), |
373 | | ]; |
374 | | |
375 | | /* return the correct rounding of (1+x)^y, otherwise -1.0 |
376 | | where t is an approximation of y*log2(1+x) with absolute error < 2^-40.680, |
377 | | assuming 0x1.7154759a0df53p-24 <= |t| <= 150 |
378 | | exact is non-zero iff (1+x)^y is exact or midpoint */ |
379 | 0 | fn exp2_fast(t: f64) -> f64 { |
380 | 0 | let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150 |
381 | 0 | let mut r = t - k; // |r| <= 1/2, exact |
382 | 0 | let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625 |
383 | | // we add 2^-6 so that i is rounded to nearest |
384 | 0 | let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32 |
385 | 0 | r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact |
386 | | // now |r| <= 2^-6 |
387 | | // 2^t = 2^k * exp2_U[i][0] * 2^r |
388 | 0 | v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits(); |
389 | | /* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with |
390 | | exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540, |
391 | | with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is |
392 | | bounded by ulp(v) + 2^0.5 * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035. |
393 | | Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus |
394 | | 2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208. |
395 | | The total absolute error is thus bounded by 2^-43.035 + 2^-41.208 |
396 | | < 2^-40.849. */ |
397 | 0 | let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41 |
398 | 0 | v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) }; // scale v by 2^k, k is already integer |
399 | | |
400 | | // in case of potential underflow, we defer to the accurate path |
401 | 0 | if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) { |
402 | 0 | return -1.0; |
403 | 0 | } |
404 | 0 | err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too |
405 | 0 | let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32; |
406 | 0 | let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32; |
407 | 0 | if lb != rb { |
408 | 0 | return -1.0; |
409 | 0 | } // rounding test failed |
410 | | |
411 | 0 | f64::from_bits(v) |
412 | 0 | } |
413 | | |
414 | | // 2^e/sqrt(2) < h < 2^e*sqrt(2), with -29 <= e <= 128 |
415 | | // divide h, l by 2^e |
416 | | pub(crate) static LOG2P1_SCALE: [u64; 158] = [ |
417 | | 0x41c0000000000000, |
418 | | 0x41b0000000000000, |
419 | | 0x41a0000000000000, |
420 | | 0x4190000000000000, |
421 | | 0x4180000000000000, |
422 | | 0x4170000000000000, |
423 | | 0x4160000000000000, |
424 | | 0x4150000000000000, |
425 | | 0x4140000000000000, |
426 | | 0x4130000000000000, |
427 | | 0x4120000000000000, |
428 | | 0x4110000000000000, |
429 | | 0x4100000000000000, |
430 | | 0x40f0000000000000, |
431 | | 0x40e0000000000000, |
432 | | 0x40d0000000000000, |
433 | | 0x40c0000000000000, |
434 | | 0x40b0000000000000, |
435 | | 0x40a0000000000000, |
436 | | 0x4090000000000000, |
437 | | 0x4080000000000000, |
438 | | 0x4070000000000000, |
439 | | 0x4060000000000000, |
440 | | 0x4050000000000000, |
441 | | 0x4040000000000000, |
442 | | 0x4030000000000000, |
443 | | 0x4020000000000000, |
444 | | 0x4010000000000000, |
445 | | 0x4000000000000000, |
446 | | 0x3ff0000000000000, |
447 | | 0x3fe0000000000000, |
448 | | 0x3fd0000000000000, |
449 | | 0x3fc0000000000000, |
450 | | 0x3fb0000000000000, |
451 | | 0x3fa0000000000000, |
452 | | 0x3f90000000000000, |
453 | | 0x3f80000000000000, |
454 | | 0x3f70000000000000, |
455 | | 0x3f60000000000000, |
456 | | 0x3f50000000000000, |
457 | | 0x3f40000000000000, |
458 | | 0x3f30000000000000, |
459 | | 0x3f20000000000000, |
460 | | 0x3f10000000000000, |
461 | | 0x3f00000000000000, |
462 | | 0x3ef0000000000000, |
463 | | 0x3ee0000000000000, |
464 | | 0x3ed0000000000000, |
465 | | 0x3ec0000000000000, |
466 | | 0x3eb0000000000000, |
467 | | 0x3ea0000000000000, |
468 | | 0x3e90000000000000, |
469 | | 0x3e80000000000000, |
470 | | 0x3e70000000000000, |
471 | | 0x3e60000000000000, |
472 | | 0x3e50000000000000, |
473 | | 0x3e40000000000000, |
474 | | 0x3e30000000000000, |
475 | | 0x3e20000000000000, |
476 | | 0x3e10000000000000, |
477 | | 0x3e00000000000000, |
478 | | 0x3df0000000000000, |
479 | | 0x3de0000000000000, |
480 | | 0x3dd0000000000000, |
481 | | 0x3dc0000000000000, |
482 | | 0x3db0000000000000, |
483 | | 0x3da0000000000000, |
484 | | 0x3d90000000000000, |
485 | | 0x3d80000000000000, |
486 | | 0x3d70000000000000, |
487 | | 0x3d60000000000000, |
488 | | 0x3d50000000000000, |
489 | | 0x3d40000000000000, |
490 | | 0x3d30000000000000, |
491 | | 0x3d20000000000000, |
492 | | 0x3d10000000000000, |
493 | | 0x3d00000000000000, |
494 | | 0x3cf0000000000000, |
495 | | 0x3ce0000000000000, |
496 | | 0x3cd0000000000000, |
497 | | 0x3cc0000000000000, |
498 | | 0x3cb0000000000000, |
499 | | 0x3ca0000000000000, |
500 | | 0x3c90000000000000, |
501 | | 0x3c80000000000000, |
502 | | 0x3c70000000000000, |
503 | | 0x3c60000000000000, |
504 | | 0x3c50000000000000, |
505 | | 0x3c40000000000000, |
506 | | 0x3c30000000000000, |
507 | | 0x3c20000000000000, |
508 | | 0x3c10000000000000, |
509 | | 0x3c00000000000000, |
510 | | 0x3bf0000000000000, |
511 | | 0x3be0000000000000, |
512 | | 0x3bd0000000000000, |
513 | | 0x3bc0000000000000, |
514 | | 0x3bb0000000000000, |
515 | | 0x3ba0000000000000, |
516 | | 0x3b90000000000000, |
517 | | 0x3b80000000000000, |
518 | | 0x3b70000000000000, |
519 | | 0x3b60000000000000, |
520 | | 0x3b50000000000000, |
521 | | 0x3b40000000000000, |
522 | | 0x3b30000000000000, |
523 | | 0x3b20000000000000, |
524 | | 0x3b10000000000000, |
525 | | 0x3b00000000000000, |
526 | | 0x3af0000000000000, |
527 | | 0x3ae0000000000000, |
528 | | 0x3ad0000000000000, |
529 | | 0x3ac0000000000000, |
530 | | 0x3ab0000000000000, |
531 | | 0x3aa0000000000000, |
532 | | 0x3a90000000000000, |
533 | | 0x3a80000000000000, |
534 | | 0x3a70000000000000, |
535 | | 0x3a60000000000000, |
536 | | 0x3a50000000000000, |
537 | | 0x3a40000000000000, |
538 | | 0x3a30000000000000, |
539 | | 0x3a20000000000000, |
540 | | 0x3a10000000000000, |
541 | | 0x3a00000000000000, |
542 | | 0x39f0000000000000, |
543 | | 0x39e0000000000000, |
544 | | 0x39d0000000000000, |
545 | | 0x39c0000000000000, |
546 | | 0x39b0000000000000, |
547 | | 0x39a0000000000000, |
548 | | 0x3990000000000000, |
549 | | 0x3980000000000000, |
550 | | 0x3970000000000000, |
551 | | 0x3960000000000000, |
552 | | 0x3950000000000000, |
553 | | 0x3940000000000000, |
554 | | 0x3930000000000000, |
555 | | 0x3920000000000000, |
556 | | 0x3910000000000000, |
557 | | 0x3900000000000000, |
558 | | 0x38f0000000000000, |
559 | | 0x38e0000000000000, |
560 | | 0x38d0000000000000, |
561 | | 0x38c0000000000000, |
562 | | 0x38b0000000000000, |
563 | | 0x38a0000000000000, |
564 | | 0x3890000000000000, |
565 | | 0x3880000000000000, |
566 | | 0x3870000000000000, |
567 | | 0x3860000000000000, |
568 | | 0x3850000000000000, |
569 | | 0x3840000000000000, |
570 | | 0x3830000000000000, |
571 | | 0x3820000000000000, |
572 | | 0x3810000000000000, |
573 | | 0x3800000000000000, |
574 | | 0x37f0000000000000, |
575 | | ]; |
576 | | |
577 | | /* put in h+l an approximation of log2(1+zh+zl) |
578 | | for |zh| <= 1/64 + 2^-51.508, |zl| < 2^-58 and |zl| < ulp(zh). |
579 | | We have |h|, |h+l| < 2^-5.459, |l| < 2^-56.162, |
580 | | the relative error is bounded by 2^-91.196, |
581 | | and |l| < 2^-50.523 |h| (see analyze_p2() in compoundf.sage). |
582 | | */ |
583 | | |
584 | | /* degree-13 polynomial generated by Sollya which approximates |
585 | | log2(1+z) for |z| <= 1/64 with relative error < 2^-93.777 |
586 | | (cf file p2.sollya) |
587 | | */ |
588 | | static LOG2P1_LOG2_POLY: [u64; 18] = [ |
589 | | 0x3ff71547652b82fe, |
590 | | 0x3c7777d0ffda0d80, |
591 | | 0xbfe71547652b82fe, |
592 | | 0xbc6777d0fd20b49c, |
593 | | 0x3fdec709dc3a03fd, |
594 | | 0x3c7d27f05171b74a, |
595 | | 0xbfd71547652b82fe, |
596 | | 0xbc57814e70b828b0, |
597 | | 0x3fd2776c50ef9bfe, |
598 | | 0x3c7e4f63e12bff83, |
599 | | 0xbfcec709dc3a03f4, |
600 | | 0x3fca61762a7adecc, |
601 | | 0xbfc71547652d8849, |
602 | | 0x3fc484b13d7e7029, |
603 | | 0xbfc2776c1b2a40fd, |
604 | | 0x3fc0c9a80f9b7c1c, |
605 | | 0xbfbecc6801121200, |
606 | | 0x3fbc6e4b91fd10e5, |
607 | | ]; |
608 | | |
609 | 0 | fn log2_poly2(z: DoubleDouble) -> DoubleDouble { |
610 | | /* since we can't expect a relative accuracy better than 2^-93.777, |
611 | | the lower part of the double-double approximation only needs to |
612 | | have about 94-53 = 41 accurate bits. Since |p7*z^7/p1| < 2^-44, |
613 | | we evaluate terms of degree 7 or more in double precision only. */ |
614 | 0 | let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]); // degree 13 |
615 | | |
616 | 0 | for i in 7..=12 { |
617 | 0 | h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i])); |
618 | 0 | } |
619 | | |
620 | 0 | let mut v = DoubleDouble::quick_mult_f64(z, h); |
621 | 0 | let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10])); |
622 | 0 | v.hi = t.hi; |
623 | 0 | v.lo += t.lo; |
624 | | |
625 | 0 | v = DoubleDouble::quick_mult(v, z); |
626 | | |
627 | 0 | let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8])); |
628 | 0 | v.hi = t.hi; |
629 | 0 | v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]); |
630 | | |
631 | 0 | v = DoubleDouble::quick_mult(v, z); |
632 | | |
633 | 0 | let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6])); |
634 | 0 | v.hi = t.hi; |
635 | 0 | v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]); |
636 | | |
637 | 0 | v = DoubleDouble::quick_mult(v, z); |
638 | | |
639 | 0 | let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4])); |
640 | 0 | v.hi = t.hi; |
641 | 0 | v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]); |
642 | | |
643 | 0 | v = DoubleDouble::quick_mult(v, z); |
644 | | |
645 | 0 | let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2])); |
646 | 0 | v.hi = t.hi; |
647 | 0 | v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]); |
648 | | |
649 | 0 | v = DoubleDouble::quick_mult(v, z); |
650 | | |
651 | 0 | let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0])); |
652 | 0 | v.hi = t.hi; |
653 | 0 | v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]); |
654 | | |
655 | 0 | v = DoubleDouble::quick_mult(v, z); |
656 | | |
657 | 0 | v |
658 | 0 | } |
659 | | |
660 | | /* assuming -1 < x < 2^128, and x is representable in binary32, |
661 | | put in h+l a double-double approximation of log2(1+x), |
662 | | with relative error bounded by 2^-91.123, and |l| < 2^-48.574 |h| |
663 | | (see analyze_log2p1_accurate() in compoundf.sage) */ |
664 | 0 | pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble { |
665 | 0 | let mut v_dd = if 1.0 >= x { |
666 | | // then 1.0 >= |x| since x > -1 |
667 | 0 | if (x as f32).abs() >= f32::from_bits(0x25000000) { |
668 | 0 | DoubleDouble::from_exact_add(1.0, x) |
669 | | } else { |
670 | 0 | DoubleDouble::new(x, 1.0) |
671 | | } |
672 | | } else { |
673 | | // fast_two_sum() is exact when |x| < 2^54 by Lemma 1 condition (ii) of [1] |
674 | 0 | DoubleDouble::from_exact_add(x, 1.0) |
675 | | }; |
676 | | |
677 | | // now h + l = 1 + x + eps with |eps| <= 2^-105 |h| and |l| <= ulp(h) |
678 | 0 | let mut v = v_dd.hi.to_bits(); |
679 | 0 | let m = v & 0xfffffffffffffu64; |
680 | 0 | let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64; |
681 | | |
682 | 0 | let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]); |
683 | 0 | v_dd.hi *= scale; |
684 | 0 | v_dd.lo *= scale; |
685 | | |
686 | | // now |h| < sqrt(2) and |l| <= ulp(h) <= 2^-52 |
687 | | |
688 | | // now 1 + x ~ 2^e * (h + l) thus log2(1+x) ~ e + log2(h+l) |
689 | | |
690 | 0 | v = (2.0 + v_dd.hi).to_bits(); // add 2 so that v.f is always in the binade [2, 4) |
691 | 0 | let i: i32 = (v >> 45) as i32 - 0x2002d; // h is near 1/2+(i+13)/64 |
692 | 0 | let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]); |
693 | 0 | let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); // exact, -1/64 <= zh <= 1/64 |
694 | | // since |r| <= 0x1.68p+0 and |l| <= 2^-52, |zl| <= 2^-51.508 |
695 | | // zh + zl = r*(h+l)-1 |
696 | | // log2(h+l) = -log2(r) + log2(r*(h+l)) = -log2(r) + log2(1+zh+zl) |
697 | 0 | z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo); |
698 | | // now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58 |
699 | | /* the relative error of fast_two_sum() is bounded by 2^-105, |
700 | | this amplified the relative error on p2() as follows: |
701 | | (1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */ |
702 | | |
703 | | // now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58 |
704 | | /* the relative error of fast_two_sum() is bounded by 2^-105, |
705 | | this amplified the relative error on p2() as follows: |
706 | | (1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */ |
707 | 0 | let log_p = log2_poly2(z_dd); |
708 | | // ph + pl approximates log2(1+zh+zl) with relative error < 2^-93.471 |
709 | | |
710 | | /* since |log2inv[i][0]| < 1 and e is integer, the precondition of |
711 | | fast_two_sum is fulfilled: either |e| >= 1, or e=0 and fast_two_sum |
712 | | is exact */ |
713 | 0 | let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize]; |
714 | 0 | v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1)); |
715 | 0 | v_dd.lo += f64::from_bits(log2_inv.0); |
716 | 0 | let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi); |
717 | 0 | p.lo += v_dd.lo + log_p.lo; |
718 | 0 | p |
719 | 0 | } |
720 | | |
721 | 0 | pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble { |
722 | | /* Q2 is a degree-8 polynomial generated by Sollya (cf q2.sollya) |
723 | | with absolute error < 2^-85.218 */ |
724 | | |
725 | | static Q2: [u64; 12] = [ |
726 | | 0x3ff0000000000000, |
727 | | 0x3fe62e42fefa39ef, |
728 | | 0x3c7abc9d45534d06, |
729 | | 0x3fcebfbdff82c58f, |
730 | | 0xbc65e4383cf9ddf7, |
731 | | 0x3fac6b08d704a0c0, |
732 | | 0xbc46cbc55586c8f1, |
733 | | 0x3f83b2ab6fba4e77, |
734 | | 0x3f55d87fe789aec5, |
735 | | 0x3f2430912f879daa, |
736 | | 0x3eeffcc774b2367a, |
737 | | 0x3eb62c017b9bdfe6, |
738 | | ]; |
739 | 0 | let h2 = z.hi * z.hi; |
740 | 0 | let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10])); |
741 | 0 | let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8])); |
742 | 0 | c5 = dd_fmla(c7, h2, c5); |
743 | | // since ulp(c5*h^5) <= 2^-86, we still compute c5*z as double |
744 | 0 | let z_vqh = c5 * z.hi; |
745 | 0 | let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh); |
746 | | // multiply by z |
747 | 0 | q = DoubleDouble::quick_mult(q, z); |
748 | | // add coefficient of degree 3 |
749 | 0 | let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi); |
750 | 0 | q.hi = t.hi; |
751 | 0 | q.lo += t.lo + f64::from_bits(Q2[6]); |
752 | | // multiply by z and add coefficient of degree 2 |
753 | 0 | q = DoubleDouble::quick_mult(q, z); |
754 | 0 | let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi); |
755 | 0 | q.hi = t.hi; |
756 | 0 | q.lo += t.lo + f64::from_bits(Q2[4]); |
757 | | // multiply by h+l and add coefficient of degree 1 |
758 | 0 | q = DoubleDouble::quick_mult(q, z); |
759 | 0 | let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi); |
760 | 0 | q.hi = t.hi; |
761 | 0 | q.lo += t.lo + f64::from_bits(Q2[2]); |
762 | | // multiply by h+l and add coefficient of degree 0 |
763 | 0 | q = DoubleDouble::quick_mult(q, z); |
764 | 0 | let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi); |
765 | 0 | q.hi = t.hi; |
766 | 0 | q.lo += t.lo; |
767 | 0 | q |
768 | 0 | } |
769 | | |
770 | | /* return the correct rounding of (1+x)^y or -1 if the rounding test failed, |
771 | | where t is an approximation of y*log2(1+x). |
772 | | We assume |h+l| < 150, |l/h| < 2^-48.445 |h|, |
773 | | and the relative error between h+l and y*log2(1+x) is < 2^-91.120. |
774 | | x and y are the original inputs of compound. */ |
775 | 0 | fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 { |
776 | 0 | if y == 1.0 { |
777 | 0 | let res = 1.0 + x; |
778 | 0 | return res; |
779 | 0 | } |
780 | 0 | let k = x_dd.hi.cpu_round_ties_even(); // |k| <= 150 |
781 | | |
782 | | // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+ |
783 | 0 | if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) { |
784 | | /* the relative error between h and y*log2(1+x) is bounded by |
785 | | (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444. |
786 | | 2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25. |
787 | | The above threshold is such that h*(1+2^-48.444) < H0. */ |
788 | 0 | return (1.0 + x_dd.hi * 0.5) as f32; |
789 | 0 | } |
790 | | |
791 | 0 | let r = x_dd.hi - k; // |r| <= 1/2, exact |
792 | | // since r is an integer multiple of ulp(h), fast_two_sum() below is exact |
793 | 0 | let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo); |
794 | 0 | let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625 |
795 | | // we add 2^-6 so that i is rounded to nearest |
796 | 0 | let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32 |
797 | | // h is near (i-16)/2^5 |
798 | 0 | v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact |
799 | | |
800 | | // now |h| <= 2^-6 |
801 | | // 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l) |
802 | 0 | v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo); |
803 | 0 | let q = compoundf_exp2_poly2(v_dd); |
804 | | |
805 | | /* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and |
806 | | |qh + ql - 2^(h+l)| < 2^-85.210 */ |
807 | 0 | let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]); |
808 | 0 | let mut q = DoubleDouble::quick_mult(exp2u, q); |
809 | 0 | q = DoubleDouble::from_exact_add(q.hi, q.lo); |
810 | | /* Total error: |
811 | | * at input we have a relative error between h+l and y*log2(1+x) bounded |
812 | | by 2^-91.120: h + l = y*log2(1+x) * (1 + eps1) with |eps1| < 2^-91.120. |
813 | | Since |h+l| <= 150, this yields an absolute error bounded |
814 | | by 150*2^-91.120 < 2^-83.891: |
815 | | h + l = y*log2(1+x) + eps2 with |eps2| <= 150*2^-91.120 < 2^-83.891. |
816 | | * the absolute error in q2() is bounded by 2^-85.210 |
817 | | and is multiplied by exp2_U[i] < 1.415 |
818 | | * the absolute d_mul() error is bounded by 2^-102.199 |
819 | | * the fast_two_sum() error is bounded by 2^-105 |
820 | | All this yields an absolute error on qh+ql bounded by: |
821 | | 2^-83.891 + 2^-85.210*1.415 + 2^-102.199 + 2^-105 < 2^-83.242. |
822 | | |
823 | | We distinguish the "small" case when at input |h+l| <= 2^-9. |
824 | | In this case k=0, i=16, thus exp2_T[i]=0, exp2_U[i]=1, |
825 | | and absolute error in q2() is bounded by 2^-102.646, |
826 | | and remains unchanged since the d_mul() call does not change qh, ql. |
827 | | */ |
828 | | |
829 | | /* Rounding test: since |ql| < ulp(qh), and the error is less than ulp(qh), |
830 | | the rounding test can fail only when the last 53-25 = 28 bits of qh |
831 | | represent a signed number in [-1,1] (when it is -2 or 2, adding ql and |
832 | | the error cannot cross a rounding boundary). */ |
833 | 0 | let mut w = q.hi.to_bits(); |
834 | 0 | if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 { |
835 | | static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000]; |
836 | 0 | let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000); |
837 | 0 | let err = f64::from_bits(ERR[small as usize]); |
838 | | |
839 | 0 | w = (q.hi + (q.lo + err)).to_bits(); |
840 | 0 | w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) }; |
841 | 0 | } |
842 | | |
843 | | /* multiply qh+ql by 2^k: since 0.989 < qh_in < 1.011 and |
844 | | 0.707 < exp2_U[i] < 1.415, we have 0.69 < qh+ql < 1.44 */ |
845 | 0 | v = (q.hi + q.lo).to_bits(); |
846 | | /* For RNDN, if qh fits exactly in 25 bits, and ql is tiny, so that |
847 | | qh + ql rounds to qh, then we might have a double-rounding issue. */ |
848 | 0 | if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. { |
849 | 0 | v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); // simulate round to odd |
850 | 0 | } |
851 | 0 | v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) }; |
852 | | // there is no underflow/overflow in the scaling by 2^k since |k| <= 150 |
853 | 0 | f64::from_bits(v) as f32 |
854 | 0 | } |
855 | | |
856 | | // at input, exact is non-zero iff (1+x)^y is exact |
857 | | // x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit |
858 | | // x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit |
859 | | // x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit |
860 | | #[cold] |
861 | | #[inline(never)] |
862 | 0 | fn compoundf_accurate(x: f32, y: f32) -> f32 { |
863 | 0 | let mut v = compoundf_log2p1_accurate(x as f64); |
864 | | /* h + l is a double-double approximation of log(1+x), |
865 | | with relative error bounded by 2^-91.123, |
866 | | and |l| < 2^-48.574 |h| */ |
867 | 0 | v = DoubleDouble::quick_mult_f64(v, y as f64); |
868 | | /* h + l is a double-double approximation of y*log(1+x). |
869 | | Since 2^-149 <= |h_in+l_in| < 128 and 2^-149 <= |y| < 2^128, we have |
870 | | 2^-298 <= |h+l| < 2^135, thus no underflow/overflow in double is possible. |
871 | | The s_mul() error is bounded by ulp(l). Since |l_in| < 2^-48.574 |h_in|, |
872 | | and the intermediate variable lo in s_mul() satisfies |lo| < ulp(h), |
873 | | we have |l| < ulp(h) + |y l_in| <= ulp(h) + 2^-48.574 |y h_in| |
874 | | < (2^-52 + 2^-48.574) |h| < 2^-48.445 |h|. The s_mul() error is thus |
875 | | bounded by 2^-48.445*2^-52 = 2^-100.445 |h|. This yields a total relative |
876 | | error bounded by (1+2^-91.123)*(1+2^-100.445)-1 < 2^-91.120. */ |
877 | 0 | compoundf_exp2_accurate(v, x, y) |
878 | 0 | } |
879 | | |
880 | | /// Computes compound function (1.0 + x)^y |
881 | | /// |
882 | | /// Max ULP 0.5 |
883 | | #[inline] |
884 | 0 | pub fn f_compoundf(x: f32, y: f32) -> f32 { |
885 | | /* Rules from IEEE 754-2019 for compound (x, n) with n integer: |
886 | | (a) compound (x, 0) is 1 for x >= -1 or quiet NaN |
887 | | (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0 |
888 | | (c) compound (-1, n) is +0 for n > 0 |
889 | | (d) compound (+/-0, n) is 1 |
890 | | (e) compound (+Inf, n) is +Inf for n > 0 |
891 | | (f) compound (+Inf, n) is +0 for n < 0 |
892 | | (g) compound (x, n) is qNaN and signals the invalid exception for x < -1 |
893 | | (h) compound (qNaN, n) is qNaN for n <> 0. |
894 | | */ |
895 | 0 | let mone = (-1.0f32).to_bits(); |
896 | 0 | let nx = x.to_bits(); |
897 | 0 | let ny = y.to_bits(); |
898 | 0 | if nx >= mone { |
899 | 0 | return as_compoundf_special(x, y); |
900 | 0 | } // x <= -1 |
901 | | // now x > -1 |
902 | | |
903 | 0 | let ax: u32 = nx.wrapping_shl(1); |
904 | 0 | let ay: u32 = ny.wrapping_shl(1); |
905 | 0 | if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 { |
906 | 0 | return as_compoundf_special(x, y); |
907 | 0 | } // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan |
908 | | |
909 | | // evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64 |
910 | 0 | if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 { |
911 | 0 | if ax <= 0x62000000u32 { |
912 | 0 | return 1.0 + y * x; |
913 | 0 | } // does it work for |x|<2^-29 and |y|<=16? |
914 | 0 | let mut s = x as f64 + 1.; |
915 | 0 | let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() }; |
916 | | |
917 | | // exponentiation by squaring: O(log(y)) complexity |
918 | 0 | let mut acc = if iter_count % 2 != 0 { s } else { 1. }; |
919 | | |
920 | 0 | while { |
921 | 0 | iter_count >>= 1; |
922 | 0 | iter_count |
923 | 0 | } != 0 |
924 | | { |
925 | 0 | s = s * s; |
926 | 0 | if iter_count % 2 != 0 { |
927 | 0 | acc *= s; |
928 | 0 | } |
929 | | } |
930 | | |
931 | 0 | let dz = if y.is_sign_negative() { 1. / acc } else { acc }; |
932 | 0 | return dz as f32; |
933 | 0 | } |
934 | | |
935 | 0 | let xd = x as f64; |
936 | 0 | let yd = y as f64; |
937 | 0 | let tx = xd.to_bits(); |
938 | 0 | let ty = yd.to_bits(); |
939 | | |
940 | 0 | let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx)); |
941 | | |
942 | | /* l approximates log2(1+x) with relative error < 2^-47.997, |
943 | | and 2^-149 <= |l| < 128 */ |
944 | | |
945 | 0 | let t: u64 = (l * f64::from_bits(ty)).to_bits(); |
946 | | /* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have |
947 | | 2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible. |
948 | | The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */ |
949 | | |
950 | | // detect overflow/underflow |
951 | 0 | if (t.wrapping_shl(1)) >= (0x406u64 << 53) { |
952 | | // |t| >= 128 |
953 | 0 | if t >= 0x3018bu64 << 46 { |
954 | | // t <= -150 |
955 | 0 | return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000)); |
956 | 0 | } else if (t >> 63) == 0 { |
957 | | // t >= 128: overflow |
958 | 0 | return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000)); |
959 | 0 | } |
960 | 0 | } |
961 | | |
962 | | /* since |t| < 150, the absolute error on t is bounded by |
963 | | 150*2^-47.909 < 2^-40.680 */ |
964 | | |
965 | | // 2^t rounds to 1 to nearest when |t| <= 0x1.715476ba97f14p-25 |
966 | 0 | if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 { |
967 | 0 | return if (t >> 63) != 0 { |
968 | 0 | black_box(1.0) - black_box(f32::from_bits(0x33000000)) |
969 | | } else { |
970 | 0 | black_box(1.0) + black_box(f32::from_bits(0x33000000)) |
971 | | }; |
972 | 0 | } |
973 | | |
974 | 0 | let res = exp2_fast(f64::from_bits(t)); |
975 | 0 | if res != -1.0 { |
976 | 0 | return res as f32; |
977 | 0 | } |
978 | 0 | compoundf_accurate(x, y) |
979 | 0 | } |
980 | | |
981 | | #[cfg(test)] |
982 | | mod tests { |
983 | | use super::*; |
984 | | |
985 | | #[test] |
986 | | fn test_compoundf() { |
987 | | assert_eq!( |
988 | | f_compoundf( |
989 | | 0.000000000000000000000000000000000000011754944, |
990 | | -170502050000000000000000000000000000000. |
991 | | ), |
992 | | 1. |
993 | | ); |
994 | | assert_eq!(f_compoundf(1.235, 1.432), 3.1634824); |
995 | | assert_eq!(f_compoundf(2., 3.0), 27.); |
996 | | assert!(f_compoundf(-2., 5.0).is_nan()); |
997 | | assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY); |
998 | | assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0); |
999 | | } |
1000 | | } |