/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/powf.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bits::biased_exponent_f64; |
30 | | use crate::common::*; |
31 | | use crate::double_double::DoubleDouble; |
32 | | use crate::exponents::expf; |
33 | | use crate::logf; |
34 | | use crate::logs::LOG2_R; |
35 | | use crate::polyeval::{f_polyeval3, f_polyeval6, f_polyeval10}; |
36 | | use crate::pow_tables::EXP2_MID1; |
37 | | use crate::powf_tables::{LOG2_R_TD, LOG2_R2_DD, POWF_R2}; |
38 | | use crate::rounding::CpuRound; |
39 | | |
40 | | /// Power function for a given value, usable in const contexts.
41 | | /// This is a simplified version that only aims for a good approximation in const contexts.
42 | 0 | pub const fn powf(d: f32, n: f32) -> f32 { |
43 | 0 | let value = d.abs(); |
44 | 0 | let c = expf(n * logf(value)); |
45 | 0 | if n == 1. { |
46 | 0 | return d; |
47 | 0 | } |
48 | 0 | if d < 0.0 { |
49 | 0 | let y = n as i32; |
50 | 0 | if y % 2 == 0 { c } else { -c } |
51 | | } else { |
52 | 0 | c |
53 | | } |
54 | 0 | } |
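
// Illustrative sketch (added, not part of the original crate): the const `powf`
// above relies on the identity |d|^n = exp(n * ln|d|), restoring the sign from
// the parity of `n` when `d` is negative. With `std` (outside const contexts)
// the same idea can be written as:
#[allow(dead_code)]
fn powf_identity_sketch(d: f32, n: f32) -> f32 {
    // exp(n * ln|d|) approximates |d|^n.
    let c = (n * d.abs().ln()).exp();
    // For negative bases, an odd integer exponent flips the sign.
    if d < 0.0 && (n as i32) % 2 != 0 { -c } else { c }
}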
55 | | |
56 | | #[inline] |
57 | 0 | const fn larger_exponent(a: f64, b: f64) -> bool { |
58 | 0 | biased_exponent_f64(a) >= biased_exponent_f64(b) |
59 | 0 | } |
60 | | |
61 | | // Calculate 2^(y * log2(x)) in double-double precision. |
62 | | // At this point we can reuse the following values: |
63 | | // idx_x: index for extra precision of log2 for the middle part of log2(x). |
64 | | // dx: the reduced argument for log2(x) |
65 | | // y6: 2^6 * y. |
66 | | // lo6_hi: the high part of 2^6 * (y * log2(x) - (hi + mid))
67 | | // exp2_hi_mid: high part of 2^(hi + mid) |
68 | | #[cold] |
69 | | #[inline(never)] |
70 | 0 | fn powf_dd(idx_x: i32, dx: f64, y6: f64, lo6_hi: f64, exp2_hi_mid: DoubleDouble) -> f64 { |
71 | | // Perform a second range reduction step: |
72 | | // idx2 = round(2^14 * (dx + 2^-8)) = round ( dx * 2^14 + 2^6) |
73 | | // dx2 = (1 + dx) * r2 - 1 |
74 | | // Output range: |
75 | | // -0x1.3ffcp-15 <= dx2 <= 0x1.3e3dp-15 |
76 | 0 | let idx2 = f_fmla( |
77 | 0 | dx, |
78 | 0 | f64::from_bits(0x40d0000000000000), |
79 | 0 | f64::from_bits(0x4050000000000000), |
80 | 0 | ) |
81 | 0 | .cpu_round() as usize; |
82 | 0 | let dx2 = f_fmla(1.0 + dx, f64::from_bits(POWF_R2[idx2]), -1.0); // Exact |
83 | | |
84 | | const COEFFS: [(u64, u64); 6] = [ |
85 | | (0x3c7777d0ffda25e0, 0x3ff71547652b82fe), |
86 | | (0xbc6777d101cf0a84, 0xbfe71547652b82fe), |
87 | | (0x3c7ce04b5140d867, 0x3fdec709dc3a03fd), |
88 | | (0x3c7137b47e635be5, 0xbfd71547652b82fb), |
89 | | (0xbc5b5a30b3bdb318, 0x3fd2776c516a92a2), |
90 | | (0x3c62d2fbd081e657, 0xbfcec70af1929ca6), |
91 | | ]; |
92 | 0 | let dx_dd = DoubleDouble::new(0.0, dx2); |
93 | 0 | let p = f_polyeval6( |
94 | 0 | dx_dd, |
95 | 0 | DoubleDouble::from_bit_pair(COEFFS[0]), |
96 | 0 | DoubleDouble::from_bit_pair(COEFFS[1]), |
97 | 0 | DoubleDouble::from_bit_pair(COEFFS[2]), |
98 | 0 | DoubleDouble::from_bit_pair(COEFFS[3]), |
99 | 0 | DoubleDouble::from_bit_pair(COEFFS[4]), |
100 | 0 | DoubleDouble::from_bit_pair(COEFFS[5]), |
101 | | ); |
102 | | // log2(1 + dx2) ~ dx2 * P(dx2) |
103 | 0 | let log2_x_lo = DoubleDouble::quick_mult_f64(p, dx2); |
104 | | // Lower parts of (e_x - log2(r1)) from the first range reduction constant
105 | 0 | let log2_r_td = LOG2_R_TD[idx_x as usize]; |
106 | 0 | let log2_x_mid = DoubleDouble::new(f64::from_bits(log2_r_td.0), f64::from_bits(log2_r_td.1)); |
107 | | // -log2(r2) + lower part of (e_x - log2(r1)) |
108 | 0 | let log2_x_m = DoubleDouble::add(DoubleDouble::from_bit_pair(LOG2_R2_DD[idx2]), log2_x_mid); |
109 | | // log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1)) |
110 | | // Since we don't know which operand has the larger exponent (as required by the
111 | | // Fast2Sum algorithm), we check before calling the double-double addition.
112 | 0 | let log2_x = if larger_exponent(log2_x_m.hi, log2_x_lo.hi) { |
113 | 0 | DoubleDouble::add(log2_x_m, log2_x_lo) |
114 | | } else { |
115 | 0 | DoubleDouble::add(log2_x_lo, log2_x_m) |
116 | | }; |
117 | 0 | let lo6_hi_dd = DoubleDouble::new(0.0, lo6_hi); |
118 | | // 2^6 * y * (log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1))) |
119 | 0 | let prod = DoubleDouble::quick_mult_f64(log2_x, y6); |
120 | | // 2^6 * (y * log2(x) - (hi + mid)) = 2^6 * lo |
121 | 0 | let lo6 = if larger_exponent(prod.hi, lo6_hi) { |
122 | 0 | DoubleDouble::add(prod, lo6_hi_dd) |
123 | | } else { |
124 | 0 | DoubleDouble::add(lo6_hi_dd, prod) |
125 | | }; |
126 | | |
127 | | const EXP2_COEFFS: [(u64, u64); 10] = [ |
128 | | (0x0000000000000000, 0x3ff0000000000000), |
129 | | (0x3c1abc9e3b398024, 0x3f862e42fefa39ef), |
130 | | (0xbba5e43a5429bddb, 0x3f0ebfbdff82c58f), |
131 | | (0xbb2d33162491268f, 0x3e8c6b08d704a0c0), |
132 | | (0x3a94fb32d240a14e, 0x3e03b2ab6fba4e77), |
133 | | (0x39ee84e916be83e0, 0x3d75d87fe78a6731), |
134 | | (0xb989a447bfddc5e6, 0x3ce430912f86bfb8), |
135 | | (0xb8e31a55719de47f, 0x3c4ffcbfc588ded9), |
136 | | (0xb850ba57164eb36b, 0x3bb62c034beb8339), |
137 | | (0xb7b8483eabd9642d, 0x3b1b5251ff97bee1), |
138 | | ]; |
139 | | |
140 | 0 | let pp = f_polyeval10( |
141 | 0 | lo6, |
142 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[0]), |
143 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[1]), |
144 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[2]), |
145 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[3]), |
146 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[4]), |
147 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[5]), |
148 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[6]), |
149 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[7]), |
150 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[8]), |
151 | 0 | DoubleDouble::from_bit_pair(EXP2_COEFFS[9]), |
152 | | ); |
153 | 0 | let rr = DoubleDouble::quick_mult(exp2_hi_mid, pp); |
154 | | |
155 | | // Make sure the sum is normalized: |
156 | 0 | let r = DoubleDouble::from_exact_add(rr.hi, rr.lo); |
157 | | |
158 | | const FRACTION_MASK: u64 = (1u64 << 52) - 1; |
159 | | |
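    // The fix-up below appears to handle the case where the high part lands exactly
    // on an f32 rounding boundary (its low mantissa bits are all zero) while the low
    // part is non-zero: the last bit of the high part is nudged toward the low part
    // so that the final f64 -> f32 conversion rounds in the correct direction.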
160 | 0 | let mut r_bits = r.hi.to_bits(); |
161 | 0 | if ((r_bits & 0xfff_ffff) == 0) && (r.lo != 0.0) { |
162 | 0 | let hi_sign = r.hi.to_bits() >> 63; |
163 | 0 | let lo_sign = r.lo.to_bits() >> 63; |
164 | 0 | if hi_sign == lo_sign { |
165 | 0 | r_bits = r_bits.wrapping_add(1); |
166 | 0 | } else if (r_bits & FRACTION_MASK) > 0 { |
167 | 0 | r_bits = r_bits.wrapping_sub(1); |
168 | 0 | } |
169 | 0 | } |
170 | | |
171 | 0 | f64::from_bits(r_bits) |
172 | 0 | } |
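
// Illustrative sketch (added, not part of the original crate): the
// `larger_exponent` checks above exist because the Fast2Sum step inside a
// double-double addition only recovers the exact rounding error of `a + b`
// when the exponent of `a` is at least that of `b`.
#[allow(dead_code)]
fn fast_two_sum_sketch(a: f64, b: f64) -> (f64, f64) {
    // Requires exponent(a) >= exponent(b); otherwise `err` may not be exact.
    let s = a + b;
    let err = b - (s - a);
    (s, err)
}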
173 | | |
174 | | /// Power function |
175 | | /// |
176 | | /// Maximum ULP error found: 0.5
177 | 0 | pub fn f_powf(x: f32, y: f32) -> f32 { |
178 | 0 | let mut x_u = x.to_bits(); |
179 | 0 | let x_abs = x_u & 0x7fff_ffff; |
180 | 0 | let mut y = y; |
181 | 0 | let y_u = y.to_bits(); |
182 | 0 | let y_abs = y_u & 0x7fff_ffff; |
183 | 0 | let mut x = x; |
184 | | |
185 | 0 | if ((y_abs & 0x0007_ffff) == 0) || (y_abs > 0x4f170000) { |
186 | | // x or y is NaN: pow(x, +-0) = 1 and pow(1, y) = 1 even for NaN inputs.
187 | 0 | if x.is_nan() || y.is_nan() { |
188 | 0 | if y.abs() == 0. { |
189 | 0 | return 1.; |
190 | 0 | } |
191 | 0 | if x == 1. { |
192 | 0 | return 1.; |
193 | 0 | } |
194 | 0 | return f32::NAN; |
195 | 0 | } |
196 | | |
197 | | // Exceptional exponents. |
198 | 0 | if y == 0.0 { |
199 | 0 | return 1.0; |
200 | 0 | } |
201 | | |
202 | 0 | match y_abs { |
203 | | 0x7f80_0000 => { |
204 | 0 | if x_abs > 0x7f80_0000 { |
205 | | // pow(NaN, +-Inf) = NaN |
206 | 0 | return x; |
207 | 0 | } |
208 | 0 | if x_abs == 0x3f80_0000 { |
209 | | // pow(+-1, +-Inf) = 1.0f |
210 | 0 | return 1.0; |
211 | 0 | } |
212 | 0 | if x == 0.0 && y_u == 0xff80_0000 { |
213 | | // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO |
214 | 0 | return f32::INFINITY; |
215 | 0 | } |
216 | | // pow (|x| < 1, -inf) = +inf |
217 | | // pow (|x| < 1, +inf) = 0.0f |
218 | | // pow (|x| > 1, -inf) = 0.0f |
219 | | // pow (|x| > 1, +inf) = +inf |
220 | 0 | return if (x_abs < 0x3f80_0000) == (y_u == 0xff80_0000) { |
221 | 0 | f32::INFINITY |
222 | | } else { |
223 | 0 | 0. |
224 | | }; |
225 | | } |
226 | | _ => { |
227 | 0 | match y_u { |
228 | | 0x3f00_0000 => { |
229 | | // pow(x, 1/2) = sqrt(x) |
230 | 0 | if x == 0.0 || x_u == 0xff80_0000 { |
231 | | // pow(-0, 1/2) = +0 |
232 | | // pow(-inf, 1/2) = +inf |
233 | | // Make sure it is correct for FTZ/DAZ. |
234 | 0 | return x * x; |
235 | 0 | } |
236 | 0 | let r = x.sqrt(); |
237 | 0 | return if r.to_bits() != 0x8000_0000 { r } else { 0.0 }; |
238 | | } |
239 | | 0x3f80_0000 => { |
240 | 0 | return x; |
241 | | } // y = 1.0f |
242 | 0 | 0x4000_0000 => return x * x, // y = 2.0f |
243 | | _ => { |
244 | 0 | let is_int = is_integerf(y); |
245 | 0 | if is_int && (y_u > 0x4000_0000) && (y_u <= 0x41c0_0000) { |
246 | | // Check for exact cases when 2 < y < 25 and y is an integer. |
247 | 0 | let mut msb: i32 = if x_abs == 0 { |
248 | 0 | 32 - 2 |
249 | | } else { |
250 | 0 | x_abs.leading_zeros() as i32 |
251 | | }; |
252 | 0 | msb = if msb > 8 { msb } else { 8 }; |
253 | 0 | let mut lsb: i32 = if x_abs == 0 { |
254 | 0 | 0 |
255 | | } else { |
256 | 0 | x_abs.trailing_zeros() as i32 |
257 | | }; |
258 | 0 | lsb = if lsb > 23 { 23 } else { lsb }; |
259 | 0 | let extra_bits: i32 = 32 - 2 - lsb - msb; |
260 | 0 | let iter = y as i32; |
261 | | |
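                        // Rough reading (added comment): `extra_bits` is the width of
                        // x's significand beyond its leading bit, so `extra_bits * iter`
                        // bounds the significand width of x^y; if that fits in double
                        // precision (with margin for a half-way case), the repeated
                        // multiplication below is exact.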
262 | 0 | if extra_bits * iter <= 23 + 2 { |
263 | | // The result is either exact or exactly half-way. |
264 | | // Either way, it is exactly representable in double precision.
265 | 0 | let x_d = x as f64; |
266 | 0 | let mut result = x_d; |
267 | 0 | for _ in 1..iter { |
268 | 0 | result *= x_d; |
269 | 0 | } |
270 | 0 | return result as f32; |
271 | 0 | } |
272 | 0 | } |
273 | | |
274 | 0 | if y_abs > 0x4f17_0000 { |
275 | | // if y is NaN |
276 | 0 | if y_abs > 0x7f80_0000 { |
277 | 0 | if x_u == 0x3f80_0000 { |
278 | | // x = 1.0f |
279 | | // pow(1, NaN) = 1 |
280 | 0 | return 1.0; |
281 | 0 | } |
282 | | // pow(x, NaN) = NaN |
283 | 0 | return y; |
284 | 0 | } |
285 | | // x^y will overflow / underflow in single precision. Set y to a
286 | | // large enough exponent, but not too large, so that the computations
287 | | // won't overflow in double precision.
288 | 0 | y = f32::from_bits((y_u & 0x8000_0000).wrapping_add(0x4f800000u32)); |
289 | 0 | } |
290 | | } |
291 | | } |
292 | | } |
293 | | } |
294 | 0 | } |
295 | | |
296 | | const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32; |
297 | 0 | let mut ex = -(E_BIAS as i32); |
298 | 0 | let mut sign: u64 = 0; |
299 | | |
300 | 0 | if ((x_u & 0x801f_ffffu32) == 0) || x_u >= 0x7f80_0000u32 || x_u < 0x0080_0000u32 { |
301 | 0 | if x.is_nan() { |
302 | 0 | return f32::NAN; |
303 | 0 | } |
304 | | |
305 | 0 | if x_u == 0x3f80_0000 { |
306 | 0 | return 1.; |
307 | 0 | } |
308 | | |
309 | 0 | let x_is_neg = x.to_bits() > 0x8000_0000; |
310 | | |
311 | 0 | if x == 0.0 { |
312 | 0 | let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u)); |
313 | 0 | if y_u > 0x8000_0000u32 { |
314 | | // pow(0, negative number) = inf |
315 | 0 | return if x_is_neg { |
316 | 0 | f32::NEG_INFINITY |
317 | | } else { |
318 | 0 | f32::INFINITY |
319 | | }; |
320 | 0 | } |
321 | | // pow(0, positive number) = 0 |
322 | 0 | return if out_is_neg { -0.0 } else { 0.0 }; |
323 | 0 | } |
324 | | |
325 | 0 | if x_abs == 0x7f80_0000u32 { |
326 | | // x = +-Inf |
327 | 0 | let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u)); |
328 | 0 | if y_u >= 0x7fff_ffff { |
329 | 0 | return if out_is_neg { -0.0 } else { 0.0 }; |
330 | 0 | } |
331 | 0 | return if out_is_neg { |
332 | 0 | f32::NEG_INFINITY |
333 | | } else { |
334 | 0 | f32::INFINITY |
335 | | }; |
336 | 0 | } |
337 | | |
338 | 0 | if x_abs > 0x7f80_0000 { |
339 | | // x is NaN. |
340 | | // pow (aNaN, 0) is already taken care above. |
341 | 0 | return x; |
342 | 0 | } |
343 | | |
344 | | // Normalize denormal inputs. |
345 | 0 | if x_abs < 0x0080_0000u32 { |
346 | 0 | ex = ex.wrapping_sub(64); |
347 | 0 | x *= f32::from_bits(0x5f800000); |
348 | 0 | } |
349 | | |
350 | | // x is finite and negative: the result is real only when y is an integer.
351 | 0 | if x.is_sign_negative() { |
352 | 0 | if is_integerf(y) { |
353 | 0 | x = -x; |
354 | 0 | if is_odd_integerf(y) { |
355 | 0 | sign = 0x8000_0000_0000_0000u64; |
356 | 0 | } |
357 | | } else { |
358 | | // pow( negative, non-integer ) = NaN |
359 | 0 | return f32::NAN; |
360 | | } |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | | // x^y = 2^( y * log2(x) ) |
365 | | // = 2^( y * ( e_x + log2(m_x) ) ) |
366 | | // First we compute log2(x) = e_x + log2(m_x) |
367 | 0 | x_u = x.to_bits(); |
368 | | |
369 | | // Extract exponent field of x. |
370 | 0 | ex = ex.wrapping_add((x_u >> 23) as i32); |
371 | 0 | let e_x = ex as f64; |
372 | | // Use the highest 7 fractional bits of m_x as the index for look up tables. |
373 | 0 | let x_mant = x_u & ((1u32 << 23) - 1); |
374 | 0 | let idx_x = (x_mant >> (23 - 7)) as i32; |
375 | | // Add the hidden bit to the mantissa. |
376 | | // 1 <= m_x < 2 |
377 | 0 | let m_x = f32::from_bits(x_mant | 0x3f800000); |
378 | | |
379 | | // Reduced argument for log2(m_x): |
380 | | // dx = r * m_x - 1. |
381 | | // The computation is exact, and -2^-8 <= dx < 2^-7. |
382 | | // Then m_x = (1 + dx) / r, and |
383 | | // log2(m_x) = log2( (1 + dx) / r ) |
384 | | // = log2(1 + dx) - log2(r). |
385 | | |
386 | | let dx; |
387 | | #[cfg(any( |
388 | | all( |
389 | | any(target_arch = "x86", target_arch = "x86_64"), |
390 | | target_feature = "fma" |
391 | | ), |
392 | | target_arch = "aarch64" |
393 | | ))] |
394 | | { |
395 | | use crate::logs::LOG_REDUCTION_F32; |
396 | | dx = f_fmlaf( |
397 | | m_x, |
398 | | f32::from_bits(LOG_REDUCTION_F32.0[idx_x as usize]), |
399 | | -1.0, |
400 | | ) as f64; // Exact. |
401 | | } |
402 | | #[cfg(not(any( |
403 | | all( |
404 | | any(target_arch = "x86", target_arch = "x86_64"), |
405 | | target_feature = "fma" |
406 | | ), |
407 | | target_arch = "aarch64" |
408 | | )))] |
409 | | { |
410 | | use crate::logs::LOG_RANGE_REDUCTION; |
411 | 0 | dx = f_fmla( |
412 | 0 | m_x as f64, |
413 | 0 | f64::from_bits(LOG_RANGE_REDUCTION[idx_x as usize]), |
414 | 0 | -1.0, |
415 | 0 | ); // Exact |
416 | | } |
417 | | |
418 | | // Degree-5 polynomial approximation: |
419 | | // dx * P(dx) ~ log2(1 + dx) |
420 | | // Generated by Sollya with: |
421 | | // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]); |
422 | | // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]); |
423 | | // 0x1.653...p-52 |
424 | | const COEFFS: [u64; 6] = [ |
425 | | 0x3ff71547652b82fe, |
426 | | 0xbfe71547652b7a07, |
427 | | 0x3fdec709dc458db1, |
428 | | 0xbfd715479c2266c9, |
429 | | 0x3fd2776ae1ddf8f0, |
430 | | 0xbfce7b2178870157, |
431 | | ]; |
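    // Evaluation note (added, assuming f_polyeval3(x, a0, a1, a2) = a0 + a1*x + a2*x^2):
    // adjacent coefficients are paired with FMAs (c0, c1, c2 below), then combined in
    // powers of dx^2, which reconstructs the full degree-5 polynomial Estrin-style.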
432 | | |
433 | 0 | let dx2 = dx * dx; // Exact |
434 | 0 | let c0 = f_fmla(dx, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0])); |
435 | 0 | let c1 = f_fmla(dx, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2])); |
436 | 0 | let c2 = f_fmla(dx, f64::from_bits(COEFFS[5]), f64::from_bits(COEFFS[4])); |
437 | | |
438 | 0 | let p = f_polyeval3(dx2, c0, c1, c2); |
439 | | |
440 | | // s = e_x - log2(r) + dx * P(dx) |
441 | | // Approximation errors: |
442 | | // |log2(x) - s| < ulp(e_x) + (bounds on dx) * (error bounds of P(dx)) |
443 | | // = ulp(e_x) + 2^-7 * 2^-51 |
444 | | // < 2^8 * 2^-52 + 2^-7 * 2^-43 |
445 | | // ~ 2^-44 + 2^-50 |
446 | 0 | let s = f_fmla(dx, p, f64::from_bits(LOG2_R[idx_x as usize]) + e_x); |
447 | | |
448 | | // To compute 2^(y * log2(x)), we break the exponent into 3 parts: |
449 | | // y * log(2) = hi + mid + lo, where |
450 | | // hi is an integer |
451 | | // mid * 2^6 is an integer |
452 | | // |lo| <= 2^-7 |
453 | | // Then: |
454 | | // x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo, |
455 | | // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements, |
456 | | // and 2^lo ~ 1 + lo * P(lo). |
457 | | // Thus, we have: |
458 | | // hi + mid = 2^-6 * round( 2^6 * y * log2(x) ) |
459 | | // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6) |
460 | | // bits, hence, if we use double precision to perform |
461 | | // round( 2^6 * y * log2(x)) |
462 | | // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38 |
463 | | |
464 | | // In the following computations: |
465 | | // y6 = 2^6 * y |
466 | | // hm = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s) |
467 | | // lo6 = 2^6 * lo = 2^6 * (y * log2(x) - (hi + mid)) = y6 * log2(x) - hm.
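    // Worked example (added, approximate values): for x = 3.0, y = 4.0 we get
    // y * log2(x) ~= 6.33985, y6 = 256.0, y6 * s ~= 405.75, so hm = 406.
    // Then hi = 406 >> 6 = 6, idx_y = 406 & 63 = 22, lo6 ~= -0.2496, and
    // x^y = 2^6 * 2^(22/64) * 2^(lo6/64) ~= 64 * 1.2691 * 0.9973 ~= 81.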
468 | 0 | let y6 = (y * f32::from_bits(0x42800000)) as f64; // Exact. |
469 | 0 | let hm = (s * y6).cpu_round(); |
470 | | |
471 | | // let log2_rr = LOG2_R2_DD[idx_x as usize]; |
472 | | |
473 | | // // lo6 = 2^6 * lo. |
474 | | // let lo6_hi = f_fmla(y6, e_x + f64::from_bits(log2_rr.1), -hm); // Exact |
475 | | // // Error bounds: |
476 | | // // | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo) |
477 | | // // < 2^-51 + 2^-75 |
478 | | // let lo6 = f_fmla(y6, f_fmla(dx, p, f64::from_bits(log2_rr.0)), lo6_hi); |
479 | | |
480 | | // lo6 = 2^6 * lo. |
481 | 0 | let lo6_hi = f_fmla(y6, e_x + f64::from_bits(LOG2_R_TD[idx_x as usize].2), -hm); // Exact |
482 | | // Error bounds: |
483 | | // | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo) |
484 | | // < 2^-51 + 2^-75 |
485 | 0 | let lo6 = f_fmla( |
486 | 0 | y6, |
487 | 0 | f_fmla(dx, p, f64::from_bits(LOG2_R_TD[idx_x as usize].1)), |
488 | 0 | lo6_hi, |
489 | | ); |
490 | | |
491 | | // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2 |
492 | | // Clamp the exponent part into smaller range that fits double precision. |
493 | | // For those exponents that are out of range, the final conversion will round |
494 | | // them correctly to inf/max float or 0/min float accordingly. |
495 | 0 | let mut hm_i = unsafe { hm.to_int_unchecked::<i64>() }; |
496 | 0 | hm_i = if hm_i > (1i64 << 15) { |
497 | 0 | 1 << 15 |
498 | 0 | } else if hm_i < (-(1i64 << 15)) { |
499 | 0 | -(1 << 15) |
500 | | } else { |
501 | 0 | hm_i |
502 | | }; |
503 | | |
504 | 0 | let idx_y = hm_i & 0x3f; |
505 | | |
506 | | // 2^hi |
507 | 0 | let exp_hi_i = (hm_i >> 6).wrapping_shl(52); |
508 | | // 2^mid |
509 | 0 | let exp_mid_i = EXP2_MID1[idx_y as usize].1; |
510 | | // (-1)^sign * 2^hi * 2^mid |
511 | | // Error <= 2^hi * 2^-53 |
512 | 0 | let exp2_hi_mid_i = (exp_hi_i.wrapping_add(exp_mid_i as i64) as u64).wrapping_add(sign); |
513 | 0 | let exp2_hi_mid = f64::from_bits(exp2_hi_mid_i); |
514 | | |
515 | | // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo). |
516 | | // Generated by Sollya with: |
517 | | // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]); |
518 | | // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]); |
519 | | // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60 |
520 | | const EXP2_COEFFS: [u64; 6] = [ |
521 | | 0x3ff0000000000000, |
522 | | 0x3f862e42fefa39ef, |
523 | | 0x3f0ebfbdff82a23a, |
524 | | 0x3e8c6b08d7076268, |
525 | | 0x3e03b2ad33f8b48b, |
526 | | 0x3d75d870c4d84445, |
527 | | ]; |
528 | | |
529 | 0 | let lo6_sqr = lo6 * lo6; |
530 | 0 | let d0 = f_fmla( |
531 | 0 | lo6, |
532 | 0 | f64::from_bits(EXP2_COEFFS[1]), |
533 | 0 | f64::from_bits(EXP2_COEFFS[0]), |
534 | | ); |
535 | 0 | let d1 = f_fmla( |
536 | 0 | lo6, |
537 | 0 | f64::from_bits(EXP2_COEFFS[3]), |
538 | 0 | f64::from_bits(EXP2_COEFFS[2]), |
539 | | ); |
540 | 0 | let d2 = f_fmla( |
541 | 0 | lo6, |
542 | 0 | f64::from_bits(EXP2_COEFFS[5]), |
543 | 0 | f64::from_bits(EXP2_COEFFS[4]), |
544 | | ); |
545 | 0 | let pp = f_polyeval3(lo6_sqr, d0, d1, d2); |
546 | | |
547 | 0 | let r = pp * exp2_hi_mid; |
548 | 0 | let r_u = r.to_bits(); |
549 | | |
550 | | #[cfg(any( |
551 | | all( |
552 | | any(target_arch = "x86", target_arch = "x86_64"), |
553 | | target_feature = "fma" |
554 | | ), |
555 | | target_arch = "aarch64" |
556 | | ))] |
557 | | const ERR: u64 = 64; |
558 | | #[cfg(not(any( |
559 | | all( |
560 | | any(target_arch = "x86", target_arch = "x86_64"), |
561 | | target_feature = "fma" |
562 | | ), |
563 | | target_arch = "aarch64" |
564 | | )))] |
565 | | const ERR: u64 = 128; |
566 | | |
567 | 0 | let r_upper = f64::from_bits(r_u + ERR) as f32; |
568 | 0 | let r_lower = f64::from_bits(r_u - ERR) as f32; |
569 | 0 | if r_upper == r_lower { |
570 | 0 | return r_upper; |
571 | 0 | } |
572 | | |
573 | | // Scale lower part of 2^(hi + mid) |
574 | 0 | let exp2_hi_mid_dd = DoubleDouble { |
575 | 0 | lo: if idx_y != 0 { |
576 | 0 | f64::from_bits((exp_hi_i as u64).wrapping_add(EXP2_MID1[idx_y as usize].0)) |
577 | | } else { |
578 | 0 | 0. |
579 | | }, |
580 | 0 | hi: exp2_hi_mid, |
581 | | }; |
582 | | |
583 | 0 | let r_dd = powf_dd(idx_x, dx, y6, lo6_hi, exp2_hi_mid_dd); |
584 | 0 | r_dd as f32 |
585 | 0 | } |
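
// Illustrative sketch (added, not part of the original crate) of the Ziv-style
// rounding test used in `f_powf` above: perturb the double result by a
// conservative error bound in ulps; if both perturbed values round to the same
// f32, that rounding is certainly correct, otherwise fall back to the slower
// double-double path (`powf_dd`).
#[allow(dead_code)]
fn rounding_test_sketch(r: f64, err_ulps: u64) -> Option<f32> {
    let r_u = r.to_bits();
    let upper = f64::from_bits(r_u + err_ulps) as f32;
    let lower = f64::from_bits(r_u - err_ulps) as f32;
    if upper == lower { Some(upper) } else { None }
}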
586 | | |
587 | | /// Dirty (fast, low-accuracy) pow
588 | | #[inline] |
589 | 0 | pub fn dirty_powf(d: f32, n: f32) -> f32 { |
590 | | use crate::exponents::dirty_exp2f; |
591 | | use crate::logs::dirty_log2f; |
592 | 0 | let value = d.abs(); |
593 | 0 | let lg = dirty_log2f(value); |
594 | 0 | let c = dirty_exp2f(n * lg); |
595 | 0 | if d < 0.0 { |
596 | 0 | let y = n as i32; |
597 | 0 | if y % 2 == 0 { c } else { -c } |
598 | | } else { |
599 | 0 | c |
600 | | } |
601 | 0 | } Unexecuted instantiation: pxfm::powf::dirty_powf
602 | | |
603 | | #[cfg(test)] |
604 | | mod tests { |
605 | | use super::*; |
606 | | |
607 | | #[test] |
608 | | fn powf_test() { |
609 | | assert!( |
610 | | (powf(2f32, 3f32) - 8f32).abs() < 1e-6, |
611 | | "Invalid result {}", |
612 | | powf(2f32, 3f32) |
613 | | ); |
614 | | assert!( |
615 | | (powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6, |
616 | | "Invalid result {}", |
617 | | powf(0.5f32, 2f32) |
618 | | ); |
619 | | } |
620 | | |
621 | | #[test] |
622 | | fn f_powf_test() { |
623 | | assert!( |
624 | | (f_powf(2f32, 3f32) - 8f32).abs() < 1e-6, |
625 | | "Invalid result {}", |
626 | | f_powf(2f32, 3f32) |
627 | | ); |
628 | | assert!( |
629 | | (f_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6, |
630 | | "Invalid result {}", |
631 | | f_powf(0.5f32, 2f32) |
632 | | ); |
633 | | assert_eq!(f_powf(0.5f32, 1.5432f32), 0.34312353); |
634 | | assert_eq!( |
635 | | f_powf(f32::INFINITY, 0.00000000000000000000000000000000038518824), |
636 | | f32::INFINITY |
637 | | ); |
638 | | assert_eq!(f_powf(f32::NAN, 0.0), 1.); |
639 | | assert_eq!(f_powf(1., f32::NAN), 1.); |
640 | | } |
641 | | |
642 | | #[test] |
643 | | fn dirty_powf_test() { |
644 | | assert!( |
645 | | (dirty_powf(2f32, 3f32) - 8f32).abs() < 1e-6, |
646 | | "Invalid result {}", |
647 | | dirty_powf(2f32, 3f32) |
648 | | ); |
649 | | assert!( |
650 | | (dirty_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6, |
651 | | "Invalid result {}", |
652 | | dirty_powf(0.5f32, 2f32) |
653 | | ); |
654 | | } |
655 | | } |