/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp10f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::exponents::expf::{ExpfBackend, GenericExpfBackend}; |
30 | | |
/// Result of the range reduction performed by `exp_b_range_reduc`.
///
/// The reduced argument satisfies `b^x ~ hi * b^lo`, where `hi` is an exact
/// power-of-two style scale factor and `lo` is the small remainder that is fed
/// to a polynomial.
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) struct ExpBReduc {
    /// Scale factor `2^(hi + mid)` assembled from the lookup table and the exponent bits.
    pub(crate) hi: f64,
    /// Remainder of the argument after the power-of-two part has been removed.
    pub(crate) lo: f64,
}
35 | | |
/// Number of bits of the reduced exponent that index `EXP_2_MID`.
const MID_BITS: u32 = 5;
/// Mask extracting the low `MID_BITS` bits (the table index).
const MID_MASK: usize = (1 << MID_BITS) - 1;
/// `log2(10)` scaled by `2^MID_BITS`; multiplying `x` by it gives the
/// (unrounded) number of `1/2^MID_BITS` binary steps in `10^x`.
const LOG2_B: f64 = f64::from_bits(0x400a_934f_0979_a371) * (1u64 << MID_BITS) as f64;
/// High part of `-log10(2)`, scaled by `2^-MID_BITS`.
const M_LOGB_2_HI: f64 = f64::from_bits(0xbfd3_4413_509f_8000) / (1u64 << MID_BITS) as f64;
/// Low-order correction to `M_LOGB_2_HI`, scaled by `2^-MID_BITS`.
const M_LOGB_2_LO: f64 = f64::from_bits(0x3d38_0433_b83b_532a) / (1u64 << MID_BITS) as f64;
/// Bit patterns of `2^(i / 2^MID_BITS)` as `f64`, for `i` in `0..2^MID_BITS`.
const EXP_2_MID: [u64; MID_MASK + 1] = [
    0x3ff0_0000_0000_0000,
    0x3ff0_59b0_d315_8574,
    0x3ff0_b558_6cf9_890f,
    0x3ff1_1301_d012_5b51,
    0x3ff1_72b8_3c7d_517b,
    0x3ff1_d487_3168_b9aa,
    0x3ff2_387a_6e75_6238,
    0x3ff2_9e9d_f51f_dee1,
    0x3ff3_06fe_0a31_b715,
    0x3ff3_71a7_373a_a9cb,
    0x3ff3_dea6_4c12_3422,
    0x3ff4_4e08_6061_892d,
    0x3ff4_bfda_d536_2a27,
    0x3ff5_342b_569d_4f82,
    0x3ff5_ab07_dd48_5429,
    0x3ff6_247e_b03a_5585,
    0x3ff6_a09e_667f_3bcd,
    0x3ff7_1f75_e8ec_5f74,
    0x3ff7_a114_73eb_0187,
    0x3ff8_2589_994c_ce13,
    0x3ff8_ace5_422a_a0db,
    0x3ff9_3737_b0cd_c5e5,
    0x3ff9_c491_82a3_f090,
    0x3ffa_5503_b23e_255d,
    0x3ffa_e89f_995a_d3ad,
    0x3ffb_7f76_f2fb_5e47,
    0x3ffc_199b_dd85_529c,
    0x3ffc_b720_dcef_9069,
    0x3ffd_5818_dcfb_a487,
    0x3ffd_fc97_337b_9b5f,
    0x3ffe_a4af_a2a4_90da,
    0x3fff_5076_5b6e_4540,
];
75 | | |
// Coefficients (as f64 bit patterns) of the degree-5 minimax polynomial used
// to approximate 10^dx on the reduced interval. Generated by Sollya:
//   > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
// so that
//   10^dx ~ P(dx) = 1 + dx * Q(dx)
//                 = 1 + COEFFS[0] * dx + COEFFS[1] * dx^2 + ... + COEFFS[4] * dx^5.
pub(crate) const EXP10F_COEFFS: [u64; 5] = [
    0x4002_6bb1_bbb5_5515,
    0x4005_3524_c73b_d3ea,
    0x4000_4705_91df_f149,
    0x3ff2_bd7c_0a9f_bc4d,
    0x3fe1_429e_74a9_8f43,
];
87 | | |
88 | | /// Range reduction function equivalent to exp_b_range_reduc |
89 | | #[inline(always)] |
90 | 0 | pub(crate) fn exp_b_range_reduc<B: ExpfBackend>(x: f32, backend: &B) -> ExpBReduc { |
91 | 0 | let xd = x as f64; |
92 | | |
93 | | // kd = round(log2(b) * x) |
94 | 0 | let kd = backend.round(LOG2_B * xd); |
95 | 0 | let k = unsafe { kd.to_int_unchecked::<i32>() }; // it's already not indeterminate. |
96 | | |
97 | | // hi = floor(kd / 2^MID_BITS) |
98 | 0 | let exp_hi = (k.wrapping_shr(MID_BITS) as u64).wrapping_shl(52); // 52 = fraction bits in f64 |
99 | | |
100 | | // mh = 2^hi * 2^mid |
101 | 0 | let mid_index = (k as usize) & MID_MASK; |
102 | 0 | let mh_bits = EXP_2_MID[mid_index].wrapping_add(exp_hi); |
103 | 0 | let mh = f64::from_bits(mh_bits); |
104 | | |
105 | | // dx = x - (hi + mid) * log(2) |
106 | 0 | let z0 = backend.fma(kd, M_LOGB_2_HI, xd); |
107 | 0 | let dx = backend.fma(kd, M_LOGB_2_LO, z0); |
108 | | |
109 | 0 | ExpBReduc { lo: dx, hi: mh } |
110 | 0 | } Unexecuted instantiation: pxfm::exponents::exp10f::exp_b_range_reduc::<pxfm::exponents::expf::FmaBackend> Unexecuted instantiation: pxfm::exponents::exp10f::exp_b_range_reduc::<pxfm::exponents::expf::GenericExpfBackend> |
111 | | |
112 | | #[inline(always)] |
113 | 0 | fn exp10f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 { |
114 | 0 | let x_u = x.to_bits(); |
115 | 0 | let x_abs = x_u & 0x7fffffff; |
116 | | |
117 | | // When |x| >= log10(2^128), or x is nan |
118 | 0 | if x_abs >= 0x421a209bu32 { |
119 | | // When x < log10(2^-150) or nan |
120 | 0 | if x_u > 0xc2349e35u32 { |
121 | | // exp(-Inf) = 0 |
122 | 0 | if x.is_infinite() { |
123 | 0 | return 0.0; |
124 | 0 | } |
125 | | // exp(nan) = nan |
126 | 0 | if x.is_nan() { |
127 | 0 | return x; |
128 | 0 | } |
129 | 0 | return 0.0; |
130 | 0 | } |
131 | | // x >= log10(2^128) or nan |
132 | 0 | if x > 0. && (x_u >= 0x421a209bu32) { |
133 | | // x is +inf or nan |
134 | 0 | return x + f32::INFINITY; |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | 0 | if x_abs <= 0x3d000000u32 { |
139 | | // |x| < 1/32 |
140 | 0 | if x_abs <= 0x3b9a209bu32 { |
141 | 0 | if x_u == 0xb25e5bd9u32 { |
142 | | // x = -1.2943e-08 |
143 | 0 | return 1.; |
144 | 0 | } |
145 | | // |x| < 2^-25 |
146 | | // 10^x ~ 1 + log(10) * x |
147 | 0 | if x_abs <= 0x32800000u32 { |
148 | 0 | return backend.fmaf(x, f32::from_bits(0x40135da2), 1.0); |
149 | 0 | } |
150 | 0 | } |
151 | | |
152 | 0 | let xd = x as f64; |
153 | | |
154 | | // Special polynomial for small x. |
155 | | // Generated by Sollya: |
156 | | // d = [-1/32, 1/32]; |
157 | | // f_exp10f = (10^y - 1)/y; |
158 | | // Q = fpminimax(f_exp10f, 6, [|D...|], d, relative, floating); |
159 | | |
160 | | // See ./notes/exp10f_small.sollya |
161 | 0 | let p = backend.polyeval7( |
162 | 0 | xd, |
163 | 0 | f64::from_bits(0x40026bb1bbb55516), |
164 | 0 | f64::from_bits(0x40053524c73cfbf6), |
165 | 0 | f64::from_bits(0x4000470591de0b07), |
166 | 0 | f64::from_bits(0x3ff2bd760599f3a5), |
167 | 0 | f64::from_bits(0x3fe142a001511a6f), |
168 | 0 | f64::from_bits(0x3fca7feffa781d53), |
169 | 0 | f64::from_bits(0x3fb16e53492c0f0e), |
170 | | ); |
171 | 0 | return backend.fma(p, xd, 1.) as f32; |
172 | 0 | } |
173 | | |
174 | | // Range reduction: 10^x = 2^(mid + hi) * 10^lo |
175 | | // rr = (2^(mid + hi), lo) |
176 | 0 | let rr = exp_b_range_reduc(x, &backend); |
177 | | |
178 | | // The low part is approximated by a degree-5 minimax polynomial. |
179 | | // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5 |
180 | 0 | let lo2 = rr.lo * rr.lo; |
181 | | // c0 = 1 + COEFFS[0] * lo |
182 | 0 | let c0 = backend.fma(rr.lo, f64::from_bits(EXP10F_COEFFS[0]), 1.0); |
183 | | // c1 = COEFFS[1] + COEFFS[2] * lo |
184 | 0 | let c1 = backend.fma( |
185 | 0 | rr.lo, |
186 | 0 | f64::from_bits(EXP10F_COEFFS[2]), |
187 | 0 | f64::from_bits(EXP10F_COEFFS[1]), |
188 | | ); |
189 | | // c2 = COEFFS[3] + COEFFS[4] * lo |
190 | 0 | let c2 = backend.fma( |
191 | 0 | rr.lo, |
192 | 0 | f64::from_bits(EXP10F_COEFFS[4]), |
193 | 0 | f64::from_bits(EXP10F_COEFFS[3]), |
194 | | ); |
195 | | // p = c1 + c2 * lo^2 |
196 | | // = COEFFS[1] + COEFFS[2] * lo + COEFFS[3] * lo^2 + COEFFS[4] * lo^3 |
197 | 0 | let p = backend.fma(lo2, c2, c1); |
198 | | // 10^lo ~ c0 + p * lo^2 |
199 | | // 10^x = 2^(mid + hi) * 10^lo |
200 | | // ~ mh * (c0 + p * lo^2) |
201 | | // = (mh * c0) + p * (mh * lo^2) |
202 | 0 | backend.fma(p, lo2 * rr.hi, c0 * rr.hi) as f32 |
203 | 0 | } Unexecuted instantiation: pxfm::exponents::exp10f::exp10f_gen::<pxfm::exponents::expf::FmaBackend> Unexecuted instantiation: pxfm::exponents::exp10f::exp10f_gen::<pxfm::exponents::expf::GenericExpfBackend> |
204 | | |
205 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
206 | | #[target_feature(enable = "avx", enable = "fma")] |
207 | 0 | unsafe fn exp10f_fma_impl(x: f32) -> f32 { |
208 | | use crate::exponents::expf::FmaBackend; |
209 | 0 | exp10f_gen(x, FmaBackend {}) |
210 | 0 | } |
211 | | |
212 | | /// Computes exp10 |
213 | | /// |
214 | | /// Max found ULP 0.49999508 |
215 | | #[inline] |
216 | 0 | pub fn f_exp10f(x: f32) -> f32 { |
217 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
218 | | { |
219 | | exp10f_gen(x, GenericExpfBackend {}) |
220 | | } |
221 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
222 | | { |
223 | | use std::sync::OnceLock; |
224 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
225 | 0 | let q = EXECUTOR.get_or_init(|| { |
226 | 0 | if std::arch::is_x86_feature_detected!("avx") |
227 | 0 | && std::arch::is_x86_feature_detected!("fma") |
228 | | { |
229 | 0 | exp10f_fma_impl |
230 | | } else { |
231 | 0 | fn def_exp10f(x: f32) -> f32 { |
232 | 0 | exp10f_gen(x, GenericExpfBackend {}) |
233 | 0 | } |
234 | 0 | def_exp10f |
235 | | } |
236 | 0 | }); Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f::{closure#0}Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f::{closure#0} |
237 | 0 | unsafe { q(x) } |
238 | | } |
239 | 0 | } Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f Unexecuted instantiation: pxfm::exponents::exp10f::f_exp10f |
240 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_exp10f` on small arguments, exact powers of ten and the
    /// special values (+inf, -inf, NaN).
    #[test]
    fn test_exp10f() {
        // (input, expected 10^input)
        let expectations: [(f32, f32); 7] = [
            (-1. / 64., 0.9646616),
            (1. / 64., 1.0366329),
            (1., 10.0),
            (2., 100.0),
            (3., 1000.0),
            (f32::INFINITY, f32::INFINITY),
            (f32::NEG_INFINITY, 0.),
        ];
        for &(input, expected) in expectations.iter() {
            assert_eq!(f_exp10f(input), expected);
        }

        // NaN never compares equal, so it is checked separately.
        assert!(f_exp10f(f32::NAN).is_nan());
    }
}