/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/exp2m1f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::exponents::exp2f::EXP2F_TABLE; |
30 | | use crate::exponents::expf::{ExpfBackend, GenericExpfBackend}; |
31 | | |
32 | | #[inline(always)] |
33 | 0 | fn exp2m1f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 { |
34 | 0 | let x_u = x.to_bits(); |
35 | 0 | let x_abs = x_u & 0x7fff_ffffu32; |
36 | 0 | if x_abs >= 0x4300_0000u32 || x_abs <= 0x3d00_0000u32 { |
37 | | // |x| <= 2^-5 |
38 | 0 | if x_abs <= 0x3d00_0000u32 { |
39 | | // Minimax polynomial generated by Sollya with: |
40 | | // > display = hexadecimal; |
41 | | // > fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]); |
42 | | const C: [u64; 6] = [ |
43 | | 0x3fe62e42fefa39f3, |
44 | | 0x3fcebfbdff82c57b, |
45 | | 0x3fac6b08d6f2d7aa, |
46 | | 0x3f83b2ab6fc92f5d, |
47 | | 0x3f55d897cfe27125, |
48 | | 0x3f243090e61e6af1, |
49 | | ]; |
50 | 0 | let xd = x as f64; |
51 | 0 | let xsq = xd * xd; |
52 | 0 | let c0 = backend.fma(xd, f64::from_bits(C[1]), f64::from_bits(C[0])); |
53 | 0 | let c1 = backend.fma(xd, f64::from_bits(C[3]), f64::from_bits(C[2])); |
54 | 0 | let c2 = backend.fma(xd, f64::from_bits(C[5]), f64::from_bits(C[4])); |
55 | 0 | let p = backend.polyeval3(xsq, c0, c1, c2); |
56 | 0 | return (p * xd) as f32; |
57 | 0 | } |
58 | | |
59 | | // x >= 128, or x is nan |
60 | 0 | if x.is_sign_positive() { |
61 | | // x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan |
62 | 0 | return x + f32::INFINITY; |
63 | 0 | } |
64 | 0 | } |
65 | | |
66 | 0 | if x <= -25.0 { |
67 | | // 2^(-inf) - 1 = -1 |
68 | 0 | if x.is_infinite() { |
69 | 0 | return -1.0; |
70 | 0 | } |
71 | | // 2^nan - 1 = nan |
72 | 0 | if x.is_nan() { |
73 | 0 | return x; |
74 | 0 | } |
75 | 0 | return -1.0; |
76 | 0 | } |
77 | | |
78 | | // For -25 < x < 128, to compute 2^x, we perform the following range |
79 | | // reduction: find hi, mid, lo such that: |
80 | | // x = hi + mid + lo, in which: |
81 | | // hi is an integer, |
82 | | // 0 <= mid * 2^5 < 32 is an integer, |
83 | | // -2^(-6) <= lo <= 2^(-6). |
84 | | // In particular, |
85 | | // hi + mid = round(x * 2^5) * 2^(-5). |
86 | | // Then, |
87 | | // 2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo. |
88 | | // 2^mid is stored in the lookup table of 32 elements. |
89 | | // 2^lo is computed using a degree-4 minimax polynomial generated by Sollya. |
90 | | // We perform 2^hi * 2^mid by simply add hi to the exponent field of 2^mid. |
91 | | |
92 | | // kf = (hi + mid) * 2^5 = round(x * 2^5) |
93 | | |
94 | 0 | let xd = x as f64; |
95 | | |
96 | 0 | let kf = backend.roundf(x * 64.0); |
97 | 0 | let k = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate. |
98 | | // dx = lo = x - (hi + mid) = x - kf * 2^(-6) |
99 | 0 | let dx = backend.fma(f64::from_bits(0xbf90000000000000), kf as f64, xd); |
100 | | |
101 | | const TABLE_BITS: u32 = 6; |
102 | | const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1; |
103 | | |
104 | | // hi = floor(kf * 2^(-5)) |
105 | | // exp_hi = shift hi to the exponent field of double precision. |
106 | 0 | let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52); |
107 | | |
108 | | // mh = 2^hi * 2^mid |
109 | | // mh_bits = bit field of mh |
110 | 0 | let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi); |
111 | 0 | let mh = f64::from_bits(mh_bits as u64); |
112 | | |
113 | | // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with: |
114 | | // > P = fpminimax((2^y - 1)/y, 4, [|D...|], [-1/64. 1/64]); |
115 | | // see ./notes/exp2f.sollya |
116 | | const C: [u64; 5] = [ |
117 | | 0x3fe62e42fefa39ef, |
118 | | 0x3fcebfbdff8131c4, |
119 | | 0x3fac6b08d7061695, |
120 | | 0x3f83b2b1bee74b2a, |
121 | | 0x3f55d88091198529, |
122 | | ]; |
123 | 0 | let dx_sq = dx * dx; |
124 | 0 | let c1 = backend.fma(dx, f64::from_bits(C[0]), 1.0); |
125 | 0 | let c2 = backend.fma(dx, f64::from_bits(C[2]), f64::from_bits(C[1])); |
126 | 0 | let c3 = backend.fma(dx, f64::from_bits(C[4]), f64::from_bits(C[3])); |
127 | 0 | let p = backend.polyeval3(dx_sq, c1, c2, c3); |
128 | | // 2^x = 2^(hi + mid + lo) |
129 | | // = 2^(hi + mid) * 2^lo |
130 | | // ~ mh * (1 + lo * P(lo)) |
131 | | // = mh + (mh*lo) * P(lo) |
132 | 0 | backend.fma(p, mh, -1.) as f32 |
133 | 0 | } Unexecuted instantiation: pxfm::exponents::exp2m1f::exp2m1f_gen::<pxfm::exponents::expf::FmaBackend> Unexecuted instantiation: pxfm::exponents::exp2m1f::exp2m1f_gen::<pxfm::exponents::expf::GenericExpfBackend> |
134 | | |
135 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
136 | | #[target_feature(enable = "avx", enable = "fma")] |
137 | 0 | unsafe fn exp2m1f_fma_impl(x: f32) -> f32 { |
138 | | use crate::exponents::expf::FmaBackend; |
139 | 0 | exp2m1f_gen(x, FmaBackend {}) |
140 | 0 | } |
141 | | |
142 | | /// Computes 2^x - 1 |
143 | | /// |
144 | | /// Max found ULP 0.5 |
145 | | #[inline] |
146 | 0 | pub fn f_exp2m1f(x: f32) -> f32 { |
147 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
148 | | { |
149 | | exp2m1f_gen(x, GenericExpfBackend {}) |
150 | | } |
151 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
152 | | { |
153 | | use std::sync::OnceLock; |
154 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
155 | 0 | let q = EXECUTOR.get_or_init(|| { |
156 | 0 | if std::arch::is_x86_feature_detected!("avx") |
157 | 0 | && std::arch::is_x86_feature_detected!("fma") |
158 | | { |
159 | 0 | exp2m1f_fma_impl |
160 | | } else { |
161 | 0 | fn def_exp2f(x: f32) -> f32 { |
162 | 0 | exp2m1f_gen(x, GenericExpfBackend {}) |
163 | 0 | } |
164 | 0 | def_exp2f |
165 | | } |
166 | 0 | }); |
167 | 0 | unsafe { q(x) } |
168 | | } |
169 | 0 | } |
170 | | |
171 | | #[cfg(test)] |
172 | | mod tests { |
173 | | use super::*; |
174 | | |
175 | | #[test] |
176 | | fn test_exp2m1f() { |
177 | | assert_eq!(f_exp2m1f(0.432423), 0.34949815); |
178 | | assert_eq!(f_exp2m1f(-4.), -0.9375); |
179 | | assert_eq!(f_exp2m1f(5.43122), 42.14795); |
180 | | assert_eq!(f_exp2m1f(4.), 15.0); |
181 | | assert_eq!(f_exp2m1f(3.), 7.); |
182 | | assert_eq!(f_exp2m1f(0.1), 0.07177346); |
183 | | assert_eq!(f_exp2m1f(0.0543432432), 0.038386293); |
184 | | assert!(f_exp2m1f(f32::NAN).is_nan()); |
185 | | assert_eq!(f_exp2m1f(f32::INFINITY), f32::INFINITY); |
186 | | assert_eq!(f_exp2m1f(f32::NEG_INFINITY), -1.0); |
187 | | } |
188 | | } |