/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.28/src/logs/log2f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{f_fmla, f_fmlaf, set_exponent_f32}; |
30 | | use crate::logs::logf::{GenLogfBackend, LogfBackend}; |
31 | | use crate::polyeval::f_polyeval3; |
32 | | |
33 | | pub(crate) static LOG2_R: [u64; 128] = [ |
34 | | 0x0000000000000000, |
35 | | 0x3f872c7ba20f7327, |
36 | | 0x3f9743ee861f3556, |
37 | | 0x3fa184b8e4c56af8, |
38 | | 0x3fa77394c9d958d5, |
39 | | 0x3fad6ebd1f1febfe, |
40 | | 0x3fb1bb32a600549d, |
41 | | 0x3fb4c560fe68af88, |
42 | | 0x3fb7d60496cfbb4c, |
43 | | 0x3fb960caf9abb7ca, |
44 | | 0x3fbc7b528b70f1c5, |
45 | | 0x3fbf9c95dc1d1165, |
46 | | 0x3fc097e38ce60649, |
47 | | 0x3fc22dadc2ab3497, |
48 | | 0x3fc3c6fb650cde51, |
49 | | 0x3fc494f863b8df35, |
50 | | 0x3fc633a8bf437ce1, |
51 | | 0x3fc7046031c79f85, |
52 | | 0x3fc8a8980abfbd32, |
53 | | 0x3fc97c1cb13c7ec1, |
54 | | 0x3fcb2602497d5346, |
55 | | 0x3fcbfc67a7fff4cc, |
56 | | 0x3fcdac22d3e441d3, |
57 | | 0x3fce857d3d361368, |
58 | | 0x3fd01d9bbcfa61d4, |
59 | | 0x3fd08bce0d95fa38, |
60 | | 0x3fd169c05363f158, |
61 | | 0x3fd1d982c9d52708, |
62 | | 0x3fd249cd2b13cd6c, |
63 | | 0x3fd32bfee370ee68, |
64 | | 0x3fd39de8e1559f6f, |
65 | | 0x3fd4106017c3eca3, |
66 | | 0x3fd4f6fbb2cec598, |
67 | | 0x3fd56b22e6b578e5, |
68 | | 0x3fd5dfdcf1eeae0e, |
69 | | 0x3fd6552b49986277, |
70 | | 0x3fd6cb0f6865c8ea, |
71 | | 0x3fd7b89f02cf2aad, |
72 | | 0x3fd8304d90c11fd3, |
73 | | 0x3fd8a8980abfbd32, |
74 | | 0x3fd921800924dd3b, |
75 | | 0x3fd99b072a96c6b2, |
76 | | 0x3fda8ff971810a5e, |
77 | | 0x3fdb0b67f4f46810, |
78 | | 0x3fdb877c57b1b070, |
79 | | 0x3fdc043859e2fdb3, |
80 | | 0x3fdc819dc2d45fe4, |
81 | | 0x3fdcffae611ad12b, |
82 | | 0x3fdd7e6c0abc3579, |
83 | | 0x3fddfdd89d586e2b, |
84 | | 0x3fde7df5fe538ab3, |
85 | | 0x3fdefec61b011f85, |
86 | | 0x3fdf804ae8d0cd02, |
87 | | 0x3fe0014332be0033, |
88 | | 0x3fe042bd4b9a7c99, |
89 | | 0x3fe08494c66b8ef0, |
90 | | 0x3fe0c6caaf0c5597, |
91 | | 0x3fe1096015dee4da, |
92 | | 0x3fe14c560fe68af9, |
93 | | 0x3fe18fadb6e2d3c2, |
94 | | 0x3fe1d368296b5255, |
95 | | 0x3fe217868b0c37e8, |
96 | | 0x3fe25c0a0463beb0, |
97 | | 0x3fe2a0f3c340705c, |
98 | | 0x3fe2e644fac04fd8, |
99 | | 0x3fe2e644fac04fd8, |
100 | | 0x3fe32bfee370ee68, |
101 | | 0x3fe37222bb70747c, |
102 | | 0x3fe3b8b1c68fa6ed, |
103 | | 0x3fe3ffad4e74f1d6, |
104 | | 0x3fe44716a2c08262, |
105 | | 0x3fe44716a2c08262, |
106 | | 0x3fe48eef19317991, |
107 | | 0x3fe4d7380dcc422d, |
108 | | 0x3fe51ff2e30214bc, |
109 | | 0x3fe5692101d9b4a6, |
110 | | 0x3fe5b2c3da19723b, |
111 | | 0x3fe5b2c3da19723b, |
112 | | 0x3fe5fcdce2727ddb, |
113 | | 0x3fe6476d98ad990a, |
114 | | 0x3fe6927781d932a8, |
115 | | 0x3fe6927781d932a8, |
116 | | 0x3fe6ddfc2a78fc63, |
117 | | 0x3fe729fd26b707c8, |
118 | | 0x3fe7767c12967a45, |
119 | | 0x3fe7767c12967a45, |
120 | | 0x3fe7c37a9227e7fb, |
121 | | 0x3fe810fa51bf65fd, |
122 | | 0x3fe810fa51bf65fd, |
123 | | 0x3fe85efd062c656d, |
124 | | 0x3fe8ad846cf369a4, |
125 | | 0x3fe8ad846cf369a4, |
126 | | 0x3fe8fc924c89ac84, |
127 | | 0x3fe94c287492c4db, |
128 | | 0x3fe94c287492c4db, |
129 | | 0x3fe99c48be2063c8, |
130 | | 0x3fe9ecf50bf43f13, |
131 | | 0x3fe9ecf50bf43f13, |
132 | | 0x3fea3e2f4ac43f60, |
133 | | 0x3fea8ff971810a5e, |
134 | | 0x3fea8ff971810a5e, |
135 | | 0x3feae255819f022d, |
136 | | 0x3feb35458761d479, |
137 | | 0x3feb35458761d479, |
138 | | 0x3feb88cb9a2ab521, |
139 | | 0x3feb88cb9a2ab521, |
140 | | 0x3febdce9dcc96187, |
141 | | 0x3fec31a27dd00b4a, |
142 | | 0x3fec31a27dd00b4a, |
143 | | 0x3fec86f7b7ea4a89, |
144 | | 0x3fec86f7b7ea4a89, |
145 | | 0x3fecdcebd2373995, |
146 | | 0x3fed338120a6dd9d, |
147 | | 0x3fed338120a6dd9d, |
148 | | 0x3fed8aba045b01c8, |
149 | | 0x3fed8aba045b01c8, |
150 | | 0x3fede298ec0bac0d, |
151 | | 0x3fede298ec0bac0d, |
152 | | 0x3fee3b20546f554a, |
153 | | 0x3fee3b20546f554a, |
154 | | 0x3fee9452c8a71028, |
155 | | 0x3fee9452c8a71028, |
156 | | 0x3feeee32e2aeccbf, |
157 | | 0x3feeee32e2aeccbf, |
158 | | 0x3fef48c34bd1e96f, |
159 | | 0x3fef48c34bd1e96f, |
160 | | 0x3fefa406bd2443df, |
161 | | 0x3ff0000000000000, |
162 | | ]; |
163 | | |
164 | | #[inline(always)] |
165 | 0 | fn log2f_gen<B: LogfBackend>(x: f32, backend: B) -> f32 { |
166 | 0 | let mut x_u = x.to_bits(); |
167 | | |
168 | | const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32; |
169 | | |
170 | 0 | let mut m = -(E_BIAS as i32); |
171 | 0 | if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() { |
172 | 0 | if x == 0.0 { |
173 | 0 | return f32::NEG_INFINITY; |
174 | 0 | } |
175 | 0 | if x_u == 0x80000000u32 { |
176 | 0 | return f32::NEG_INFINITY; |
177 | 0 | } |
178 | 0 | if x.is_sign_negative() && !x.is_nan() { |
179 | 0 | return f32::NAN + x; |
180 | 0 | } |
181 | | // x is +inf or nan |
182 | 0 | if x.is_nan() || x.is_infinite() { |
183 | 0 | return x + x; |
184 | 0 | } |
185 | | // Normalize denormal inputs. |
186 | 0 | x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits(); |
187 | 0 | m -= 23; |
188 | 0 | } |
189 | | |
190 | 0 | m = m.wrapping_add(x_u.wrapping_shr(23) as i32); |
191 | 0 | let mant = x_u & 0x007F_FFFF; |
192 | 0 | let index = mant.wrapping_shr(16); |
193 | | |
194 | 0 | x_u = set_exponent_f32(x_u, 0x7F); |
195 | | |
196 | 0 | let u = f32::from_bits(x_u); |
197 | | |
198 | 0 | let v = if B::HAS_FMA { |
199 | | use crate::logs::logf::LOG_REDUCTION_F32; |
200 | 0 | backend.fmaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64 // Exact. |
201 | | } else { |
202 | | use crate::logs::LOG_RANGE_REDUCTION; |
203 | 0 | backend.fma( |
204 | 0 | u as f64, |
205 | 0 | f64::from_bits(LOG_RANGE_REDUCTION[index as usize]), |
206 | | -1.0, |
207 | | ) // Exact |
208 | | }; |
209 | | // Degree-5 polynomial approximation of log2 generated by Sollya with: |
210 | | // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]); |
211 | | |
212 | 0 | let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]); |
213 | | |
214 | | const COEFFS: [u64; 5] = [ |
215 | | 0x3ff71547652b8133, |
216 | | 0xbfe71547652d1e33, |
217 | | 0x3fdec70a098473de, |
218 | | 0xbfd7154c5ccdf121, |
219 | | 0x3fd2514fd90a130a, |
220 | | ]; |
221 | 0 | let v2 = v * v; // Exact |
222 | 0 | let c0 = backend.fma(v, f64::from_bits(COEFFS[0]), extra_factor); |
223 | 0 | let c1 = backend.fma(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1])); |
224 | 0 | let c2 = backend.fma(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3])); |
225 | | |
226 | 0 | let r = backend.polyeval3(v2, c0, c1, c2); |
227 | 0 | r as f32 |
228 | 0 | } Unexecuted instantiation: pxfm::logs::log2f::log2f_gen::<pxfm::logs::logf::FmaLogfBackend> Unexecuted instantiation: pxfm::logs::log2f::log2f_gen::<pxfm::logs::logf::GenLogfBackend> |
229 | | |
230 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
231 | | #[target_feature(enable = "avx", enable = "fma")] |
232 | 0 | unsafe fn log2f_fma_impl(x: f32) -> f32 { |
233 | | use crate::logs::logf::FmaLogfBackend; |
234 | 0 | log2f_gen(x, FmaLogfBackend {}) |
235 | 0 | } |
236 | | |
237 | | /// Logarithm of base 2 |
238 | | /// |
239 | | /// Max found ULP 0.4999996 |
240 | 0 | pub fn f_log2f(x: f32) -> f32 { |
241 | | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] |
242 | | { |
243 | | log2f_gen(x, GenLogfBackend {}) |
244 | | } |
245 | | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
246 | | { |
247 | | use std::sync::OnceLock; |
248 | | static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new(); |
249 | 0 | let q = EXECUTOR.get_or_init(|| { |
250 | 0 | if std::arch::is_x86_feature_detected!("avx") |
251 | 0 | && std::arch::is_x86_feature_detected!("fma") |
252 | | { |
253 | 0 | log2f_fma_impl |
254 | | } else { |
255 | 0 | fn def_log2f(x: f32) -> f32 { |
256 | 0 | log2f_gen(x, GenLogfBackend {}) |
257 | 0 | } |
258 | 0 | def_log2f |
259 | | } |
260 | 0 | }); |
261 | 0 | unsafe { q(x) } |
262 | | } |
263 | 0 | } |
264 | | |
265 | | /// Natural logarithm using FMA |
266 | | /// |
267 | | /// Max found ULP 0.4999996 |
268 | | #[inline(always)] |
269 | | #[allow(dead_code)] |
270 | 0 | pub(crate) fn f_log2fx(x: f32) -> f64 { |
271 | 0 | let mut x_u = x.to_bits(); |
272 | | |
273 | | const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32; |
274 | | |
275 | 0 | let mut m = -(E_BIAS as i32); |
276 | 0 | if x_u == 0x3f80_0000u32 { |
277 | 0 | return 0.0; |
278 | 0 | } |
279 | | |
280 | 0 | if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() { |
281 | 0 | if x == 0.0 { |
282 | 0 | return f64::NEG_INFINITY; |
283 | 0 | } |
284 | 0 | if x_u == 0x80000000u32 { |
285 | 0 | return f64::NEG_INFINITY; |
286 | 0 | } |
287 | 0 | if x.is_sign_negative() && !x.is_nan() { |
288 | 0 | return f64::NAN + x as f64; |
289 | 0 | } |
290 | | // x is +inf or nan |
291 | 0 | if x.is_nan() || x.is_infinite() { |
292 | 0 | return (x + x) as f64; |
293 | 0 | } |
294 | | // Normalize denormal inputs. |
295 | 0 | x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits(); |
296 | 0 | m -= 23; |
297 | 0 | } |
298 | | |
299 | 0 | m = m.wrapping_add(x_u.wrapping_shr(23) as i32); |
300 | 0 | let mant = x_u & 0x007F_FFFF; |
301 | 0 | let index = mant.wrapping_shr(16); |
302 | | |
303 | 0 | x_u = set_exponent_f32(x_u, 0x7F); |
304 | | |
305 | | let v; |
306 | 0 | let u = f32::from_bits(x_u); |
307 | | |
308 | | #[cfg(any( |
309 | | all( |
310 | | any(target_arch = "x86", target_arch = "x86_64"), |
311 | | target_feature = "fma" |
312 | | ), |
313 | | target_arch = "aarch64" |
314 | | ))] |
315 | | { |
316 | | use crate::logs::logf::LOG_REDUCTION_F32; |
317 | | v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact. |
318 | | } |
319 | | #[cfg(not(any( |
320 | | all( |
321 | | any(target_arch = "x86", target_arch = "x86_64"), |
322 | | target_feature = "fma" |
323 | | ), |
324 | | target_arch = "aarch64" |
325 | | )))] |
326 | | { |
327 | | use crate::logs::log2::LOG_RANGE_REDUCTION; |
328 | 0 | v = f_fmla( |
329 | 0 | u as f64, |
330 | 0 | f64::from_bits(LOG_RANGE_REDUCTION[index as usize]), |
331 | 0 | -1.0, |
332 | 0 | ); // Exact |
333 | | } |
334 | | // Degree-5 polynomial approximation of log2 generated by Sollya with: |
335 | | // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]); |
336 | | |
337 | 0 | let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]); |
338 | | |
339 | | const COEFFS: [u64; 5] = [ |
340 | | 0x3ff71547652b8133, |
341 | | 0xbfe71547652d1e33, |
342 | | 0x3fdec70a098473de, |
343 | | 0xbfd7154c5ccdf121, |
344 | | 0x3fd2514fd90a130a, |
345 | | ]; |
346 | 0 | let v2 = v * v; // Exact |
347 | 0 | let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor); |
348 | 0 | let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1])); |
349 | 0 | let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3])); |
350 | | |
351 | 0 | f_polyeval3(v2, c0, c1, c2) |
352 | 0 | } |
353 | | |
354 | | /// Dirty log2 using FMA |
355 | | #[inline(always)] |
356 | | #[allow(dead_code)] |
357 | 0 | pub(crate) fn dirty_log2f(d: f32) -> f32 { |
358 | 0 | let mut ix = d.to_bits(); |
359 | | /* reduce x into [sqrt(2)/2, sqrt(2)] */ |
360 | 0 | ix = ix.wrapping_add(0x3f800000 - 0x3f3504f3); |
361 | 0 | let n = (ix >> 23) as i32 - 0x7f; |
362 | 0 | ix = (ix & 0x007fffff).wrapping_add(0x3f3504f3); |
363 | 0 | let a = f32::from_bits(ix); |
364 | | |
365 | 0 | let x = (a - 1.) / (a + 1.); |
366 | | |
367 | 0 | let x2 = x * x; |
368 | 0 | let mut u = 0.4121985850084821691e+0; |
369 | 0 | u = f_fmlaf(u, x2, 0.5770780163490337802e+0); |
370 | 0 | u = f_fmlaf(u, x2, 0.9617966939259845749e+0); |
371 | 0 | f_fmlaf(x2 * x, u, f_fmlaf(x, 0.2885390081777926802e+1, n as f32)) |
372 | 0 | } |
373 | | |
374 | | #[cfg(test)] |
375 | | mod tests { |
376 | | use super::*; |
377 | | |
378 | | #[test] |
379 | | fn test_log2f() { |
380 | | assert!((f_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5); |
381 | | assert!((f_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5); |
382 | | assert_eq!(f_log2f(0.), f32::NEG_INFINITY); |
383 | | assert_eq!(f_log2f(1.0), 0.0); |
384 | | assert!(f_log2f(-1.).is_nan()); |
385 | | assert!(f_log2f(f32::NAN).is_nan()); |
386 | | assert!(f_log2f(f32::NEG_INFINITY).is_nan()); |
387 | | assert_eq!(f_log2f(f32::INFINITY), f32::INFINITY); |
388 | | } |
389 | | |
390 | | #[test] |
391 | | fn test_dirty_log2f() { |
392 | | assert!((dirty_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5); |
393 | | assert!((dirty_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5); |
394 | | } |
395 | | } |