/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/logs/log2p1f.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::f_fmla; |
30 | | use crate::logs::LOG_RANGE_REDUCTION; |
31 | | use crate::polyeval::{f_estrin_polyeval7, f_polyeval6}; |
32 | | |
33 | | // Generated in SageMath: |
34 | | // print("[") |
35 | | // for i in range(128): |
36 | | // R = RealField(200) |
37 | | // r = R(2)**(-8) * ( R(2)**8 * (R(1) - R(2)**(-8)) / (R(1) + R(i)*R(2)**-7) ).ceil() |
38 | | // |
39 | | // if i == 0 or i == 127: |
40 | | // print(double_to_hex(0), ",") |
41 | | // else: |
42 | | // print(double_to_hex(-r.log2()), ",") |
43 | | // print("];") |
44 | | static LOG2_D: [u64; 128] = [ |
45 | | 0x0000000000000000, |
46 | | 0x3f872c7ba20f7327, |
47 | | 0x3f9743ee861f3556, |
48 | | 0x3fa184b8e4c56af8, |
49 | | 0x3fa77394c9d958d5, |
50 | | 0x3fad6ebd1f1febfe, |
51 | | 0x3fb1bb32a600549d, |
52 | | 0x3fb4c560fe68af88, |
53 | | 0x3fb7d60496cfbb4c, |
54 | | 0x3fb960caf9abb7ca, |
55 | | 0x3fbc7b528b70f1c5, |
56 | | 0x3fbf9c95dc1d1165, |
57 | | 0x3fc097e38ce60649, |
58 | | 0x3fc22dadc2ab3497, |
59 | | 0x3fc3c6fb650cde51, |
60 | | 0x3fc494f863b8df35, |
61 | | 0x3fc633a8bf437ce1, |
62 | | 0x3fc7046031c79f85, |
63 | | 0x3fc8a8980abfbd32, |
64 | | 0x3fc97c1cb13c7ec1, |
65 | | 0x3fcb2602497d5346, |
66 | | 0x3fcbfc67a7fff4cc, |
67 | | 0x3fcdac22d3e441d3, |
68 | | 0x3fce857d3d361368, |
69 | | 0x3fd01d9bbcfa61d4, |
70 | | 0x3fd08bce0d95fa38, |
71 | | 0x3fd169c05363f158, |
72 | | 0x3fd1d982c9d52708, |
73 | | 0x3fd249cd2b13cd6c, |
74 | | 0x3fd32bfee370ee68, |
75 | | 0x3fd39de8e1559f6f, |
76 | | 0x3fd4106017c3eca3, |
77 | | 0x3fd4f6fbb2cec598, |
78 | | 0x3fd56b22e6b578e5, |
79 | | 0x3fd5dfdcf1eeae0e, |
80 | | 0x3fd6552b49986277, |
81 | | 0x3fd6cb0f6865c8ea, |
82 | | 0x3fd7b89f02cf2aad, |
83 | | 0x3fd8304d90c11fd3, |
84 | | 0x3fd8a8980abfbd32, |
85 | | 0x3fd921800924dd3b, |
86 | | 0x3fd99b072a96c6b2, |
87 | | 0x3fda8ff971810a5e, |
88 | | 0x3fdb0b67f4f46810, |
89 | | 0x3fdb877c57b1b070, |
90 | | 0x3fdc043859e2fdb3, |
91 | | 0x3fdc819dc2d45fe4, |
92 | | 0x3fdcffae611ad12b, |
93 | | 0x3fdd7e6c0abc3579, |
94 | | 0x3fddfdd89d586e2b, |
95 | | 0x3fde7df5fe538ab3, |
96 | | 0x3fdefec61b011f85, |
97 | | 0x3fdf804ae8d0cd02, |
98 | | 0x3fe0014332be0033, |
99 | | 0x3fe042bd4b9a7c99, |
100 | | 0x3fe08494c66b8ef0, |
101 | | 0x3fe0c6caaf0c5597, |
102 | | 0x3fe1096015dee4da, |
103 | | 0x3fe14c560fe68af9, |
104 | | 0x3fe18fadb6e2d3c2, |
105 | | 0x3fe1d368296b5255, |
106 | | 0x3fe217868b0c37e8, |
107 | | 0x3fe25c0a0463beb0, |
108 | | 0x3fe2a0f3c340705c, |
109 | | 0x3fe2e644fac04fd8, |
110 | | 0x3fe2e644fac04fd8, |
111 | | 0x3fe32bfee370ee68, |
112 | | 0x3fe37222bb70747c, |
113 | | 0x3fe3b8b1c68fa6ed, |
114 | | 0x3fe3ffad4e74f1d6, |
115 | | 0x3fe44716a2c08262, |
116 | | 0x3fe44716a2c08262, |
117 | | 0x3fe48eef19317991, |
118 | | 0x3fe4d7380dcc422d, |
119 | | 0x3fe51ff2e30214bc, |
120 | | 0x3fe5692101d9b4a6, |
121 | | 0x3fe5b2c3da19723b, |
122 | | 0x3fe5b2c3da19723b, |
123 | | 0x3fe5fcdce2727ddb, |
124 | | 0x3fe6476d98ad990a, |
125 | | 0x3fe6927781d932a8, |
126 | | 0x3fe6927781d932a8, |
127 | | 0x3fe6ddfc2a78fc63, |
128 | | 0x3fe729fd26b707c8, |
129 | | 0x3fe7767c12967a45, |
130 | | 0x3fe7767c12967a45, |
131 | | 0x3fe7c37a9227e7fb, |
132 | | 0x3fe810fa51bf65fd, |
133 | | 0x3fe810fa51bf65fd, |
134 | | 0x3fe85efd062c656d, |
135 | | 0x3fe8ad846cf369a4, |
136 | | 0x3fe8ad846cf369a4, |
137 | | 0x3fe8fc924c89ac84, |
138 | | 0x3fe94c287492c4db, |
139 | | 0x3fe94c287492c4db, |
140 | | 0x3fe99c48be2063c8, |
141 | | 0x3fe9ecf50bf43f13, |
142 | | 0x3fe9ecf50bf43f13, |
143 | | 0x3fea3e2f4ac43f60, |
144 | | 0x3fea8ff971810a5e, |
145 | | 0x3fea8ff971810a5e, |
146 | | 0x3feae255819f022d, |
147 | | 0x3feb35458761d479, |
148 | | 0x3feb35458761d479, |
149 | | 0x3feb88cb9a2ab521, |
150 | | 0x3feb88cb9a2ab521, |
151 | | 0x3febdce9dcc96187, |
152 | | 0x3fec31a27dd00b4a, |
153 | | 0x3fec31a27dd00b4a, |
154 | | 0x3fec86f7b7ea4a89, |
155 | | 0x3fec86f7b7ea4a89, |
156 | | 0x3fecdcebd2373995, |
157 | | 0x3fed338120a6dd9d, |
158 | | 0x3fed338120a6dd9d, |
159 | | 0x3fed8aba045b01c8, |
160 | | 0x3fed8aba045b01c8, |
161 | | 0x3fede298ec0bac0d, |
162 | | 0x3fede298ec0bac0d, |
163 | | 0x3fee3b20546f554a, |
164 | | 0x3fee3b20546f554a, |
165 | | 0x3fee9452c8a71028, |
166 | | 0x3fee9452c8a71028, |
167 | | 0x3feeee32e2aeccbf, |
168 | | 0x3feeee32e2aeccbf, |
169 | | 0x3fef48c34bd1e96f, |
170 | | 0x3fef48c34bd1e96f, |
171 | | 0x3fefa406bd2443df, |
172 | | 0x0000000000000000, |
173 | | ]; |
174 | | |
175 | | #[inline] |
176 | 0 | pub(crate) fn core_log2f(x: f64) -> f64 { |
177 | 0 | let x_u = x.to_bits(); |
178 | | |
179 | | const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64; |
180 | | |
181 | 0 | let mut x_e: i32 = -(E_BIAS as i32); |
182 | | |
183 | | // log2(x) = log2(2^x_e * x_m) |
184 | | // = x_e + log2(x_m) |
185 | | // Range reduction for log2(x_m): |
186 | | // For each x_m, we would like to find r such that: |
187 | | // -2^-8 <= r * x_m - 1 < 2^-7 |
188 | 0 | let shifted = (x_u >> 45) as i32; |
189 | 0 | let index = shifted & 0x7F; |
190 | 0 | let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]); |
191 | | |
192 | | // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are |
193 | | // all 1's. |
194 | 0 | x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32); |
195 | 0 | let e_x = x_e as f64; |
196 | | |
197 | 0 | let log_r_dd = LOG2_D[index as usize]; |
198 | | |
199 | | // hi is exact |
200 | 0 | let hi = e_x + f64::from_bits(log_r_dd); |
201 | | |
202 | | // Set m = 1.mantissa. |
203 | 0 | let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64; |
204 | 0 | let m = f64::from_bits(x_m); |
205 | | |
206 | | let u; |
207 | | #[cfg(any( |
208 | | all( |
209 | | any(target_arch = "x86", target_arch = "x86_64"), |
210 | | target_feature = "fma" |
211 | | ), |
212 | | target_arch = "aarch64" |
213 | | ))] |
214 | | { |
215 | | u = f_fmla(r, m, -1.0); // exact |
216 | | } |
217 | | #[cfg(not(any( |
218 | | all( |
219 | | any(target_arch = "x86", target_arch = "x86_64"), |
220 | | target_feature = "fma" |
221 | | ), |
222 | | target_arch = "aarch64" |
223 | | )))] |
224 | | { |
225 | | use crate::logs::LOG_CD; |
226 | 0 | let c_m = x_m & 0x3FFF_E000_0000_0000u64; |
227 | 0 | let c = f64::from_bits(c_m); |
228 | 0 | u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact |
229 | | } |
230 | | |
231 | | // Polynomial for log(1+x)/x generated in Sollya: |
232 | | // d = [-2^-8, 2^-7]; |
233 | | // f_log2p1 = log2(1 + x)/x; |
234 | | // Q = fpminimax(f_log2p1, 6, [|D...|], d); |
235 | | // See ./notes/log10pf_core.sollya |
236 | 0 | let p = f_polyeval6( |
237 | 0 | u, |
238 | 0 | f64::from_bits(0x3ff71547652b82fe), |
239 | 0 | f64::from_bits(0xbfe71547652b82e7), |
240 | 0 | f64::from_bits(0x3fdec709dc3b1fd5), |
241 | 0 | f64::from_bits(0xbfd7154766124214), |
242 | 0 | f64::from_bits(0x3fd2776bd902599e), |
243 | 0 | f64::from_bits(0xbfcec586c6f55d08), |
244 | | ); |
245 | 0 | f_fmla(p, u, hi) |
246 | 0 | } |
247 | | |
248 | | /// Computes log2(x+1) |
249 | | /// |
250 | | /// Max ULP 0.5 |
251 | | #[inline] |
252 | 0 | pub fn f_log2p1f(x: f32) -> f32 { |
253 | 0 | let z = x as f64; |
254 | 0 | let ux = x.to_bits().wrapping_shl(1); |
255 | 0 | if ux >= 0xffu32 << 24 || ux == 0 { |
256 | | // |x| == 0, |x| == inf, x == NaN |
257 | 0 | if ux == 0 { |
258 | 0 | return x; |
259 | 0 | } |
260 | 0 | if x.is_infinite() { |
261 | 0 | return if x.is_sign_positive() { |
262 | 0 | f32::INFINITY |
263 | | } else { |
264 | 0 | f32::NAN |
265 | | }; |
266 | 0 | } |
267 | 0 | return x + f32::NAN; |
268 | 0 | } |
269 | | |
270 | 0 | let ax = x.to_bits() & 0x7fff_ffffu32; |
271 | | |
272 | | // Use log2p1(x) = log10(1 + x) for |x| > 2^-6; |
273 | 0 | if ax > 0x3c80_0000u32 { |
274 | 0 | if x == -1. { |
275 | 0 | return f32::NEG_INFINITY; |
276 | 0 | } |
277 | 0 | let x1p = z + 1.; |
278 | 0 | if x1p <= 0. { |
279 | 0 | if x1p == 0. { |
280 | 0 | return f32::NEG_INFINITY; |
281 | 0 | } |
282 | 0 | return f32::NAN; |
283 | 0 | } |
284 | 0 | return core_log2f(x1p) as f32; |
285 | 0 | } |
286 | | |
287 | | // log2p1 is expected to be used near zero: |
288 | | // Polynomial generated by Sollya: |
289 | | // d = [-2^-6; 2^-6]; |
290 | | // f_log2pf = log2(1+x)/x; |
291 | | // Q = fpminimax(f_log2pf, 6, [|D...|], d); |
292 | | const C: [u64; 7] = [ |
293 | | 0x3ff71547652b82fe, |
294 | | 0xbfe71547652b8d18, |
295 | | 0x3fdec709dc3a501c, |
296 | | 0xbfd715475b117c95, |
297 | | 0x3fd2776c3fd833bd, |
298 | | 0xbfcec9905627faa6, |
299 | | 0x3fca64536a0ad148, |
300 | | ]; |
301 | 0 | let p = f_estrin_polyeval7( |
302 | 0 | z, |
303 | 0 | f64::from_bits(C[0]), |
304 | 0 | f64::from_bits(C[1]), |
305 | 0 | f64::from_bits(C[2]), |
306 | 0 | f64::from_bits(C[3]), |
307 | 0 | f64::from_bits(C[4]), |
308 | 0 | f64::from_bits(C[5]), |
309 | 0 | f64::from_bits(C[6]), |
310 | | ); |
311 | 0 | (p * z) as f32 |
312 | 0 | } |
313 | | |
314 | | #[cfg(test)] |
315 | | mod tests { |
316 | | use super::*; |
317 | | |
318 | | #[test] |
319 | | fn test_log2p1f() { |
320 | | assert_eq!(f_log2p1f(0.0), 0.0); |
321 | | assert_eq!(f_log2p1f(1.0), 1.0); |
322 | | assert_eq!(f_log2p1f(-0.0432432), -0.063775845); |
323 | | assert_eq!(f_log2p1f(-0.009874634), -0.01431689); |
324 | | assert_eq!(f_log2p1f(1.2443), 1.1662655); |
325 | | assert_eq!(f_log2p1f(f32::INFINITY), f32::INFINITY); |
326 | | assert!(f_log2p1f(f32::NEG_INFINITY).is_nan()); |
327 | | assert!(f_log2p1f(-1.0432432).is_nan()); |
328 | | } |
329 | | } |