/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/logs/log1pmx.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bits::{EXP_MASK, get_exponent_f64}; |
30 | | use crate::common::f_fmla; |
31 | | use crate::double_double::DoubleDouble; |
32 | | use crate::logs::log1p::{LOG_R1_DD, R1}; |
33 | | use crate::logs::log1p_dd; |
34 | | use crate::polyeval::{f_estrin_polyeval8, f_polyeval4, f_polyeval5}; |
35 | | |
36 | | #[cold] |
37 | 0 | fn tiny_hard(x: f64) -> f64 { |
38 | | // d = [-2^-10; 2^-10]; |
39 | | // f_log1pf = (log(1+x) - x)/x^2; |
40 | | // Q = fpminimax(f_log1pf, 7, [|0, 107...|], d); |
41 | | // See ./notes/log1pmx_at_zero_hard.sollya |
42 | | |
43 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
44 | | |
45 | | const C: [(u64, u64); 7] = [ |
46 | | (0x3c755555555129de, 0x3fd5555555555555), |
47 | | (0xbbd333352fe28400, 0xbfd0000000000000), |
48 | | (0xbc698cb3ef1ea2dd, 0x3fc999999999999a), |
49 | | (0x3c4269700b3f95d0, 0xbfc55555555546ef), |
50 | | (0x3c61290e9261823e, 0x3fc2492492491565), |
51 | | (0x3c6fb0a243c2a59c, 0xbfc000018af8cb7e), |
52 | | (0x3c3b271ceb5c60a0, 0x3fbc71ca10b30518), |
53 | | ]; |
54 | | |
55 | 0 | let mut r = DoubleDouble::mul_f64_add( |
56 | 0 | DoubleDouble::from_bit_pair(C[6]), |
57 | 0 | x, |
58 | 0 | DoubleDouble::from_bit_pair(C[5]), |
59 | | ); |
60 | 0 | r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[4])); |
61 | 0 | r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[3])); |
62 | 0 | r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[2])); |
63 | 0 | r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[1])); |
64 | 0 | r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[0])); |
65 | 0 | r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000)); |
66 | 0 | r = DoubleDouble::quick_mult(r, x2); |
67 | 0 | r.to_f64() |
68 | 0 | } |
69 | | |
70 | 0 | fn log1pmx_big(x: f64) -> f64 { |
71 | 0 | let mut x_u = x.to_bits(); |
72 | | |
73 | 0 | let mut x_dd = DoubleDouble::default(); |
74 | | |
75 | 0 | let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16; |
76 | | |
77 | | const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16; |
78 | | |
79 | 0 | if x_exp >= EXP_BIAS { |
80 | | // |x| >= 1 |
81 | 0 | if x_u >= 0x4650_0000_0000_0000u64 { |
82 | 0 | x_dd.hi = x; |
83 | 0 | } else { |
84 | 0 | x_dd = DoubleDouble::from_exact_add(x, 1.0); |
85 | 0 | } |
86 | 0 | } else { |
87 | 0 | // |x| < 1 |
88 | 0 | x_dd = DoubleDouble::from_exact_add(1.0, x); |
89 | 0 | } |
90 | | |
91 | | // At this point, x_dd is the exact sum of 1 + x: |
92 | | // x_dd.hi + x_dd.lo = x + 1.0 exactly. |
93 | | // |x_dd.hi| >= 2^-54 |
94 | | // |x_dd.lo| < ulp(x_dd.hi) |
95 | | |
96 | 0 | let xhi_bits = x_dd.hi.to_bits(); |
97 | 0 | let xhi_frac = xhi_bits & ((1u64 << 52) - 1); |
98 | 0 | x_u = xhi_bits; |
99 | | // Range reduction: |
100 | | // Find k such that |x_hi - k * 2^-7| <= 2^-8. |
101 | 0 | let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32; |
102 | 0 | let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7); |
103 | 0 | let e_x = x_e as f64; |
104 | | |
105 | | const LOG_2: DoubleDouble = DoubleDouble::new( |
106 | | f64::from_bits(0x3d2ef35793c76730), |
107 | | f64::from_bits(0x3fe62e42fefa3800), |
108 | | ); |
109 | | |
110 | | // hi is exact |
111 | | // ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43 |
112 | | |
113 | 0 | let r_dd = LOG_R1_DD[idx as usize]; |
114 | | |
115 | 0 | let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1)); |
116 | | // lo errors < |e_x| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo) |
117 | | // <= 2^11 * 2^(-43-53) = 2^-85 |
118 | 0 | let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0)); |
119 | | |
120 | | // Scale x_dd by 2^(-xh_bits.get_exponent()). |
121 | 0 | let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52); |
122 | | // Normalize arguments: |
123 | | // 1 <= m_dd.hi < 2 |
124 | | // |m_dd.lo| < 2^-52. |
125 | | // This is exact. |
126 | 0 | let m_hi = 1f64.to_bits() | xhi_frac; |
127 | | |
128 | 0 | let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) { |
129 | 0 | (x_dd.lo.to_bits() as i64).wrapping_sub(s_u) |
130 | | } else { |
131 | 0 | 0 |
132 | | }; |
133 | | |
134 | 0 | let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi)); |
135 | | |
136 | | // Perform range reduction: |
137 | | // r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1 |
138 | | // = (r * m_dd.hi - 1) + r * m_dd.lo |
139 | | // = v_hi + (v_lo.hi + v_lo.lo) |
140 | | // where: |
141 | | // v_hi = r * m_dd.hi - 1 (exact) |
142 | | // v_lo.hi + v_lo.lo = r * m_dd.lo (exact) |
143 | | // Bounds on the values: |
144 | | // -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8 |
145 | | // |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52 |
146 | | // |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105) |
147 | 0 | let r = R1[idx as usize]; |
148 | | let v_hi; |
149 | 0 | let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r)); |
150 | | |
151 | | #[cfg(any( |
152 | | all( |
153 | | any(target_arch = "x86", target_arch = "x86_64"), |
154 | | target_feature = "fma" |
155 | | ), |
156 | | target_arch = "aarch64" |
157 | | ))] |
158 | | { |
159 | | v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact. |
160 | | } |
161 | | |
162 | | #[cfg(not(any( |
163 | | all( |
164 | | any(target_arch = "x86", target_arch = "x86_64"), |
165 | | target_feature = "fma" |
166 | | ), |
167 | | target_arch = "aarch64" |
168 | | )))] |
169 | | { |
170 | | use crate::logs::log1p::RCM1; |
171 | 0 | let c = f64::from_bits( |
172 | 0 | (idx as u64) |
173 | 0 | .wrapping_shl(52 - 7) |
174 | 0 | .wrapping_add(0x3FF0_0000_0000_0000u64), |
175 | | ); |
176 | 0 | v_hi = f_fmla( |
177 | 0 | f64::from_bits(r), |
178 | 0 | m_dd.hi - c, |
179 | 0 | f64::from_bits(RCM1[idx as usize]), |
180 | 0 | ); // Exact |
181 | | } |
182 | | |
183 | | // Range reduction output: |
184 | | // -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8 |
185 | | // |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60 |
186 | 0 | let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi); |
187 | 0 | v_dd.lo += v_lo.lo; |
188 | | |
189 | | // Exact sum: |
190 | | // r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u |
191 | 0 | let mut r1 = DoubleDouble::from_exact_add(hi, v_dd.hi); |
192 | | |
193 | | // Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ... |
194 | | // generated by Sollya with: |
195 | | // > P = fpminimax(log(1 + x)/x, 6, [|1, 1, D...|], |
196 | | // [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]); |
197 | | const P_COEFFS: [u64; 6] = [ |
198 | | 0xbfe0000000000000, |
199 | | 0x3fd5555555555166, |
200 | | 0xbfcfffffffdb7746, |
201 | | 0x3fc99999a8718a60, |
202 | | 0xbfc555874ce8ce22, |
203 | | 0x3fc24335555ddbe5, |
204 | | ]; |
205 | | |
206 | | // C * ulp(v_sq) + err_hi |
207 | 0 | let v_sq = v_dd.hi * v_dd.hi; |
208 | 0 | let p0 = f_fmla( |
209 | 0 | v_dd.hi, |
210 | 0 | f64::from_bits(P_COEFFS[1]), |
211 | 0 | f64::from_bits(P_COEFFS[0]), |
212 | | ); |
213 | 0 | let p1 = f_fmla( |
214 | 0 | v_dd.hi, |
215 | 0 | f64::from_bits(P_COEFFS[3]), |
216 | 0 | f64::from_bits(P_COEFFS[2]), |
217 | | ); |
218 | 0 | let p2 = f_fmla( |
219 | 0 | v_dd.hi, |
220 | 0 | f64::from_bits(P_COEFFS[5]), |
221 | 0 | f64::from_bits(P_COEFFS[4]), |
222 | | ); |
223 | 0 | let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2); |
224 | | |
225 | | const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0]; |
226 | 0 | let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }]; |
227 | | |
228 | 0 | let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi); |
229 | | |
230 | 0 | r1.lo = p; |
231 | 0 | r1 = DoubleDouble::from_exact_add(r1.hi, r1.lo); |
232 | 0 | r1 = DoubleDouble::full_add_f64(r1, -x); |
233 | | |
234 | 0 | let left = r1.hi + (r1.lo - err); |
235 | 0 | let right = r1.hi + (r1.lo + err); |
236 | | // Ziv's test to see if fast pass is accurate enough. |
237 | 0 | if left == right { |
238 | 0 | return left; |
239 | 0 | } |
240 | 0 | log1pmx_accurate_dd(x) |
241 | 0 | } |
242 | | |
243 | | #[cold] |
244 | 0 | fn log1pmx_accurate_dd(x: f64) -> f64 { |
245 | 0 | let r = log1p_dd(x); |
246 | 0 | DoubleDouble::full_add_f64(r, -x).to_f64() |
247 | 0 | } |
248 | | |
249 | | /// Computes log(1+x) - x |
250 | | /// |
251 | | /// ulp 0.5 |
252 | 0 | pub fn f_log1pmx(x: f64) -> f64 { |
253 | 0 | let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64; |
254 | | |
255 | 0 | let x_e = (x.to_bits() >> 52) & 0x7ff; |
256 | | |
257 | 0 | if !x.is_normal() { |
258 | 0 | if x.is_nan() { |
259 | 0 | return x + x; |
260 | 0 | } |
261 | 0 | if x.is_infinite() { |
262 | 0 | return f64::NAN; |
263 | 0 | } |
264 | 0 | if x == 0. { |
265 | 0 | return x; |
266 | 0 | } |
267 | 0 | } |
268 | | |
269 | 0 | if ax > 0x3f90000000000000u64 { |
270 | | // |x| > 2^-6 |
271 | 0 | if x <= -1. { |
272 | 0 | if x == -1. { |
273 | 0 | return f64::NEG_INFINITY; |
274 | 0 | } |
275 | 0 | return f64::NAN; |
276 | 0 | } |
277 | 0 | return log1pmx_big(x); |
278 | 0 | } |
279 | | |
280 | | const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64; |
281 | | |
282 | | // log(1+x) is expected to be used near zero |
283 | | |
284 | 0 | if x_e < E_BIAS - 10 { |
285 | 0 | if x_e < E_BIAS - 100 { |
286 | | // |x| < 2^-100 |
287 | | // taylor series log(1+x) - x ~ -x^2/2 + x^3/3 |
288 | 0 | let x2 = x * x; |
289 | 0 | let dx2 = f_fmla(x, x, -x2); |
290 | 0 | let rl = dx2 * -0.5; |
291 | 0 | return f_fmla(x2, -0.5, rl); |
292 | 0 | } |
293 | | |
294 | | // Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x): |
295 | | // d = [-2^-10; 2^-10]; |
296 | | // f_log1pf = (log(1+x) - x)/x^2; |
297 | | // Q = fpminimax(f_log1pf, 7, [|0, 107, 107, D...|], d); |
298 | | // See ./notes/log1pmx_at_zero.sollya |
299 | | |
300 | 0 | let p = f_polyeval5( |
301 | 0 | x, |
302 | 0 | f64::from_bits(0x3fc999999999999a), |
303 | 0 | f64::from_bits(0xbfc55555555546ef), |
304 | 0 | f64::from_bits(0x3fc24924923d3abf), |
305 | 0 | f64::from_bits(0xbfc000018af7f637), |
306 | 0 | f64::from_bits(0x3fbc72984db24b6a), |
307 | | ); |
308 | | |
309 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
310 | | |
311 | 0 | let mut r = DoubleDouble::f64_mul_f64_add( |
312 | 0 | x, |
313 | 0 | p, |
314 | 0 | DoubleDouble::from_bit_pair((0xbbd3330899095800, 0xbfd0000000000000)), |
315 | | ); |
316 | 0 | r = DoubleDouble::mul_f64_add( |
317 | 0 | r, |
318 | 0 | x, |
319 | 0 | DoubleDouble::from_bit_pair((0x3c75555538509407, 0x3fd5555555555555)), |
320 | 0 | ); |
321 | 0 | r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000)); |
322 | 0 | r = DoubleDouble::quick_mult(r, x2); |
323 | | const ERR: f64 = f64::from_bits(0x3af0000000000000); // 2^-80 |
324 | 0 | let ub = r.hi + (r.lo + ERR); |
325 | 0 | let lb = r.hi + (r.lo - ERR); |
326 | 0 | if lb == ub { |
327 | 0 | return r.to_f64(); |
328 | 0 | } |
329 | 0 | return tiny_hard(x); |
330 | 0 | } |
331 | | |
332 | | // Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x): |
333 | | // d = [-2^-6; 2^-6]; |
334 | | // f_log1pf = (log(1+x) - x)/x^2; |
335 | | // Q = fpminimax(f_log1pf, 10, [|0, 107, 107, D...|], d); |
336 | | // See ./notes/log1pmx.sollya |
337 | | |
338 | 0 | let p = f_estrin_polyeval8( |
339 | 0 | x, |
340 | 0 | f64::from_bits(0x3fc9999999999997), |
341 | 0 | f64::from_bits(0xbfc5555555555552), |
342 | 0 | f64::from_bits(0x3fc249249249fb64), |
343 | 0 | f64::from_bits(0xbfc000000000f450), |
344 | 0 | f64::from_bits(0x3fbc71c6e2591149), |
345 | 0 | f64::from_bits(0xbfb999995cf14d86), |
346 | 0 | f64::from_bits(0x3fb7494eb6c2c544), |
347 | 0 | f64::from_bits(0xbfb558bf05690e85), |
348 | | ); |
349 | | |
350 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
351 | | |
352 | 0 | let mut r = DoubleDouble::f64_mul_f64_add( |
353 | 0 | x, |
354 | 0 | p, |
355 | 0 | DoubleDouble::from_bit_pair((0xbb9d89dc15a38000, 0xbfd0000000000000)), |
356 | | ); |
357 | 0 | r = DoubleDouble::mul_f64_add( |
358 | 0 | r, |
359 | 0 | x, |
360 | 0 | DoubleDouble::from_bit_pair((0x3c7555a15a94e505, 0x3fd5555555555555)), |
361 | 0 | ); |
362 | 0 | r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000)); |
363 | 0 | r = DoubleDouble::quick_mult(r, x2); |
364 | | |
365 | 0 | r.to_f64() |
366 | 0 | } |
367 | | |
368 | | #[cfg(test)] |
369 | | mod tests { |
370 | | use super::*; |
371 | | |
372 | | #[test] |
373 | | fn test_log1pmx() { |
374 | | assert_eq!( |
375 | | f_log1pmx(0.00000000000000005076835735015165), |
376 | | -0.0000000000000000000000000000000012887130540163486 |
377 | | ); |
378 | | assert_eq!(f_log1pmx(5.), -3.208240530771945); |
379 | | assert_eq!(f_log1pmx(-0.99), -3.6151701859880907); |
380 | | assert_eq!(f_log1pmx(-1.), f64::NEG_INFINITY); |
381 | | assert_eq!( |
382 | | f_log1pmx(1.0000000000008708e-13), |
383 | | -0.0000000000000000000000000050000000000083744 |
384 | | ); |
385 | | assert_eq!( |
386 | | f_log1pmx(1.0000000000008708e-26), |
387 | | -0.00000000000000000000000000000000000000000000000000005000000000008707 |
388 | | ); |
389 | | assert_eq!(f_log1pmx(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012890176556069385), |
390 | | -0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000830783258233204); |
391 | | } |
392 | | } |