/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/logs/log1p.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::bits::{EXP_MASK, get_exponent_f64}; |
30 | | use crate::common::{dyad_fmla, f_fmla}; |
31 | | use crate::double_double::DoubleDouble; |
32 | | use crate::dyadic_float::DyadicFloat128; |
33 | | use crate::logs::log1p_dd::log1p_dd; |
34 | | use crate::logs::log1p_dyadic::log1p_accurate; |
35 | | use crate::polyeval::f_polyeval4; |
36 | | |
37 | | // R1[i] = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7) ) |
38 | | pub(crate) static R1: [u64; 129] = [ |
39 | | 0x3ff0000000000000, |
40 | | 0x3fefc00000000000, |
41 | | 0x3fef800000000000, |
42 | | 0x3fef400000000000, |
43 | | 0x3fef000000000000, |
44 | | 0x3feec00000000000, |
45 | | 0x3feea00000000000, |
46 | | 0x3fee600000000000, |
47 | | 0x3fee200000000000, |
48 | | 0x3fede00000000000, |
49 | | 0x3feda00000000000, |
50 | | 0x3fed800000000000, |
51 | | 0x3fed400000000000, |
52 | | 0x3fed000000000000, |
53 | | 0x3fece00000000000, |
54 | | 0x3feca00000000000, |
55 | | 0x3fec800000000000, |
56 | | 0x3fec400000000000, |
57 | | 0x3fec000000000000, |
58 | | 0x3febe00000000000, |
59 | | 0x3feba00000000000, |
60 | | 0x3feb800000000000, |
61 | | 0x3feb400000000000, |
62 | | 0x3feb200000000000, |
63 | | 0x3feb000000000000, |
64 | | 0x3feac00000000000, |
65 | | 0x3feaa00000000000, |
66 | | 0x3fea600000000000, |
67 | | 0x3fea400000000000, |
68 | | 0x3fea200000000000, |
69 | | 0x3fe9e00000000000, |
70 | | 0x3fe9c00000000000, |
71 | | 0x3fe9a00000000000, |
72 | | 0x3fe9800000000000, |
73 | | 0x3fe9400000000000, |
74 | | 0x3fe9200000000000, |
75 | | 0x3fe9000000000000, |
76 | | 0x3fe8e00000000000, |
77 | | 0x3fe8a00000000000, |
78 | | 0x3fe8800000000000, |
79 | | 0x3fe8600000000000, |
80 | | 0x3fe8400000000000, |
81 | | 0x3fe8200000000000, |
82 | | 0x3fe8000000000000, |
83 | | 0x3fe7e00000000000, |
84 | | 0x3fe7a00000000000, |
85 | | 0x3fe7800000000000, |
86 | | 0x3fe7600000000000, |
87 | | 0x3fe7400000000000, |
88 | | 0x3fe7200000000000, |
89 | | 0x3fe7000000000000, |
90 | | 0x3fe6e00000000000, |
91 | | 0x3fe6c00000000000, |
92 | | 0x3fe6a00000000000, |
93 | | 0x3fe6800000000000, |
94 | | 0x3fe6600000000000, |
95 | | 0x3fe6400000000000, |
96 | | 0x3fe6200000000000, |
97 | | 0x3fe6000000000000, |
98 | | 0x3fe5e00000000000, |
99 | | 0x3fe5c00000000000, |
100 | | 0x3fe5a00000000000, |
101 | | 0x3fe5800000000000, |
102 | | 0x3fe5800000000000, |
103 | | 0x3fe5600000000000, |
104 | | 0x3fe5400000000000, |
105 | | 0x3fe5200000000000, |
106 | | 0x3fe5000000000000, |
107 | | 0x3fe4e00000000000, |
108 | | 0x3fe4c00000000000, |
109 | | 0x3fe4a00000000000, |
110 | | 0x3fe4a00000000000, |
111 | | 0x3fe4800000000000, |
112 | | 0x3fe4600000000000, |
113 | | 0x3fe4400000000000, |
114 | | 0x3fe4200000000000, |
115 | | 0x3fe4200000000000, |
116 | | 0x3fe4000000000000, |
117 | | 0x3fe3e00000000000, |
118 | | 0x3fe3c00000000000, |
119 | | 0x3fe3c00000000000, |
120 | | 0x3fe3a00000000000, |
121 | | 0x3fe3800000000000, |
122 | | 0x3fe3600000000000, |
123 | | 0x3fe3600000000000, |
124 | | 0x3fe3400000000000, |
125 | | 0x3fe3200000000000, |
126 | | 0x3fe3000000000000, |
127 | | 0x3fe3000000000000, |
128 | | 0x3fe2e00000000000, |
129 | | 0x3fe2c00000000000, |
130 | | 0x3fe2c00000000000, |
131 | | 0x3fe2a00000000000, |
132 | | 0x3fe2800000000000, |
133 | | 0x3fe2800000000000, |
134 | | 0x3fe2600000000000, |
135 | | 0x3fe2400000000000, |
136 | | 0x3fe2400000000000, |
137 | | 0x3fe2200000000000, |
138 | | 0x3fe2000000000000, |
139 | | 0x3fe2000000000000, |
140 | | 0x3fe1e00000000000, |
141 | | 0x3fe1c00000000000, |
142 | | 0x3fe1c00000000000, |
143 | | 0x3fe1a00000000000, |
144 | | 0x3fe1a00000000000, |
145 | | 0x3fe1800000000000, |
146 | | 0x3fe1600000000000, |
147 | | 0x3fe1600000000000, |
148 | | 0x3fe1400000000000, |
149 | | 0x3fe1400000000000, |
150 | | 0x3fe1200000000000, |
151 | | 0x3fe1200000000000, |
152 | | 0x3fe1000000000000, |
153 | | 0x3fe0e00000000000, |
154 | | 0x3fe0e00000000000, |
155 | | 0x3fe0c00000000000, |
156 | | 0x3fe0c00000000000, |
157 | | 0x3fe0a00000000000, |
158 | | 0x3fe0a00000000000, |
159 | | 0x3fe0800000000000, |
160 | | 0x3fe0800000000000, |
161 | | 0x3fe0600000000000, |
162 | | 0x3fe0600000000000, |
163 | | 0x3fe0400000000000, |
164 | | 0x3fe0400000000000, |
165 | | 0x3fe0200000000000, |
166 | | 0x3fe0200000000000, |
167 | | 0x3fe0000000000000, |
168 | | ]; |
169 | | |
170 | | // Extra constants for exact range reduction when FMA instructions are not |
171 | | // available: |
172 | | // r * c - 1 for r = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7)) |
173 | | // and c = 1 + i * 2^-7 |
174 | | // with i = 0..128. |
175 | | #[cfg(not(any( |
176 | | all( |
177 | | any(target_arch = "x86", target_arch = "x86_64"), |
178 | | target_feature = "fma" |
179 | | ), |
180 | | target_arch = "aarch64" |
181 | | )))] |
182 | | pub(crate) static RCM1: [u64; 129] = [ |
183 | | 0x0000000000000000, |
184 | | 0xbf10000000000000, |
185 | | 0xbf30000000000000, |
186 | | 0xbf42000000000000, |
187 | | 0xbf50000000000000, |
188 | | 0xbf59000000000000, |
189 | | 0x3f5f000000000000, |
190 | | 0x3f52800000000000, |
191 | | 0x3f30000000000000, |
192 | | 0xbf49000000000000, |
193 | | 0xbf5f000000000000, |
194 | | 0x3f52000000000000, |
195 | | 0xbf30000000000000, |
196 | | 0xbf5c000000000000, |
197 | | 0x3f51000000000000, |
198 | | 0xbf45000000000000, |
199 | | 0x3f60000000000000, |
200 | | 0x3f10000000000000, |
201 | | 0xbf60000000000000, |
202 | | 0x3f3a000000000000, |
203 | | 0xbf5e000000000000, |
204 | | 0x3f38000000000000, |
205 | | 0xbf61000000000000, |
206 | | 0xbf00000000000000, |
207 | | 0x3f60000000000000, |
208 | | 0xbf4a000000000000, |
209 | | 0x3f51000000000000, |
210 | | 0xbf5f800000000000, |
211 | | 0xbf30000000000000, |
212 | | 0x3f56800000000000, |
213 | | 0xbf5f000000000000, |
214 | | 0xbf3c000000000000, |
215 | | 0x3f50000000000000, |
216 | | 0x3f63000000000000, |
217 | | 0xbf56000000000000, |
218 | | 0xbf24000000000000, |
219 | | 0x3f50000000000000, |
220 | | 0x3f60c00000000000, |
221 | | 0xbf60800000000000, |
222 | | 0xbf52000000000000, |
223 | | 0xbf30000000000000, |
224 | | 0x3f42000000000000, |
225 | | 0x3f55000000000000, |
226 | | 0x3f60000000000000, |
227 | | 0x3f65000000000000, |
228 | | 0xbf61c00000000000, |
229 | | 0xbf5c000000000000, |
230 | | 0xbf55800000000000, |
231 | | 0xbf50000000000000, |
232 | | 0xbf47000000000000, |
233 | | 0xbf40000000000000, |
234 | | 0xbf36000000000000, |
235 | | 0xbf30000000000000, |
236 | | 0xbf2c000000000000, |
237 | | 0xbf30000000000000, |
238 | | 0xbf36000000000000, |
239 | | 0xbf40000000000000, |
240 | | 0xbf47000000000000, |
241 | | 0xbf50000000000000, |
242 | | 0xbf55800000000000, |
243 | | 0xbf5c000000000000, |
244 | | 0xbf61c00000000000, |
245 | | 0xbf66000000000000, |
246 | | 0x3f65000000000000, |
247 | | 0x3f60000000000000, |
248 | | 0x3f55000000000000, |
249 | | 0x3f42000000000000, |
250 | | 0xbf30000000000000, |
251 | | 0xbf52000000000000, |
252 | | 0xbf60800000000000, |
253 | | 0xbf68800000000000, |
254 | | 0x3f60c00000000000, |
255 | | 0x3f50000000000000, |
256 | | 0xbf24000000000000, |
257 | | 0xbf56000000000000, |
258 | | 0xbf65400000000000, |
259 | | 0x3f63000000000000, |
260 | | 0x3f50000000000000, |
261 | | 0xbf3c000000000000, |
262 | | 0xbf5f000000000000, |
263 | | 0x3f68000000000000, |
264 | | 0x3f56800000000000, |
265 | | 0xbf30000000000000, |
266 | | 0xbf5f800000000000, |
267 | | 0x3f67000000000000, |
268 | | 0x3f51000000000000, |
269 | | 0xbf4a000000000000, |
270 | | 0xbf66000000000000, |
271 | | 0x3f60000000000000, |
272 | | 0xbf00000000000000, |
273 | | 0xbf61000000000000, |
274 | | 0x3f64800000000000, |
275 | | 0x3f38000000000000, |
276 | | 0xbf5e000000000000, |
277 | | 0x3f66000000000000, |
278 | | 0x3f3a000000000000, |
279 | | 0xbf60000000000000, |
280 | | 0x3f64800000000000, |
281 | | 0x3f10000000000000, |
282 | | 0xbf64000000000000, |
283 | | 0x3f60000000000000, |
284 | | 0xbf45000000000000, |
285 | | 0xbf6b000000000000, |
286 | | 0x3f51000000000000, |
287 | | 0xbf5c000000000000, |
288 | | 0x3f65400000000000, |
289 | | 0xbf30000000000000, |
290 | | 0xbf69c00000000000, |
291 | | 0x3f52000000000000, |
292 | | 0xbf5f000000000000, |
293 | | 0x3f63000000000000, |
294 | | 0xbf49000000000000, |
295 | | 0x3f6c000000000000, |
296 | | 0x3f30000000000000, |
297 | | 0xbf68800000000000, |
298 | | 0x3f52800000000000, |
299 | | 0xbf62000000000000, |
300 | | 0x3f5f000000000000, |
301 | | 0xbf59000000000000, |
302 | | 0x3f64c00000000000, |
303 | | 0xbf50000000000000, |
304 | | 0x3f69000000000000, |
305 | | 0xbf42000000000000, |
306 | | 0x3f6c400000000000, |
307 | | 0xbf30000000000000, |
308 | | 0x3f6e800000000000, |
309 | | 0xbf10000000000000, |
310 | | 0x3f6fc00000000000, |
311 | | 0x0000000000000000, |
312 | | ]; |
313 | | |
314 | | pub(crate) static LOG_R1_DD: [(u64, u64); 129] = [ |
315 | | (0x0000000000000000, 0x0000000000000000), |
316 | | (0xbd10c76b999d2be8, 0x3f80101575890000), |
317 | | (0xbd23dc5b06e2f7d2, 0x3f90205658938000), |
318 | | (0xbd2aa0ba325a0c34, 0x3f98492528c90000), |
319 | | (0x3d0111c05cf1d753, 0x3fa0415d89e74000), |
320 | | (0xbd2c167375bdfd28, 0x3fa466aed42e0000), |
321 | | (0xbd029efbec19afa2, 0x3fa67c94f2d4c000), |
322 | | (0x3d20fc1a353bb42e, 0x3faaaef2d0fb0000), |
323 | | (0xbd0e113e4fc93b7b, 0x3faeea31c006c000), |
324 | | (0xbd25325d560d9e9b, 0x3fb1973bd1466000), |
325 | | (0x3d2cc85ea5db4ed7, 0x3fb3bdf5a7d1e000), |
326 | | (0xbcf53a2582f4e1ef, 0x3fb4d3115d208000), |
327 | | (0x3cec1e8da99ded32, 0x3fb700d30aeac000), |
328 | | (0x3d23115c3abd47da, 0x3fb9335e5d594000), |
329 | | (0xbd0e42b6b94407c8, 0x3fba4e7640b1c000), |
330 | | (0x3d2646d1c65aacd3, 0x3fbc885801bc4000), |
331 | | (0x3d1a89401fa71733, 0x3fbda72763844000), |
332 | | (0xbd2534d64fa10afd, 0x3fbfe89139dbe000), |
333 | | (0x3d21ef78ce2d07f2, 0x3fc1178e8227e000), |
334 | | (0x3d2ca78e44389934, 0x3fc1aa2b7e23f000), |
335 | | (0x3d039d6ccb81b4a1, 0x3fc2d1610c868000), |
336 | | (0x3cc62fa8234b7289, 0x3fc365fcb0159000), |
337 | | (0x3d25837954fdb678, 0x3fc4913d8333b000), |
338 | | (0x3d2633e8e5697dc7, 0x3fc527e5e4a1b000), |
339 | | (0xbd127023eb68981c, 0x3fc5bf406b544000), |
340 | | (0xbd25118de59c21e1, 0x3fc6f0128b757000), |
341 | | (0xbd1c661070914305, 0x3fc7898d85445000), |
342 | | (0xbd073d54aae92cd1, 0x3fc8beafeb390000), |
343 | | (0x3d07f22858a0ff6f, 0x3fc95a5adcf70000), |
344 | | (0x3d29904d6865817a, 0x3fc9f6c407089000), |
345 | | (0xbd0c358d4eace1aa, 0x3fcb31d8575bd000), |
346 | | (0xbd2d4bc4595412b6, 0x3fcbd087383be000), |
347 | | (0xbcf1ec72c5962bd2, 0x3fcc6ffbc6f01000), |
348 | | (0xbd084a7e75b6f6e4, 0x3fcd1037f2656000), |
349 | | (0x3cc212276041f430, 0x3fce530effe71000), |
350 | | (0xbcca211565bb8e11, 0x3fcef5ade4dd0000), |
351 | | (0x3d1bcbecca0cdf30, 0x3fcf991c6cb3b000), |
352 | | (0xbd16f08c1485e94a, 0x3fd01eae5626c800), |
353 | | (0x3d27188b163ceae9, 0x3fd0c42d67616000), |
354 | | (0xbd2c210e63a5f01c, 0x3fd1178e8227e800), |
355 | | (0x3d2b9acdf7a51681, 0x3fd16b5ccbacf800), |
356 | | (0x3d2ca6ed5147bdb7, 0x3fd1bf99635a6800), |
357 | | (0x3d0a87deba46baea, 0x3fd214456d0eb800), |
358 | | (0x3d2c93c1df5bb3b6, 0x3fd269621134d800), |
359 | | (0x3d2a9cfa4a5004f4, 0x3fd2bef07cdc9000), |
360 | | (0x3d116ecdb0f177c8, 0x3fd36b6776be1000), |
361 | | (0x3d183b54b606bd5c, 0x3fd3c25277333000), |
362 | | (0x3d08e436ec90e09d, 0x3fd419b423d5e800), |
363 | | (0xbd2f27ce0967d675, 0x3fd4718dc271c800), |
364 | | (0xbd2e20891b0ad8a4, 0x3fd4c9e09e173000), |
365 | | (0x3d2ebe708164c759, 0x3fd522ae0738a000), |
366 | | (0x3d1fadedee5d40ef, 0x3fd57bf753c8d000), |
367 | | (0xbd0a0b2a08a465dc, 0x3fd5d5bddf596000), |
368 | | (0xbd2db623e731ae00, 0x3fd630030b3ab000), |
369 | | (0x3d20a0d32756eba0, 0x3fd68ac83e9c6800), |
370 | | (0x3d1721657c222d87, 0x3fd6e60ee6af1800), |
371 | | (0x3d2d8b0949dc60b3, 0x3fd741d876c67800), |
372 | | (0x3d29ec7d2efd1778, 0x3fd79e26687cf800), |
373 | | (0xbd272090c812566a, 0x3fd7fafa3bd81800), |
374 | | (0x3d2fd56f3333778a, 0x3fd85855776dc800), |
375 | | (0xbd205ae1e5e70470, 0x3fd8b639a88b3000), |
376 | | (0xbd1766b52ee6307d, 0x3fd914a8635bf800), |
377 | | (0xbd152313a502d9f0, 0x3fd973a343135800), |
378 | | (0xbd152313a502d9f0, 0x3fd973a343135800), |
379 | | (0xbd26279e10d0c0b0, 0x3fd9d32bea15f000), |
380 | | (0x3d23c6457f9d79f5, 0x3fda33440224f800), |
381 | | (0x3d1e36f2bea77a5d, 0x3fda93ed3c8ad800), |
382 | | (0xbd217cc552774458, 0x3fdaf5295248d000), |
383 | | (0x3d1095252d841995, 0x3fdb56fa04462800), |
384 | | (0x3d27d85bf40a666d, 0x3fdbb9611b80e000), |
385 | | (0x3d2cec807fe8e180, 0x3fdc1c60693fa000), |
386 | | (0x3d2cec807fe8e180, 0x3fdc1c60693fa000), |
387 | | (0xbd29b6ddc15249ae, 0x3fdc7ff9c7455800), |
388 | | (0xbd0797c33ec7a6b0, 0x3fdce42f18064800), |
389 | | (0x3d235bafe9a767a8, 0x3fdd490246def800), |
390 | | (0xbd1ea42d60dc616a, 0x3fddae75484c9800), |
391 | | (0xbd1ea42d60dc616a, 0x3fddae75484c9800), |
392 | | (0xbd1326b207322938, 0x3fde148a1a272800), |
393 | | (0xbd2465505372bd08, 0x3fde7b42c3ddb000), |
394 | | (0x3d2f27f45a470251, 0x3fdee2a156b41000), |
395 | | (0x3d2f27f45a470251, 0x3fdee2a156b41000), |
396 | | (0x3d12cde56f014a8b, 0x3fdf4aa7ee031800), |
397 | | (0x3d0085fa3c164935, 0x3fdfb358af7a4800), |
398 | | (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800), |
399 | | (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800), |
400 | | (0xbd04c45fe79539e0, 0x3fe04360be760400), |
401 | | (0x3d26812241edf5fd, 0x3fe078bf0533c400), |
402 | | (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400), |
403 | | (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400), |
404 | | (0x3d1c299807801742, 0x3fe0e4898611cc00), |
405 | | (0xbd258647bb9ddcb2, 0x3fe11af823c75c00), |
406 | | (0xbd258647bb9ddcb2, 0x3fe11af823c75c00), |
407 | | (0xbd2edd97a293ae49, 0x3fe151c3f6f29800), |
408 | | (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00), |
409 | | (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00), |
410 | | (0x3cccacdeed70e667, 0x3fe1c07849ae6000), |
411 | | (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800), |
412 | | (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800), |
413 | | (0x3d12fc066e48667b, 0x3fe230b0d8bebc00), |
414 | | (0xbd0b61f105226250, 0x3fe269621134dc00), |
415 | | (0xbd0b61f105226250, 0x3fe269621134dc00), |
416 | | (0x3d206d2be797882d, 0x3fe2a2786d0ec000), |
417 | | (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000), |
418 | | (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000), |
419 | | (0xbd274e93c5a0ed9c, 0x3fe315da44340800), |
420 | | (0xbd274e93c5a0ed9c, 0x3fe315da44340800), |
421 | | (0x3d10b83f9527e6ac, 0x3fe35028ad9d8c00), |
422 | | (0xbd218b7abb5569a4, 0x3fe38ae217197800), |
423 | | (0xbd218b7abb5569a4, 0x3fe38ae217197800), |
424 | | (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000), |
425 | | (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000), |
426 | | (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00), |
427 | | (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00), |
428 | | (0xbcfd984f481051f7, 0x3fe43d9ff2f92400), |
429 | | (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400), |
430 | | (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400), |
431 | | (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00), |
432 | | (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00), |
433 | | (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000), |
434 | | (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000), |
435 | | (0x3d05ccc45d257531, 0x3fe5322e26867800), |
436 | | (0x3d05ccc45d257531, 0x3fe5322e26867800), |
437 | | (0x3d09980bff3303dd, 0x3fe5707a26bb8c00), |
438 | | (0x3d09980bff3303dd, 0x3fe5707a26bb8c00), |
439 | | (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800), |
440 | | (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800), |
441 | | (0x3d2202380cda46be, 0x3fe5ee82aa241800), |
442 | | (0x3d2202380cda46be, 0x3fe5ee82aa241800), |
443 | | (0x0000000000000000, 0x0000000000000000), |
444 | | ]; |
445 | | |
446 | | #[inline] |
447 | 0 | pub(crate) fn log1p_f64_dyadic(x: f64) -> DyadicFloat128 { |
448 | 0 | let mut x_u = x.to_bits(); |
449 | | |
450 | 0 | let mut x_dd = DoubleDouble::default(); |
451 | | |
452 | 0 | let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16; |
453 | | |
454 | 0 | if x_exp >= EXP_BIAS { |
455 | | // |x| >= 1 |
456 | 0 | if x_u >= 0x4650_0000_0000_0000u64 { |
457 | 0 | x_dd.hi = x; |
458 | 0 | } else { |
459 | 0 | x_dd = DoubleDouble::from_exact_add(x, 1.0); |
460 | 0 | } |
461 | 0 | } else { |
462 | 0 | // |x| < 1 |
463 | 0 | x_dd = DoubleDouble::from_exact_add(1.0, x); |
464 | 0 | } |
465 | | |
466 | | const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16; |
467 | | |
468 | | // At this point, x_dd is the exact sum of 1 + x: |
469 | | // x_dd.hi + x_dd.lo = x + 1.0 exactly. |
470 | | // |x_dd.hi| >= 2^-54 |
471 | | // |x_dd.lo| < ulp(x_dd.hi) |
472 | | |
473 | 0 | let xhi_bits = x_dd.hi.to_bits(); |
474 | 0 | let xhi_frac = xhi_bits & ((1u64 << 52) - 1); |
475 | 0 | x_u = xhi_bits; |
476 | | // Range reduction: |
477 | | // Find k such that |x_hi - k * 2^-7| <= 2^-8. |
478 | 0 | let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32; |
479 | 0 | let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7); |
480 | | |
481 | | // Scale x_dd by 2^(-xh_bits.get_exponent()). |
482 | 0 | let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52); |
483 | | // Normalize arguments: |
484 | | // 1 <= m_dd.hi < 2 |
485 | | // |m_dd.lo| < 2^-52. |
486 | | // This is exact. |
487 | 0 | let m_hi = 1f64.to_bits() | xhi_frac; |
488 | | |
489 | 0 | let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) { |
490 | 0 | (x_dd.lo.to_bits() as i64).wrapping_sub(s_u) |
491 | | } else { |
492 | 0 | 0 |
493 | | }; |
494 | | |
495 | 0 | let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi)); |
496 | | |
497 | | // Perform range reduction: |
498 | | // r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1 |
499 | | // = (r * m_dd.hi - 1) + r * m_dd.lo |
500 | | // = v_hi + (v_lo.hi + v_lo.lo) |
501 | | // where: |
502 | | // v_hi = r * m_dd.hi - 1 (exact) |
503 | | // v_lo.hi + v_lo.lo = r * m_dd.lo (exact) |
504 | | // Bounds on the values: |
505 | | // -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8 |
506 | | // |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52 |
507 | | // |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105) |
508 | 0 | let r = R1[idx as usize]; |
509 | | let v_hi; |
510 | 0 | let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r)); |
511 | | |
512 | | #[cfg(any( |
513 | | all( |
514 | | any(target_arch = "x86", target_arch = "x86_64"), |
515 | | target_feature = "fma" |
516 | | ), |
517 | | target_arch = "aarch64" |
518 | | ))] |
519 | | { |
520 | | v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact. |
521 | | } |
522 | | |
523 | | #[cfg(not(any( |
524 | | all( |
525 | | any(target_arch = "x86", target_arch = "x86_64"), |
526 | | target_feature = "fma" |
527 | | ), |
528 | | target_arch = "aarch64" |
529 | | )))] |
530 | 0 | { |
531 | 0 | let c = f64::from_bits( |
532 | 0 | (idx as u64) |
533 | 0 | .wrapping_shl(52 - 7) |
534 | 0 | .wrapping_add(0x3FF0_0000_0000_0000u64), |
535 | 0 | ); |
536 | 0 | v_hi = f_fmla( |
537 | 0 | f64::from_bits(r), |
538 | 0 | m_dd.hi - c, |
539 | 0 | f64::from_bits(RCM1[idx as usize]), |
540 | 0 | ); // Exact |
541 | 0 | } |
542 | | |
543 | | // Range reduction output: |
544 | | // -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8 |
545 | | // |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60 |
546 | 0 | let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi); |
547 | 0 | v_dd.lo += v_lo.lo; |
548 | | |
549 | 0 | log1p_accurate(x_e, idx as usize, v_dd) |
550 | 0 | } |
551 | | |
552 | | /// Computes log(x+1) |
553 | | /// |
554 | | /// Max ULP 0.5 |
555 | 0 | pub fn f_log1p(x: f64) -> f64 { |
556 | 0 | let mut x_u = x.to_bits(); |
557 | | |
558 | 0 | let mut x_dd = DoubleDouble::default(); |
559 | | |
560 | 0 | let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16; |
561 | | |
562 | | const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16; |
563 | | |
564 | 0 | let e = (((x_u >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff); |
565 | | |
566 | 0 | if e == 0x400 || x == 0. || x <= -1.0 { |
567 | | /* case NaN/Inf, +/-0 or x <= -1 */ |
568 | 0 | if e == 0x400 && x.to_bits() != 0xfffu64 << 52 { |
569 | | /* NaN or + Inf*/ |
570 | 0 | return x + x; |
571 | 0 | } |
572 | 0 | if x <= -1.0 |
573 | | /* we use the fact that NaN < -1 is false */ |
574 | | { |
575 | | /* log2p(x<-1) is NaN, log2p(-1) is -Inf and raises DivByZero */ |
576 | 0 | return if x < -1.0 { |
577 | 0 | f64::NAN |
578 | | } else { |
579 | | // x=-1 |
580 | 0 | f64::NEG_INFINITY |
581 | | }; |
582 | 0 | } |
583 | 0 | return x + x; /* +/-0 */ |
584 | 0 | } |
585 | | |
586 | 0 | let ax = x_u.wrapping_shl(1); |
587 | | |
588 | 0 | if ax < 0x7f60000000000000u64 { |
589 | | // |x| < 0.0625 |
590 | | // check case x tiny first to avoid spurious underflow in x*x |
591 | 0 | if ax < 0x7940000000000000u64 { |
592 | | // |x| < 0x1p-53 |
593 | 0 | if ax == 0 { |
594 | 0 | return x; |
595 | 0 | } |
596 | | /* we have underflow when |x| < 2^-1022, or when |x| = 2^-1022 and |
597 | | the result is smaller than 2^-1022 in absolute value */ |
598 | 0 | let res = dyad_fmla(x.abs(), f64::from_bits(0xbc90000000000000), x); |
599 | 0 | return res; |
600 | 0 | } |
601 | 0 | } |
602 | | |
603 | 0 | if x_exp >= EXP_BIAS { |
604 | | // |x| >= 1 |
605 | 0 | if x_u >= 0x4650_0000_0000_0000u64 { |
606 | 0 | x_dd.hi = x; |
607 | 0 | } else { |
608 | 0 | x_dd = DoubleDouble::from_exact_add(x, 1.0); |
609 | 0 | } |
610 | | } else { |
611 | | // |x| < 1 |
612 | 0 | if x_exp < EXP_BIAS - 52 - 1 { |
613 | | // Quick return when |x| < 2^-53. |
614 | | // Since log(1 + x) = x - x^2/2 + x^3/3 - ..., |
615 | | // for |x| < 2^-53, |
616 | | // x > log(1 + x) > x - x^2 > x(1 - 2^-54) > x - ulp(x)/2 |
617 | | // Thus, |
618 | | // log(1 + x) = nextafter(x, -inf) for FE_DOWNWARD, or |
619 | | // FE_TOWARDZERO and x > 0, |
620 | | // = x otherwise. |
621 | 0 | if x == 0.0 { |
622 | 0 | return x + x; |
623 | 0 | } |
624 | | |
625 | 0 | let tp = 1.0f32; |
626 | 0 | let tn = -1.0f32; |
627 | 0 | let rdp = tp - f32::from_bits(0x3d594caf) != tp; |
628 | 0 | let rdn = tn - f32::from_bits(0x33800000) != tn; |
629 | | |
630 | 0 | if x > 0. && rdp { |
631 | 0 | return f64::from_bits(x_u - 1); |
632 | 0 | } |
633 | | |
634 | 0 | if x < 0. && rdn { |
635 | 0 | return f64::from_bits(x_u + 1); |
636 | 0 | } |
637 | | |
638 | 0 | return if x + x == 0.0 { x + x } else { x }; |
639 | 0 | } |
640 | 0 | x_dd = DoubleDouble::from_exact_add(1.0, x); |
641 | | } |
642 | | |
643 | | // At this point, x_dd is the exact sum of 1 + x: |
644 | | // x_dd.hi + x_dd.lo = x + 1.0 exactly. |
645 | | // |x_dd.hi| >= 2^-54 |
646 | | // |x_dd.lo| < ulp(x_dd.hi) |
647 | | |
648 | 0 | let xhi_bits = x_dd.hi.to_bits(); |
649 | 0 | let xhi_frac = xhi_bits & ((1u64 << 52) - 1); |
650 | 0 | x_u = xhi_bits; |
651 | | // Range reduction: |
652 | | // Find k such that |x_hi - k * 2^-7| <= 2^-8. |
653 | 0 | let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32; |
654 | 0 | let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7); |
655 | 0 | let e_x = x_e as f64; |
656 | | |
657 | | const LOG_2: DoubleDouble = DoubleDouble::new( |
658 | | f64::from_bits(0x3d2ef35793c76730), |
659 | | f64::from_bits(0x3fe62e42fefa3800), |
660 | | ); |
661 | | |
662 | | // hi is exact |
663 | | // ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43 |
664 | | |
665 | 0 | let r_dd = LOG_R1_DD[idx as usize]; |
666 | | |
667 | 0 | let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1)); |
668 | | // lo errors < |e_x| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo) |
669 | | // <= 2^11 * 2^(-43-53) = 2^-85 |
670 | 0 | let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0)); |
671 | | |
672 | | // Scale x_dd by 2^(-xh_bits.get_exponent()). |
673 | 0 | let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52); |
674 | | // Normalize arguments: |
675 | | // 1 <= m_dd.hi < 2 |
676 | | // |m_dd.lo| < 2^-52. |
677 | | // This is exact. |
678 | 0 | let m_hi = 1f64.to_bits() | xhi_frac; |
679 | | |
680 | 0 | let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) { |
681 | 0 | (x_dd.lo.to_bits() as i64).wrapping_sub(s_u) |
682 | | } else { |
683 | 0 | 0 |
684 | | }; |
685 | | |
686 | 0 | let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi)); |
687 | | |
688 | | // Perform range reduction: |
689 | | // r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1 |
690 | | // = (r * m_dd.hi - 1) + r * m_dd.lo |
691 | | // = v_hi + (v_lo.hi + v_lo.lo) |
692 | | // where: |
693 | | // v_hi = r * m_dd.hi - 1 (exact) |
694 | | // v_lo.hi + v_lo.lo = r * m_dd.lo (exact) |
695 | | // Bounds on the values: |
696 | | // -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8 |
697 | | // |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52 |
698 | | // |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105) |
699 | 0 | let r = R1[idx as usize]; |
700 | | let v_hi; |
701 | 0 | let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r)); |
702 | | |
703 | | #[cfg(any( |
704 | | all( |
705 | | any(target_arch = "x86", target_arch = "x86_64"), |
706 | | target_feature = "fma" |
707 | | ), |
708 | | target_arch = "aarch64" |
709 | | ))] |
710 | | { |
711 | | v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact. |
712 | | } |
713 | | |
714 | | #[cfg(not(any( |
715 | | all( |
716 | | any(target_arch = "x86", target_arch = "x86_64"), |
717 | | target_feature = "fma" |
718 | | ), |
719 | | target_arch = "aarch64" |
720 | | )))] |
721 | 0 | { |
722 | 0 | let c = f64::from_bits( |
723 | 0 | (idx as u64) |
724 | 0 | .wrapping_shl(52 - 7) |
725 | 0 | .wrapping_add(0x3FF0_0000_0000_0000u64), |
726 | 0 | ); |
727 | 0 | v_hi = f_fmla( |
728 | 0 | f64::from_bits(r), |
729 | 0 | m_dd.hi - c, |
730 | 0 | f64::from_bits(RCM1[idx as usize]), |
731 | 0 | ); // Exact |
732 | 0 | } |
733 | | |
734 | | // Range reduction output: |
735 | | // -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8 |
736 | | // |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60 |
737 | 0 | let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi); |
738 | 0 | v_dd.lo += v_lo.lo; |
739 | | |
740 | | // Exact sum: |
741 | | // r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u |
742 | 0 | let r1 = DoubleDouble::from_exact_add(hi, v_dd.hi); |
743 | | |
744 | | // Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ... |
745 | | // generated by Sollya with: |
746 | | // > P = fpminimax(log(1 + x)/x, 6, [|1, 1, D...|], |
747 | | // [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]); |
748 | | const P_COEFFS: [u64; 6] = [ |
749 | | 0xbfe0000000000000, |
750 | | 0x3fd5555555555166, |
751 | | 0xbfcfffffffdb7746, |
752 | | 0x3fc99999a8718a60, |
753 | | 0xbfc555874ce8ce22, |
754 | | 0x3fc24335555ddbe5, |
755 | | ]; |
756 | | |
757 | | // C * ulp(v_sq) + err_hi |
758 | 0 | let v_sq = v_dd.hi * v_dd.hi; |
759 | 0 | let p0 = f_fmla( |
760 | 0 | v_dd.hi, |
761 | 0 | f64::from_bits(P_COEFFS[1]), |
762 | 0 | f64::from_bits(P_COEFFS[0]), |
763 | | ); |
764 | 0 | let p1 = f_fmla( |
765 | 0 | v_dd.hi, |
766 | 0 | f64::from_bits(P_COEFFS[3]), |
767 | 0 | f64::from_bits(P_COEFFS[2]), |
768 | | ); |
769 | 0 | let p2 = f_fmla( |
770 | 0 | v_dd.hi, |
771 | 0 | f64::from_bits(P_COEFFS[5]), |
772 | 0 | f64::from_bits(P_COEFFS[4]), |
773 | | ); |
774 | 0 | let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2); |
775 | | |
776 | | const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0]; |
777 | 0 | let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }]; |
778 | | |
779 | 0 | let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi); |
780 | | |
781 | 0 | let left = r1.hi + (p - err); |
782 | 0 | let right = r1.hi + (p + err); |
783 | | // Ziv's test to see if fast pass is accurate enough. |
784 | 0 | if left == right { |
785 | 0 | return left; |
786 | 0 | } |
787 | 0 | log1p_accurate_dd(x) |
788 | 0 | } |
789 | | |
790 | | #[cold] |
791 | 0 | fn log1p_accurate_dd(x: f64) -> f64 { |
792 | 0 | log1p_dd(x).to_f64() |
793 | 0 | } |
794 | | |
795 | | #[cfg(test)] |
796 | | mod tests { |
797 | | use super::*; |
798 | | |
799 | | #[test] |
800 | | fn test_log1p() { |
801 | | assert_eq!( |
802 | | f_log1p(-0.0000000000000003834186599935256), |
803 | | -0.00000000000000038341865999352564 |
804 | | ); |
805 | | assert_eq!(f_log1p(0.000032417476177515677), 0.000032416950742490306); |
806 | | assert_eq!(f_log1p(-0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001866527236137164), |
807 | | -0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001866527236137164); |
808 | | assert_eq!(f_log1p(-0.0016481876264151651), -0.0016495473819346394); |
809 | | assert_eq!(f_log1p(-0.55), -0.7985076962177717); |
810 | | assert_eq!(f_log1p(-0.65), -1.0498221244986778); |
811 | | assert_eq!(f_log1p(1.65), 0.9745596399981308); |
812 | | assert!(f_log1p(-2.).is_nan()); |
813 | | } |
814 | | } |