Coverage Report

Created: 2025-10-10 07:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/logs/log1p.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::bits::{EXP_MASK, get_exponent_f64};
30
use crate::common::{dyad_fmla, f_fmla};
31
use crate::double_double::DoubleDouble;
32
use crate::dyadic_float::DyadicFloat128;
33
use crate::logs::log1p_dd::log1p_dd;
34
use crate::logs::log1p_dyadic::log1p_accurate;
35
use crate::polyeval::f_polyeval4;
36
37
// R1[i] = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7) )
38
pub(crate) static R1: [u64; 129] = [
39
    0x3ff0000000000000,
40
    0x3fefc00000000000,
41
    0x3fef800000000000,
42
    0x3fef400000000000,
43
    0x3fef000000000000,
44
    0x3feec00000000000,
45
    0x3feea00000000000,
46
    0x3fee600000000000,
47
    0x3fee200000000000,
48
    0x3fede00000000000,
49
    0x3feda00000000000,
50
    0x3fed800000000000,
51
    0x3fed400000000000,
52
    0x3fed000000000000,
53
    0x3fece00000000000,
54
    0x3feca00000000000,
55
    0x3fec800000000000,
56
    0x3fec400000000000,
57
    0x3fec000000000000,
58
    0x3febe00000000000,
59
    0x3feba00000000000,
60
    0x3feb800000000000,
61
    0x3feb400000000000,
62
    0x3feb200000000000,
63
    0x3feb000000000000,
64
    0x3feac00000000000,
65
    0x3feaa00000000000,
66
    0x3fea600000000000,
67
    0x3fea400000000000,
68
    0x3fea200000000000,
69
    0x3fe9e00000000000,
70
    0x3fe9c00000000000,
71
    0x3fe9a00000000000,
72
    0x3fe9800000000000,
73
    0x3fe9400000000000,
74
    0x3fe9200000000000,
75
    0x3fe9000000000000,
76
    0x3fe8e00000000000,
77
    0x3fe8a00000000000,
78
    0x3fe8800000000000,
79
    0x3fe8600000000000,
80
    0x3fe8400000000000,
81
    0x3fe8200000000000,
82
    0x3fe8000000000000,
83
    0x3fe7e00000000000,
84
    0x3fe7a00000000000,
85
    0x3fe7800000000000,
86
    0x3fe7600000000000,
87
    0x3fe7400000000000,
88
    0x3fe7200000000000,
89
    0x3fe7000000000000,
90
    0x3fe6e00000000000,
91
    0x3fe6c00000000000,
92
    0x3fe6a00000000000,
93
    0x3fe6800000000000,
94
    0x3fe6600000000000,
95
    0x3fe6400000000000,
96
    0x3fe6200000000000,
97
    0x3fe6000000000000,
98
    0x3fe5e00000000000,
99
    0x3fe5c00000000000,
100
    0x3fe5a00000000000,
101
    0x3fe5800000000000,
102
    0x3fe5800000000000,
103
    0x3fe5600000000000,
104
    0x3fe5400000000000,
105
    0x3fe5200000000000,
106
    0x3fe5000000000000,
107
    0x3fe4e00000000000,
108
    0x3fe4c00000000000,
109
    0x3fe4a00000000000,
110
    0x3fe4a00000000000,
111
    0x3fe4800000000000,
112
    0x3fe4600000000000,
113
    0x3fe4400000000000,
114
    0x3fe4200000000000,
115
    0x3fe4200000000000,
116
    0x3fe4000000000000,
117
    0x3fe3e00000000000,
118
    0x3fe3c00000000000,
119
    0x3fe3c00000000000,
120
    0x3fe3a00000000000,
121
    0x3fe3800000000000,
122
    0x3fe3600000000000,
123
    0x3fe3600000000000,
124
    0x3fe3400000000000,
125
    0x3fe3200000000000,
126
    0x3fe3000000000000,
127
    0x3fe3000000000000,
128
    0x3fe2e00000000000,
129
    0x3fe2c00000000000,
130
    0x3fe2c00000000000,
131
    0x3fe2a00000000000,
132
    0x3fe2800000000000,
133
    0x3fe2800000000000,
134
    0x3fe2600000000000,
135
    0x3fe2400000000000,
136
    0x3fe2400000000000,
137
    0x3fe2200000000000,
138
    0x3fe2000000000000,
139
    0x3fe2000000000000,
140
    0x3fe1e00000000000,
141
    0x3fe1c00000000000,
142
    0x3fe1c00000000000,
143
    0x3fe1a00000000000,
144
    0x3fe1a00000000000,
145
    0x3fe1800000000000,
146
    0x3fe1600000000000,
147
    0x3fe1600000000000,
148
    0x3fe1400000000000,
149
    0x3fe1400000000000,
150
    0x3fe1200000000000,
151
    0x3fe1200000000000,
152
    0x3fe1000000000000,
153
    0x3fe0e00000000000,
154
    0x3fe0e00000000000,
155
    0x3fe0c00000000000,
156
    0x3fe0c00000000000,
157
    0x3fe0a00000000000,
158
    0x3fe0a00000000000,
159
    0x3fe0800000000000,
160
    0x3fe0800000000000,
161
    0x3fe0600000000000,
162
    0x3fe0600000000000,
163
    0x3fe0400000000000,
164
    0x3fe0400000000000,
165
    0x3fe0200000000000,
166
    0x3fe0200000000000,
167
    0x3fe0000000000000,
168
];
169
170
// Extra constants for exact range reduction when FMA instructions are not
171
// available:
172
// r * c - 1 for r = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7))
173
//           and c = 1 + i * 2^-7
174
//           with i = 0..128.
175
#[cfg(not(any(
176
    all(
177
        any(target_arch = "x86", target_arch = "x86_64"),
178
        target_feature = "fma"
179
    ),
180
    target_arch = "aarch64"
181
)))]
182
pub(crate) static RCM1: [u64; 129] = [
183
    0x0000000000000000,
184
    0xbf10000000000000,
185
    0xbf30000000000000,
186
    0xbf42000000000000,
187
    0xbf50000000000000,
188
    0xbf59000000000000,
189
    0x3f5f000000000000,
190
    0x3f52800000000000,
191
    0x3f30000000000000,
192
    0xbf49000000000000,
193
    0xbf5f000000000000,
194
    0x3f52000000000000,
195
    0xbf30000000000000,
196
    0xbf5c000000000000,
197
    0x3f51000000000000,
198
    0xbf45000000000000,
199
    0x3f60000000000000,
200
    0x3f10000000000000,
201
    0xbf60000000000000,
202
    0x3f3a000000000000,
203
    0xbf5e000000000000,
204
    0x3f38000000000000,
205
    0xbf61000000000000,
206
    0xbf00000000000000,
207
    0x3f60000000000000,
208
    0xbf4a000000000000,
209
    0x3f51000000000000,
210
    0xbf5f800000000000,
211
    0xbf30000000000000,
212
    0x3f56800000000000,
213
    0xbf5f000000000000,
214
    0xbf3c000000000000,
215
    0x3f50000000000000,
216
    0x3f63000000000000,
217
    0xbf56000000000000,
218
    0xbf24000000000000,
219
    0x3f50000000000000,
220
    0x3f60c00000000000,
221
    0xbf60800000000000,
222
    0xbf52000000000000,
223
    0xbf30000000000000,
224
    0x3f42000000000000,
225
    0x3f55000000000000,
226
    0x3f60000000000000,
227
    0x3f65000000000000,
228
    0xbf61c00000000000,
229
    0xbf5c000000000000,
230
    0xbf55800000000000,
231
    0xbf50000000000000,
232
    0xbf47000000000000,
233
    0xbf40000000000000,
234
    0xbf36000000000000,
235
    0xbf30000000000000,
236
    0xbf2c000000000000,
237
    0xbf30000000000000,
238
    0xbf36000000000000,
239
    0xbf40000000000000,
240
    0xbf47000000000000,
241
    0xbf50000000000000,
242
    0xbf55800000000000,
243
    0xbf5c000000000000,
244
    0xbf61c00000000000,
245
    0xbf66000000000000,
246
    0x3f65000000000000,
247
    0x3f60000000000000,
248
    0x3f55000000000000,
249
    0x3f42000000000000,
250
    0xbf30000000000000,
251
    0xbf52000000000000,
252
    0xbf60800000000000,
253
    0xbf68800000000000,
254
    0x3f60c00000000000,
255
    0x3f50000000000000,
256
    0xbf24000000000000,
257
    0xbf56000000000000,
258
    0xbf65400000000000,
259
    0x3f63000000000000,
260
    0x3f50000000000000,
261
    0xbf3c000000000000,
262
    0xbf5f000000000000,
263
    0x3f68000000000000,
264
    0x3f56800000000000,
265
    0xbf30000000000000,
266
    0xbf5f800000000000,
267
    0x3f67000000000000,
268
    0x3f51000000000000,
269
    0xbf4a000000000000,
270
    0xbf66000000000000,
271
    0x3f60000000000000,
272
    0xbf00000000000000,
273
    0xbf61000000000000,
274
    0x3f64800000000000,
275
    0x3f38000000000000,
276
    0xbf5e000000000000,
277
    0x3f66000000000000,
278
    0x3f3a000000000000,
279
    0xbf60000000000000,
280
    0x3f64800000000000,
281
    0x3f10000000000000,
282
    0xbf64000000000000,
283
    0x3f60000000000000,
284
    0xbf45000000000000,
285
    0xbf6b000000000000,
286
    0x3f51000000000000,
287
    0xbf5c000000000000,
288
    0x3f65400000000000,
289
    0xbf30000000000000,
290
    0xbf69c00000000000,
291
    0x3f52000000000000,
292
    0xbf5f000000000000,
293
    0x3f63000000000000,
294
    0xbf49000000000000,
295
    0x3f6c000000000000,
296
    0x3f30000000000000,
297
    0xbf68800000000000,
298
    0x3f52800000000000,
299
    0xbf62000000000000,
300
    0x3f5f000000000000,
301
    0xbf59000000000000,
302
    0x3f64c00000000000,
303
    0xbf50000000000000,
304
    0x3f69000000000000,
305
    0xbf42000000000000,
306
    0x3f6c400000000000,
307
    0xbf30000000000000,
308
    0x3f6e800000000000,
309
    0xbf10000000000000,
310
    0x3f6fc00000000000,
311
    0x0000000000000000,
312
];
313
314
pub(crate) static LOG_R1_DD: [(u64, u64); 129] = [
315
    (0x0000000000000000, 0x0000000000000000),
316
    (0xbd10c76b999d2be8, 0x3f80101575890000),
317
    (0xbd23dc5b06e2f7d2, 0x3f90205658938000),
318
    (0xbd2aa0ba325a0c34, 0x3f98492528c90000),
319
    (0x3d0111c05cf1d753, 0x3fa0415d89e74000),
320
    (0xbd2c167375bdfd28, 0x3fa466aed42e0000),
321
    (0xbd029efbec19afa2, 0x3fa67c94f2d4c000),
322
    (0x3d20fc1a353bb42e, 0x3faaaef2d0fb0000),
323
    (0xbd0e113e4fc93b7b, 0x3faeea31c006c000),
324
    (0xbd25325d560d9e9b, 0x3fb1973bd1466000),
325
    (0x3d2cc85ea5db4ed7, 0x3fb3bdf5a7d1e000),
326
    (0xbcf53a2582f4e1ef, 0x3fb4d3115d208000),
327
    (0x3cec1e8da99ded32, 0x3fb700d30aeac000),
328
    (0x3d23115c3abd47da, 0x3fb9335e5d594000),
329
    (0xbd0e42b6b94407c8, 0x3fba4e7640b1c000),
330
    (0x3d2646d1c65aacd3, 0x3fbc885801bc4000),
331
    (0x3d1a89401fa71733, 0x3fbda72763844000),
332
    (0xbd2534d64fa10afd, 0x3fbfe89139dbe000),
333
    (0x3d21ef78ce2d07f2, 0x3fc1178e8227e000),
334
    (0x3d2ca78e44389934, 0x3fc1aa2b7e23f000),
335
    (0x3d039d6ccb81b4a1, 0x3fc2d1610c868000),
336
    (0x3cc62fa8234b7289, 0x3fc365fcb0159000),
337
    (0x3d25837954fdb678, 0x3fc4913d8333b000),
338
    (0x3d2633e8e5697dc7, 0x3fc527e5e4a1b000),
339
    (0xbd127023eb68981c, 0x3fc5bf406b544000),
340
    (0xbd25118de59c21e1, 0x3fc6f0128b757000),
341
    (0xbd1c661070914305, 0x3fc7898d85445000),
342
    (0xbd073d54aae92cd1, 0x3fc8beafeb390000),
343
    (0x3d07f22858a0ff6f, 0x3fc95a5adcf70000),
344
    (0x3d29904d6865817a, 0x3fc9f6c407089000),
345
    (0xbd0c358d4eace1aa, 0x3fcb31d8575bd000),
346
    (0xbd2d4bc4595412b6, 0x3fcbd087383be000),
347
    (0xbcf1ec72c5962bd2, 0x3fcc6ffbc6f01000),
348
    (0xbd084a7e75b6f6e4, 0x3fcd1037f2656000),
349
    (0x3cc212276041f430, 0x3fce530effe71000),
350
    (0xbcca211565bb8e11, 0x3fcef5ade4dd0000),
351
    (0x3d1bcbecca0cdf30, 0x3fcf991c6cb3b000),
352
    (0xbd16f08c1485e94a, 0x3fd01eae5626c800),
353
    (0x3d27188b163ceae9, 0x3fd0c42d67616000),
354
    (0xbd2c210e63a5f01c, 0x3fd1178e8227e800),
355
    (0x3d2b9acdf7a51681, 0x3fd16b5ccbacf800),
356
    (0x3d2ca6ed5147bdb7, 0x3fd1bf99635a6800),
357
    (0x3d0a87deba46baea, 0x3fd214456d0eb800),
358
    (0x3d2c93c1df5bb3b6, 0x3fd269621134d800),
359
    (0x3d2a9cfa4a5004f4, 0x3fd2bef07cdc9000),
360
    (0x3d116ecdb0f177c8, 0x3fd36b6776be1000),
361
    (0x3d183b54b606bd5c, 0x3fd3c25277333000),
362
    (0x3d08e436ec90e09d, 0x3fd419b423d5e800),
363
    (0xbd2f27ce0967d675, 0x3fd4718dc271c800),
364
    (0xbd2e20891b0ad8a4, 0x3fd4c9e09e173000),
365
    (0x3d2ebe708164c759, 0x3fd522ae0738a000),
366
    (0x3d1fadedee5d40ef, 0x3fd57bf753c8d000),
367
    (0xbd0a0b2a08a465dc, 0x3fd5d5bddf596000),
368
    (0xbd2db623e731ae00, 0x3fd630030b3ab000),
369
    (0x3d20a0d32756eba0, 0x3fd68ac83e9c6800),
370
    (0x3d1721657c222d87, 0x3fd6e60ee6af1800),
371
    (0x3d2d8b0949dc60b3, 0x3fd741d876c67800),
372
    (0x3d29ec7d2efd1778, 0x3fd79e26687cf800),
373
    (0xbd272090c812566a, 0x3fd7fafa3bd81800),
374
    (0x3d2fd56f3333778a, 0x3fd85855776dc800),
375
    (0xbd205ae1e5e70470, 0x3fd8b639a88b3000),
376
    (0xbd1766b52ee6307d, 0x3fd914a8635bf800),
377
    (0xbd152313a502d9f0, 0x3fd973a343135800),
378
    (0xbd152313a502d9f0, 0x3fd973a343135800),
379
    (0xbd26279e10d0c0b0, 0x3fd9d32bea15f000),
380
    (0x3d23c6457f9d79f5, 0x3fda33440224f800),
381
    (0x3d1e36f2bea77a5d, 0x3fda93ed3c8ad800),
382
    (0xbd217cc552774458, 0x3fdaf5295248d000),
383
    (0x3d1095252d841995, 0x3fdb56fa04462800),
384
    (0x3d27d85bf40a666d, 0x3fdbb9611b80e000),
385
    (0x3d2cec807fe8e180, 0x3fdc1c60693fa000),
386
    (0x3d2cec807fe8e180, 0x3fdc1c60693fa000),
387
    (0xbd29b6ddc15249ae, 0x3fdc7ff9c7455800),
388
    (0xbd0797c33ec7a6b0, 0x3fdce42f18064800),
389
    (0x3d235bafe9a767a8, 0x3fdd490246def800),
390
    (0xbd1ea42d60dc616a, 0x3fddae75484c9800),
391
    (0xbd1ea42d60dc616a, 0x3fddae75484c9800),
392
    (0xbd1326b207322938, 0x3fde148a1a272800),
393
    (0xbd2465505372bd08, 0x3fde7b42c3ddb000),
394
    (0x3d2f27f45a470251, 0x3fdee2a156b41000),
395
    (0x3d2f27f45a470251, 0x3fdee2a156b41000),
396
    (0x3d12cde56f014a8b, 0x3fdf4aa7ee031800),
397
    (0x3d0085fa3c164935, 0x3fdfb358af7a4800),
398
    (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800),
399
    (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800),
400
    (0xbd04c45fe79539e0, 0x3fe04360be760400),
401
    (0x3d26812241edf5fd, 0x3fe078bf0533c400),
402
    (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400),
403
    (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400),
404
    (0x3d1c299807801742, 0x3fe0e4898611cc00),
405
    (0xbd258647bb9ddcb2, 0x3fe11af823c75c00),
406
    (0xbd258647bb9ddcb2, 0x3fe11af823c75c00),
407
    (0xbd2edd97a293ae49, 0x3fe151c3f6f29800),
408
    (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00),
409
    (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00),
410
    (0x3cccacdeed70e667, 0x3fe1c07849ae6000),
411
    (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800),
412
    (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800),
413
    (0x3d12fc066e48667b, 0x3fe230b0d8bebc00),
414
    (0xbd0b61f105226250, 0x3fe269621134dc00),
415
    (0xbd0b61f105226250, 0x3fe269621134dc00),
416
    (0x3d206d2be797882d, 0x3fe2a2786d0ec000),
417
    (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000),
418
    (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000),
419
    (0xbd274e93c5a0ed9c, 0x3fe315da44340800),
420
    (0xbd274e93c5a0ed9c, 0x3fe315da44340800),
421
    (0x3d10b83f9527e6ac, 0x3fe35028ad9d8c00),
422
    (0xbd218b7abb5569a4, 0x3fe38ae217197800),
423
    (0xbd218b7abb5569a4, 0x3fe38ae217197800),
424
    (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000),
425
    (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000),
426
    (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00),
427
    (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00),
428
    (0xbcfd984f481051f7, 0x3fe43d9ff2f92400),
429
    (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400),
430
    (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400),
431
    (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00),
432
    (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00),
433
    (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000),
434
    (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000),
435
    (0x3d05ccc45d257531, 0x3fe5322e26867800),
436
    (0x3d05ccc45d257531, 0x3fe5322e26867800),
437
    (0x3d09980bff3303dd, 0x3fe5707a26bb8c00),
438
    (0x3d09980bff3303dd, 0x3fe5707a26bb8c00),
439
    (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800),
440
    (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800),
441
    (0x3d2202380cda46be, 0x3fe5ee82aa241800),
442
    (0x3d2202380cda46be, 0x3fe5ee82aa241800),
443
    (0x0000000000000000, 0x0000000000000000),
444
];
445
446
#[inline]
447
0
pub(crate) fn log1p_f64_dyadic(x: f64) -> DyadicFloat128 {
448
0
    let mut x_u = x.to_bits();
449
450
0
    let mut x_dd = DoubleDouble::default();
451
452
0
    let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
453
454
0
    if x_exp >= EXP_BIAS {
455
        // |x| >= 1
456
0
        if x_u >= 0x4650_0000_0000_0000u64 {
457
0
            x_dd.hi = x;
458
0
        } else {
459
0
            x_dd = DoubleDouble::from_exact_add(x, 1.0);
460
0
        }
461
0
    } else {
462
0
        // |x| < 1
463
0
        x_dd = DoubleDouble::from_exact_add(1.0, x);
464
0
    }
465
466
    const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
467
468
    // At this point, x_dd is the exact sum of 1 + x:
469
    //   x_dd.hi + x_dd.lo = x + 1.0 exactly.
470
    //   |x_dd.hi| >= 2^-54
471
    //   |x_dd.lo| < ulp(x_dd.hi)
472
473
0
    let xhi_bits = x_dd.hi.to_bits();
474
0
    let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
475
0
    x_u = xhi_bits;
476
    // Range reduction:
477
    // Find k such that |x_hi - k * 2^-7| <= 2^-8.
478
0
    let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
479
0
    let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
480
481
    // Scale x_dd by 2^(-xh_bits.get_exponent()).
482
0
    let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
483
    // Normalize arguments:
484
    //   1 <= m_dd.hi < 2
485
    //   |m_dd.lo| < 2^-52.
486
    // This is exact.
487
0
    let m_hi = 1f64.to_bits() | xhi_frac;
488
489
0
    let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
490
0
        (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
491
    } else {
492
0
        0
493
    };
494
495
0
    let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
496
497
    // Perform range reduction:
498
    //   r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
499
    //             = (r * m_dd.hi - 1) + r * m_dd.lo
500
    //             = v_hi + (v_lo.hi + v_lo.lo)
501
    // where:
502
    //   v_hi = r * m_dd.hi - 1          (exact)
503
    //   v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
504
    // Bounds on the values:
505
    //   -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
506
    //   |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52
507
    //   |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
508
0
    let r = R1[idx as usize];
509
    let v_hi;
510
0
    let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
511
512
    #[cfg(any(
513
        all(
514
            any(target_arch = "x86", target_arch = "x86_64"),
515
            target_feature = "fma"
516
        ),
517
        target_arch = "aarch64"
518
    ))]
519
    {
520
        v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
521
    }
522
523
    #[cfg(not(any(
524
        all(
525
            any(target_arch = "x86", target_arch = "x86_64"),
526
            target_feature = "fma"
527
        ),
528
        target_arch = "aarch64"
529
    )))]
530
0
    {
531
0
        let c = f64::from_bits(
532
0
            (idx as u64)
533
0
                .wrapping_shl(52 - 7)
534
0
                .wrapping_add(0x3FF0_0000_0000_0000u64),
535
0
        );
536
0
        v_hi = f_fmla(
537
0
            f64::from_bits(r),
538
0
            m_dd.hi - c,
539
0
            f64::from_bits(RCM1[idx as usize]),
540
0
        ); // Exact
541
0
    }
542
543
    // Range reduction output:
544
    //   -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
545
    //   |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
546
0
    let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
547
0
    v_dd.lo += v_lo.lo;
548
549
0
    log1p_accurate(x_e, idx as usize, v_dd)
550
0
}
551
552
/// Computes log(x+1)
553
///
554
/// Max ULP 0.5
555
0
pub fn f_log1p(x: f64) -> f64 {
556
0
    let mut x_u = x.to_bits();
557
558
0
    let mut x_dd = DoubleDouble::default();
559
560
0
    let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
561
562
    const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
563
564
0
    let e = (((x_u >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);
565
566
0
    if e == 0x400 || x == 0. || x <= -1.0 {
567
        /* case NaN/Inf, +/-0 or x <= -1 */
568
0
        if e == 0x400 && x.to_bits() != 0xfffu64 << 52 {
569
            /* NaN or + Inf*/
570
0
            return x + x;
571
0
        }
572
0
        if x <= -1.0
573
        /* we use the fact that NaN < -1 is false */
574
        {
575
            /* log2p(x<-1) is NaN, log2p(-1) is -Inf and raises DivByZero */
576
0
            return if x < -1.0 {
577
0
                f64::NAN
578
            } else {
579
                // x=-1
580
0
                f64::NEG_INFINITY
581
            };
582
0
        }
583
0
        return x + x; /* +/-0 */
584
0
    }
585
586
0
    let ax = x_u.wrapping_shl(1);
587
588
0
    if ax < 0x7f60000000000000u64 {
589
        // |x| < 0.0625
590
        // check case x tiny first to avoid spurious underflow in x*x
591
0
        if ax < 0x7940000000000000u64 {
592
            // |x| < 0x1p-53
593
0
            if ax == 0 {
594
0
                return x;
595
0
            }
596
            /* we have underflow when |x| < 2^-1022, or when |x| = 2^-1022 and
597
            the result is smaller than 2^-1022 in absolute value */
598
0
            let res = dyad_fmla(x.abs(), f64::from_bits(0xbc90000000000000), x);
599
0
            return res;
600
0
        }
601
0
    }
602
603
0
    if x_exp >= EXP_BIAS {
604
        // |x| >= 1
605
0
        if x_u >= 0x4650_0000_0000_0000u64 {
606
0
            x_dd.hi = x;
607
0
        } else {
608
0
            x_dd = DoubleDouble::from_exact_add(x, 1.0);
609
0
        }
610
    } else {
611
        // |x| < 1
612
0
        if x_exp < EXP_BIAS - 52 - 1 {
613
            // Quick return when |x| < 2^-53.
614
            // Since log(1 + x) = x - x^2/2 + x^3/3 - ...,
615
            // for |x| < 2^-53,
616
            //   x > log(1 + x) > x - x^2 > x(1 - 2^-54) > x - ulp(x)/2
617
            // Thus,
618
            //   log(1 + x) = nextafter(x, -inf) for FE_DOWNWARD, or
619
            //                                       FE_TOWARDZERO and x > 0,
620
            //              = x                  otherwise.
621
0
            if x == 0.0 {
622
0
                return x + x;
623
0
            }
624
625
0
            let tp = 1.0f32;
626
0
            let tn = -1.0f32;
627
0
            let rdp = tp - f32::from_bits(0x3d594caf) != tp;
628
0
            let rdn = tn - f32::from_bits(0x33800000) != tn;
629
630
0
            if x > 0. && rdp {
631
0
                return f64::from_bits(x_u - 1);
632
0
            }
633
634
0
            if x < 0. && rdn {
635
0
                return f64::from_bits(x_u + 1);
636
0
            }
637
638
0
            return if x + x == 0.0 { x + x } else { x };
639
0
        }
640
0
        x_dd = DoubleDouble::from_exact_add(1.0, x);
641
    }
642
643
    // At this point, x_dd is the exact sum of 1 + x:
644
    //   x_dd.hi + x_dd.lo = x + 1.0 exactly.
645
    //   |x_dd.hi| >= 2^-54
646
    //   |x_dd.lo| < ulp(x_dd.hi)
647
648
0
    let xhi_bits = x_dd.hi.to_bits();
649
0
    let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
650
0
    x_u = xhi_bits;
651
    // Range reduction:
652
    // Find k such that |x_hi - k * 2^-7| <= 2^-8.
653
0
    let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
654
0
    let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
655
0
    let e_x = x_e as f64;
656
657
    const LOG_2: DoubleDouble = DoubleDouble::new(
658
        f64::from_bits(0x3d2ef35793c76730),
659
        f64::from_bits(0x3fe62e42fefa3800),
660
    );
661
662
    // hi is exact
663
    // ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43
664
665
0
    let r_dd = LOG_R1_DD[idx as usize];
666
667
0
    let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1));
668
    // lo errors < |e_x| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo)
669
    //           <= 2^11 * 2^(-43-53) = 2^-85
670
0
    let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0));
671
672
    // Scale x_dd by 2^(-xh_bits.get_exponent()).
673
0
    let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
674
    // Normalize arguments:
675
    //   1 <= m_dd.hi < 2
676
    //   |m_dd.lo| < 2^-52.
677
    // This is exact.
678
0
    let m_hi = 1f64.to_bits() | xhi_frac;
679
680
0
    let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
681
0
        (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
682
    } else {
683
0
        0
684
    };
685
686
0
    let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
687
688
    // Perform range reduction:
689
    //   r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
690
    //             = (r * m_dd.hi - 1) + r * m_dd.lo
691
    //             = v_hi + (v_lo.hi + v_lo.lo)
692
    // where:
693
    //   v_hi = r * m_dd.hi - 1          (exact)
694
    //   v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
695
    // Bounds on the values:
696
    //   -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
697
    //   |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52
698
    //   |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
699
0
    let r = R1[idx as usize];
700
    let v_hi;
701
0
    let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
702
703
    #[cfg(any(
704
        all(
705
            any(target_arch = "x86", target_arch = "x86_64"),
706
            target_feature = "fma"
707
        ),
708
        target_arch = "aarch64"
709
    ))]
710
    {
711
        v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
712
    }
713
714
    #[cfg(not(any(
715
        all(
716
            any(target_arch = "x86", target_arch = "x86_64"),
717
            target_feature = "fma"
718
        ),
719
        target_arch = "aarch64"
720
    )))]
721
0
    {
722
0
        let c = f64::from_bits(
723
0
            (idx as u64)
724
0
                .wrapping_shl(52 - 7)
725
0
                .wrapping_add(0x3FF0_0000_0000_0000u64),
726
0
        );
727
0
        v_hi = f_fmla(
728
0
            f64::from_bits(r),
729
0
            m_dd.hi - c,
730
0
            f64::from_bits(RCM1[idx as usize]),
731
0
        ); // Exact
732
0
    }
733
734
    // Range reduction output:
735
    //   -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
736
    //   |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
737
0
    let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
738
0
    v_dd.lo += v_lo.lo;
739
740
    // Exact sum:
741
    //   r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
742
0
    let r1 = DoubleDouble::from_exact_add(hi, v_dd.hi);
743
744
    // Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ...
745
    // generated by Sollya with:
746
    // > P = fpminimax(log(1 + x)/x, 6, [|1, 1, D...|],
747
    //                 [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]);
748
    const P_COEFFS: [u64; 6] = [
749
        0xbfe0000000000000,
750
        0x3fd5555555555166,
751
        0xbfcfffffffdb7746,
752
        0x3fc99999a8718a60,
753
        0xbfc555874ce8ce22,
754
        0x3fc24335555ddbe5,
755
    ];
756
757
    //   C * ulp(v_sq) + err_hi
758
0
    let v_sq = v_dd.hi * v_dd.hi;
759
0
    let p0 = f_fmla(
760
0
        v_dd.hi,
761
0
        f64::from_bits(P_COEFFS[1]),
762
0
        f64::from_bits(P_COEFFS[0]),
763
    );
764
0
    let p1 = f_fmla(
765
0
        v_dd.hi,
766
0
        f64::from_bits(P_COEFFS[3]),
767
0
        f64::from_bits(P_COEFFS[2]),
768
    );
769
0
    let p2 = f_fmla(
770
0
        v_dd.hi,
771
0
        f64::from_bits(P_COEFFS[5]),
772
0
        f64::from_bits(P_COEFFS[4]),
773
    );
774
0
    let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2);
775
776
    const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0];
777
0
    let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }];
778
779
0
    let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi);
780
781
0
    let left = r1.hi + (p - err);
782
0
    let right = r1.hi + (p + err);
783
    // Ziv's test to see if fast pass is accurate enough.
784
0
    if left == right {
785
0
        return left;
786
0
    }
787
0
    log1p_accurate_dd(x)
788
0
}
789
790
#[cold]
791
0
fn log1p_accurate_dd(x: f64) -> f64 {
792
0
    log1p_dd(x).to_f64()
793
0
}
794
795
#[cfg(test)]
796
mod tests {
797
    use super::*;
798
799
    #[test]
800
    fn test_log1p() {
801
        assert_eq!(
802
            f_log1p(-0.0000000000000003834186599935256),
803
            -0.00000000000000038341865999352564
804
        );
805
        assert_eq!(f_log1p(0.000032417476177515677), 0.000032416950742490306);
806
        assert_eq!(f_log1p(-0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001866527236137164),
807
                   -0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001866527236137164);
808
        assert_eq!(f_log1p(-0.0016481876264151651), -0.0016495473819346394);
809
        assert_eq!(f_log1p(-0.55), -0.7985076962177717);
810
        assert_eq!(f_log1p(-0.65), -1.0498221244986778);
811
        assert_eq!(f_log1p(1.65), 0.9745596399981308);
812
        assert!(f_log1p(-2.).is_nan());
813
    }
814
}