Coverage Report

Created: 2026-01-19 07:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/exponents/expm1.rs
Line
Count
Source
1
/*
2
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3
 * //
4
 * // Redistribution and use in source and binary forms, with or without modification,
5
 * // are permitted provided that the following conditions are met:
6
 * //
7
 * // 1.  Redistributions of source code must retain the above copyright notice, this
8
 * // list of conditions and the following disclaimer.
9
 * //
10
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11
 * // this list of conditions and the following disclaimer in the documentation
12
 * // and/or other materials provided with the distribution.
13
 * //
14
 * // 3.  Neither the name of the copyright holder nor the names of its
15
 * // contributors may be used to endorse or promote products derived from
16
 * // this software without specific prior written permission.
17
 * //
18
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29
use crate::common::{dd_fmla, f_fmla};
30
use crate::double_double::DoubleDouble;
31
use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
32
use crate::exponents::fast_ldexp;
33
use crate::rounding::CpuRoundTiesEven;
34
use crate::shared_eval::poly_dekker_generic;
35
use std::hint::black_box;
36
37
static TZ: [(u64, u64); 65] = [
38
    (0xbc6797d4686c5393, 0xbfcc5041854df7d4),
39
    (0xbc8ea1cb9d163339, 0xbfcb881a23aebb48),
40
    (0x3c8f483a3e8cd60f, 0xbfcabe60e1f21838),
41
    (0x3c7dffd920f493db, 0xbfc9f3129931fab0),
42
    (0xbc851bfdbb129094, 0xbfc9262c1c3430a0),
43
    (0x3c8cd3e5225e2206, 0xbfc857aa375db4e4),
44
    (0x3c5e3a6bdaece8f9, 0xbfc78789b0a5e0c0),
45
    (0xbc8daf2ae0c2d3d4, 0xbfc6b5c7478983d8),
46
    (0xbc7fd36226fadd44, 0xbfc5e25fb4fde210),
47
    (0x3c7d887cd0341ab0, 0xbfc50d4fab639758),
48
    (0xbc8676a52a1a618b, 0xbfc43693d679612c),
49
    (0x3c79776b420ad283, 0xbfc35e28db4ecd9c),
50
    (0x3c73d5fd7d70a5ed, 0xbfc2840b5836cf68),
51
    (0x3c5a94ad2c8fa0bf, 0xbfc1a837e4ba3760),
52
    (0x3c26ad4c353465b0, 0xbfc0caab118a1278),
53
    (0xbc78bba170e59b65, 0xbfbfd6c2d0e3d910),
54
    (0xbc8e1e0a76cb0685, 0xbfbe14aed893eef0),
55
    (0x3c8fe131f55e75f8, 0xbfbc4f1331d22d40),
56
    (0xbc8b5beee8bcee31, 0xbfba85e8c62d9c10),
57
    (0xbc77fe9b02c25e9b, 0xbfb8b92870fa2b58),
58
    (0xbc832ae7bdaf1116, 0xbfb6e8caff341fe8),
59
    (0x3c7a6cfe58cbd73b, 0xbfb514c92f634788),
60
    (0x3c68798de3138a56, 0xbfb33d1bb17df2e8),
61
    (0xbc3589321a7ef10b, 0xbfb161bb26cbb590),
62
    (0xbc78d0e700fcfb65, 0xbfaf0540438fd5c0),
63
    (0x3c8473ef07d5dd3b, 0xbfab3f864c080000),
64
    (0xbc838e62149c16e2, 0xbfa7723950130400),
65
    (0xbc508bb6309bd394, 0xbfa39d4a1a77e050),
66
    (0xbc8bad3fd501a227, 0xbf9f8152aee94500),
67
    (0x3c63d27ac39ed253, 0xbf97b88f290230e0),
68
    (0xbc8b60bbd08aac55, 0xbf8fc055004416c0),
69
    (0xbc4a00d03b3359de, 0xbf7fe0154aaeed80),
70
    (0x0000000000000000, 0x0000000000000000),
71
    (0x3c8861931c15e39b, 0x3f80100ab00222c0),
72
    (0x3c77ab864b3e9045, 0x3f90202ad5778e40),
73
    (0x3c74e5659d75e95b, 0x3f984890d9043740),
74
    (0x3c78e0bd083aba81, 0x3fa040ac0224fd90),
75
    (0x3c345cc1cf959b1b, 0x3fa465509d383eb0),
76
    (0xbc8eb6980ce14da7, 0x3fa89246d053d180),
77
    (0x3c77324137d6c342, 0x3facc79f4f5613a0),
78
    (0xbc45272ff30eed1b, 0x3fb082b577d34ed8),
79
    (0xbc81280f19dace1c, 0x3fb2a5dd543ccc50),
80
    (0xbc8d550af31c8ec3, 0x3fb4cd4fc989cd68),
81
    (0x3c87923b72aa582d, 0x3fb6f91575870690),
82
    (0xbc776c2e732457f1, 0x3fb92937074e0cd8),
83
    (0x3c881f5c92a5200f, 0x3fbb5dbd3f681220),
84
    (0x3c8e8ac7a4d3206c, 0x3fbd96b0eff0e790),
85
    (0xbc712db6f4bbe33b, 0x3fbfd41afcba45e8),
86
    (0xbc58c4a5df1ec7e5, 0x3fc10b022db7ae68),
87
    (0xbc6bd4b1c37ea8a2, 0x3fc22e3b09dc54d8),
88
    (0x3c85aeb9860044d0, 0x3fc353bc9fb00b20),
89
    (0xbc64c26602c63fda, 0x3fc47b8b853aafec),
90
    (0xbc87f644c1f9d314, 0x3fc5a5ac59b963cc),
91
    (0x3c8f5aa8ec61fc2d, 0x3fc6d223c5b10638),
92
    (0x3c27ab912c69ffeb, 0x3fc800f67b00d7b8),
93
    (0xbc5b3564bc0ec9cd, 0x3fc9322934f54148),
94
    (0x3c86a7062465be33, 0x3fca65c0b85ac1a8),
95
    (0xbc885718d2ff1bf4, 0x3fcb9bc1d3910094),
96
    (0xbc8045cb0c685e08, 0x3fccd4315e9e0834),
97
    (0xbc16e7fb859d5055, 0x3fce0f143b41a554),
98
    (0x3c851bbdee020603, 0x3fcf4c6f5508ee5c),
99
    (0x3c6e17611afc42c5, 0x3fd04623d0b0f8c8),
100
    (0xbc71c5b2e8735a43, 0x3fd0e7510fd7c564),
101
    (0xbc825fe139c4cffd, 0x3fd189c1ecaeb084),
102
    (0xbc789843c4964554, 0x3fd22d78f0fa061a),
103
];
104
105
pub(crate) static EXPM1_T0: [(u64, u64); 64] = [
106
    (0x0000000000000000, 0x3ff0000000000000),
107
    (0xbc719083535b085e, 0x3ff02c9a3e778061),
108
    (0x3c8d73e2a475b466, 0x3ff059b0d3158574),
109
    (0x3c6186be4bb28500, 0x3ff0874518759bc8),
110
    (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
111
    (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
112
    (0xbc96c51039449b3a, 0x3ff11301d0125b51),
113
    (0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
114
    (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
115
    (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
116
    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
117
    (0x3c8dc775814a8494, 0x3ff2063b88628cd6),
118
    (0x3c99b07eb6c70572, 0x3ff2387a6e756238),
119
    (0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
120
    (0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
121
    (0x3c90024754db41d4, 0x3ff2d285a6e4030b),
122
    (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
123
    (0x3c932721843659a6, 0x3ff33c08b26416ff),
124
    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
125
    (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
126
    (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
127
    (0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
128
    (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
129
    (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
130
    (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
131
    (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
132
    (0xbc807abe1db13cac, 0x3ff5342b569d4f82),
133
    (0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
134
    (0x3c96324c054647ac, 0x3ff5ab07dd485429),
135
    (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
136
    (0xbc9383c17e40b496, 0x3ff6247eb03a5585),
137
    (0xbc9bb60987591c34, 0x3ff6623882552225),
138
    (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
139
    (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
140
    (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
141
    (0xbc90245957316dd4, 0x3ff75feb564267c9),
142
    (0xbc841577ee049930, 0x3ff7a11473eb0187),
143
    (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
144
    (0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
145
    (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
146
    (0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
147
    (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
148
    (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
149
    (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
150
    (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
151
    (0xbc9359495d1cd532, 0x3ffa0c667b5de565),
152
    (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
153
    (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
154
    (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
155
    (0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
156
    (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
157
    (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
158
    (0x3c811065895048de, 0x3ffc199bdd85529c),
159
    (0x3c92884dff483cac, 0x3ffc67f12e57d14b),
160
    (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
161
    (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
162
    (0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
163
    (0x3c9c2300696db532, 0x3ffda9e603db3285),
164
    (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
165
    (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
166
    (0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
167
    (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
168
    (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
169
    (0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
170
];
171
172
pub(crate) static EXPM1_T1: [(u64, u64); 64] = [
173
    (0x0000000000000000, 0x3ff0000000000000),
174
    (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
175
    (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
176
    (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
177
    (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
178
    (0x3c984711d4c35ea0, 0x3ff003779a95f959),
179
    (0xbc80484245243778, 0x3ff0042936faa3d8),
180
    (0xbc94b237da2025fa, 0x3ff004dadb113da0),
181
    (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
182
    (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
183
    (0xbc94acf197a00142, 0x3ff006eff583fc3d),
184
    (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
185
    (0x3c7da93f90835f76, 0x3ff0085382faef83),
186
    (0xbc86a79084ab093c, 0x3ff00905554425d4),
187
    (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
188
    (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
189
    (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
190
    (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
191
    (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
192
    (0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
193
    (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
194
    (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
195
    (0xbc991b2060859320, 0x3ff00f4714af41d3),
196
    (0x3c8427068ab22306, 0x3ff00ff93412315c),
197
    (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
198
    (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
199
    (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
200
    (0xbc734104ee7edae8, 0x3ff012c1fecd613b),
201
    (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
202
    (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
203
    (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
204
    (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
205
    (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
206
    (0xbc990565902c5f44, 0x3ff016f0169949ed),
207
    (0x3c870fc41c5c2d54, 0x3ff017a28af25567),
208
    (0x3c94b9a6e145d76c, 0x3ff018550706ab62),
209
    (0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
210
    (0xbc977669f033c7de, 0x3ff019ba16628de2),
211
    (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
212
    (0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
213
    (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
214
    (0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
215
    (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
216
    (0x3c816996709da2e2, 0x3ff01de9fe280ac8),
217
    (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
218
    (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
219
    (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
220
    (0xbc98f06d8a148a32, 0x3ff020b533856324),
221
    (0xbc82bf310fc54eb6, 0x3ff02168143b0281),
222
    (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
223
    (0xbc9491793e46834c, 0x3ff022cdece68c4f),
224
    (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
225
    (0xbc9314aa16278aa4, 0x3ff02433e494b755),
226
    (0x3c848daf888e9650, 0x3ff024e6ec0da046),
227
    (0x3c856dc8046821f4, 0x3ff02599fb483385),
228
    (0x3c945b42356b9d46, 0x3ff0264d1244c719),
229
    (0xbc7082ef51b61d7e, 0x3ff027003103b10e),
230
    (0x3c72106ed0920a34, 0x3ff027b357854772),
231
    (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
232
    (0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
233
    (0x3c564cbba902ca28, 0x3ff029ccf99d720a),
234
    (0x3c94383ef231d206, 0x3ff02a803f2d170d),
235
    (0x3c94a47a505b3a46, 0x3ff02b338c811703),
236
    (0x3c9e471202234680, 0x3ff02be6e199c811),
237
];
238
239
static EXPM1_DD1: [(u64, u64); 11] = [
240
    (0x3c65555555555554, 0x3fc5555555555555),
241
    (0x3c45555555555123, 0x3fa5555555555555),
242
    (0x3c01111111118167, 0x3f81111111111111),
243
    (0xbbef49f49e220cea, 0x3f56c16c16c16c17),
244
    (0x3b6a019eff6f919c, 0x3f2a01a01a01a01a),
245
    (0x3b39fcff48a75b41, 0x3efa01a01a01a01a),
246
    (0xbb6c14f73758cd7f, 0x3ec71de3a556c734),
247
    (0x3b3dfce97931018f, 0x3e927e4fb7789f5c),
248
    (0x3afc513da9e4c9c5, 0x3e5ae64567f544e3),
249
    (0x3acca00af84f2b60, 0x3e21eed8eff8d831),
250
    (0x3a8f27ac6000898f, 0x3de6124613a86e8f),
251
];
252
253
static EXPM1_DD2: [(u64, u64); 7] = [
254
    (0x3ff0000000000000, 0x0000000000000000),
255
    (0x3fe0000000000000, 0x39c712f72ecec2cf),
256
    (0x3fc5555555555555, 0x3c65555555554d07),
257
    (0x3fa5555555555555, 0x3c455194d28275da),
258
    (0x3f81111111111111, 0x3c012faa0e1c0f7b),
259
    (0x3f56c16c16da6973, 0xbbf4ba45ab25d2a3),
260
    (0x3f2a01a019eb7f31, 0xbbc9091d845ecd36),
261
];
262
263
#[inline]
264
0
pub(crate) fn opoly_dd_generic<const N: usize>(
265
0
    x: DoubleDouble,
266
0
    poly: [(u64, u64); N],
267
0
) -> DoubleDouble {
268
0
    let zch = poly.last().unwrap();
269
0
    let p0 = DoubleDouble::from_exact_add(f64::from_bits(zch.0), x.lo);
270
0
    let ach = p0.hi;
271
0
    let acl = f64::from_bits(zch.1) + p0.lo;
272
0
    let mut ch = DoubleDouble::new(acl, ach);
273
274
0
    for zch in poly.iter().rev().skip(1) {
275
0
        ch = DoubleDouble::quick_mult_f64(ch, x.hi);
276
0
        let z0 = DoubleDouble::from_bit_pair(*zch);
277
0
        ch = DoubleDouble::add(z0, ch);
278
0
    }
279
280
0
    ch
281
0
}
282
283
#[cold]
284
0
fn as_expm1_accurate(x: f64) -> f64 {
285
    let mut ix;
286
0
    if x.abs() < 0.25 {
287
        const CL: [u64; 6] = [
288
            0x3da93974a8ca5354,
289
            0x3d6ae7f3e71e4908,
290
            0x3d2ae7f357341648,
291
            0x3ce952c7f96664cb,
292
            0x3ca686f8ce633aae,
293
            0x3c62f49b2fbfb5b6,
294
        ];
295
296
0
        let fl0 = f_fmla(x, f64::from_bits(CL[5]), f64::from_bits(CL[4]));
297
0
        let fl1 = f_fmla(x, fl0, f64::from_bits(CL[3]));
298
0
        let fl2 = f_fmla(x, fl1, f64::from_bits(CL[2]));
299
0
        let fl3 = f_fmla(x, fl2, f64::from_bits(CL[1]));
300
301
0
        let fl = x * f_fmla(x, fl3, f64::from_bits(CL[0]));
302
0
        let mut f = opoly_dd_generic(DoubleDouble::new(fl, x), EXPM1_DD1);
303
0
        f = DoubleDouble::quick_mult_f64(f, x);
304
0
        f = DoubleDouble::quick_mult_f64(f, x);
305
0
        f = DoubleDouble::quick_mult_f64(f, x);
306
0
        let hx = 0.5 * x;
307
0
        let dx2dd = DoubleDouble::from_exact_mult(x, hx);
308
0
        f = DoubleDouble::add(dx2dd, f);
309
0
        let v0 = DoubleDouble::from_exact_add(x, f.hi);
310
0
        let v1 = DoubleDouble::from_exact_add(v0.lo, f.lo);
311
0
        let v2 = DoubleDouble::from_exact_add(v0.hi, v1.hi);
312
0
        let mut v3 = DoubleDouble::from_exact_add(v2.lo, v1.lo);
313
0
        ix = v3.hi.to_bits();
314
0
        if (ix & 0x000fffffffffffff) == 0 {
315
0
            let v = v3.lo.to_bits();
316
0
            let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
317
0
                .wrapping_shl(1)
318
0
                .wrapping_add(1) as i64;
319
0
            ix = ix.wrapping_add(d as u64);
320
0
            v3.hi = f64::from_bits(ix);
321
0
        }
322
0
        v3.hi + v2.hi
323
    } else {
324
        const S: f64 = f64::from_bits(0x40b71547652b82fe);
325
0
        let t = (x * S).cpu_round_ties_even();
326
0
        let jt: i64 = t as i64;
327
0
        let i0 = (jt >> 6) & 0x3f;
328
0
        let i1 = jt & 0x3f;
329
0
        let ie = jt >> 12;
330
0
        let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
331
0
        let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
332
333
0
        let bt = DoubleDouble::quick_mult(t0, t1);
334
335
        const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
336
        const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
337
        const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
338
339
0
        let dx = f_fmla(-L2H, t, x);
340
0
        let dxl = L2L * t;
341
0
        let dxll = f_fmla(L2LL, t, dd_fmla(L2L, t, -dxl));
342
0
        let dxh = dx + dxl;
343
0
        let dxl = (dx - dxh) + dxl + dxll;
344
0
        let mut f = poly_dekker_generic(DoubleDouble::new(dxl, dxh), EXPM1_DD2);
345
0
        f = DoubleDouble::quick_mult(DoubleDouble::new(dxl, dxh), f);
346
0
        f = DoubleDouble::quick_mult(f, bt);
347
0
        f = DoubleDouble::add(bt, f);
348
0
        let off: u64 = (2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64;
349
        let e: f64;
350
0
        if ie < 53 {
351
0
            let fhz = DoubleDouble::from_exact_add(f64::from_bits(off), f.hi);
352
0
            f.hi = fhz.hi;
353
0
            e = fhz.lo;
354
0
        } else if ie < 104 {
355
0
            let fhz = DoubleDouble::from_exact_add(f.hi, f64::from_bits(off));
356
0
            f.hi = fhz.hi;
357
0
            e = fhz.lo;
358
0
        } else {
359
0
            e = 0.;
360
0
        }
361
0
        f.lo += e;
362
0
        let dst = DoubleDouble::from_exact_add(f.hi, f.lo);
363
0
        fast_ldexp(dst.hi, ie as i32)
364
    }
365
0
}
366
367
#[inline(always)]
368
0
fn expm1_gen<B: ExpfBackend>(x: f64, backend: B) -> f64 {
369
0
    let ix = x.to_bits();
370
0
    let aix: u64 = ix & 0x7fff_ffff_ffff_ffff;
371
0
    if aix < 0x3fd0000000000000u64 {
372
0
        if aix < 0x3ca0000000000000u64 {
373
0
            if aix == 0 {
374
0
                return x;
375
0
            }
376
0
            return backend.dyad_fma(f64::from_bits(0x3c90000000000000), x.abs(), x);
377
0
        }
378
0
        let sx = f64::from_bits(0x4060000000000000) * x;
379
0
        let fx = backend.round_ties_even(sx);
380
0
        let z = sx - fx;
381
0
        let z2 = z * z;
382
0
        let i: i64 = unsafe {
383
0
            fx.to_int_unchecked::<i64>() // fx is already integer, this is just a conversion
384
        };
385
0
        let t = DoubleDouble::from_bit_pair(TZ[i.wrapping_add(32) as usize]);
386
        const C: [u64; 6] = [
387
            0x3f80000000000000,
388
            0x3f00000000000000,
389
            0x3e755555555551ad,
390
            0x3de555555555599c,
391
            0x3d511111ad1ad69d,
392
            0x3cb6c16c168b1fb5,
393
        ];
394
0
        let fh = z * f64::from_bits(C[0]);
395
396
0
        let fl0 = backend.fma(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
397
0
        let fl1 = backend.fma(z, f64::from_bits(C[2]), f64::from_bits(C[1]));
398
399
0
        let fw0 = backend.fma(z, fl0, f64::from_bits(C[3]));
400
401
0
        let fl = z2 * backend.fma(z2, fw0, fl1);
402
0
        let mut f = DoubleDouble::new(fl, fh);
403
0
        let e0 = f64::from_bits(0x3bea000000000000);
404
0
        let eps = z2 * e0 + f64::from_bits(0x3970000000000000);
405
0
        let mut r = DoubleDouble::from_exact_add(t.hi, f.hi);
406
0
        r.lo += t.lo + f.lo;
407
0
        f = backend.quick_mult(t, f);
408
0
        f = DoubleDouble::add(r, f);
409
0
        let ub = f.hi + (f.lo + eps);
410
0
        let lb = f.hi + (f.lo - eps);
411
0
        if ub != lb {
412
0
            return as_expm1_accurate(x);
413
0
        }
414
0
        lb
415
    } else {
416
0
        if aix >= 0x40862e42fefa39f0u64 {
417
0
            if aix > 0x7ff0000000000000u64 {
418
0
                return x + x;
419
0
            } // nan
420
0
            if aix == 0x7ff0000000000000u64 {
421
0
                return if (ix >> 63) != 0 { -1.0 } else { x };
422
0
            }
423
0
            if (ix >> 63) == 0 {
424
                const Z: f64 = f64::from_bits(0x7fe0000000000000);
425
0
                return black_box(Z) * black_box(Z);
426
0
            }
427
0
        }
428
0
        if ix >= 0xc0425e4f7b2737fau64 {
429
0
            if ix >= 0xc042b708872320e2u64 {
430
0
                return black_box(-1.0) + black_box(f64::from_bits(0x3c80000000000000));
431
0
            }
432
0
            return (f64::from_bits(0x40425e4f7b2737fa) + x + f64::from_bits(0x3cc8486612173c69))
433
0
                * f64::from_bits(0x3c971547652b82fe)
434
0
                - f64::from_bits(0x3fefffffffffffff);
435
0
        }
436
437
        const S: f64 = f64::from_bits(0x40b71547652b82fe);
438
0
        let t = backend.round_ties_even(x * S);
439
0
        let jt: i64 = unsafe {
440
0
            t.to_int_unchecked::<i64>() // t is already integer, this is just a conversion
441
        };
442
0
        let i0 = (jt >> 6) & 0x3f;
443
0
        let i1 = jt & 0x3f;
444
0
        let ie = jt >> 12;
445
0
        let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
446
0
        let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
447
448
0
        let bt = backend.quick_mult(t0, t1);
449
450
        const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
451
        const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
452
0
        let dx = backend.dd_fma(L2L, t, backend.fma(-L2H, t, x));
453
0
        let dx2 = dx * dx;
454
455
        const CH: [u64; 4] = [
456
            0x3ff0000000000000,
457
            0x3fe0000000000000,
458
            0x3fc55555557e54ff,
459
            0x3fa55555553a12f4,
460
        ];
461
462
0
        let p0 = backend.fma(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
463
0
        let p1 = backend.fma(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
464
465
0
        let p = backend.fma(dx2, p0, p1);
466
0
        let mut fh = bt.hi;
467
0
        let tx = bt.hi * dx;
468
0
        let mut fl = backend.fma(tx, p, bt.lo);
469
0
        let eps = f64::from_bits(0x3c0833beace2b6fe) * bt.hi;
470
471
0
        let off: u64 = ((2048i64 + 1023i64).wrapping_sub(ie) as u64).wrapping_shl(52);
472
        let e: f64;
473
0
        if ie < 53 {
474
0
            let flz = DoubleDouble::from_exact_add(f64::from_bits(off), fh);
475
0
            e = flz.lo;
476
0
            fh = flz.hi;
477
0
        } else if ie < 75 {
478
0
            let flz = DoubleDouble::from_exact_add(fh, f64::from_bits(off));
479
0
            e = flz.lo;
480
0
            fh = flz.hi;
481
0
        } else {
482
0
            e = 0.;
483
0
        }
484
0
        fl += e;
485
0
        let ub = fh + (fl + eps);
486
0
        let lb = fh + (fl - eps);
487
0
        if ub != lb {
488
0
            return as_expm1_accurate(x);
489
0
        }
490
0
        fast_ldexp(lb, ie as i32)
491
    }
492
0
}
Unexecuted instantiation: pxfm::exponents::expm1::expm1_gen::<pxfm::exponents::expf::FmaBackend>
Unexecuted instantiation: pxfm::exponents::expm1::expm1_gen::<pxfm::exponents::expf::GenericExpfBackend>
493
494
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
495
#[target_feature(enable = "avx", enable = "fma")]
496
0
unsafe fn expm1_fma_impl(x: f64) -> f64 {
497
    use crate::exponents::expf::FmaBackend;
498
0
    expm1_gen(x, FmaBackend {})
499
0
}
500
501
/// Computes e^x - 1
502
///
503
/// Max found ULP 0.5
504
0
pub fn f_expm1(x: f64) -> f64 {
505
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
506
    {
507
        expm1_gen(x, GenericExpfBackend {})
508
    }
509
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
510
    {
511
        use std::sync::OnceLock;
512
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
513
0
        let q = EXECUTOR.get_or_init(|| {
514
0
            if std::arch::is_x86_feature_detected!("avx")
515
0
                && std::arch::is_x86_feature_detected!("fma")
516
            {
517
0
                expm1_fma_impl
518
            } else {
519
0
                fn def_expm1(x: f64) -> f64 {
520
0
                    expm1_gen(x, GenericExpfBackend {})
521
0
                }
522
0
                def_expm1
523
            }
524
0
        });
525
0
        unsafe { q(x) }
526
    }
527
0
}
528
529
#[cfg(test)]
530
mod tests {
531
    use super::*;
532
533
    #[test]
534
    fn test_expm1() {
535
        assert_eq!(f_expm1(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864),
536
                   0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864 );
537
        assert_eq!(f_expm1(36.52188110363568), 7265264535836525.);
538
        assert_eq!(f_expm1(2.), 6.38905609893065);
539
        assert_eq!(f_expm1(0.4321321), 0.5405386068701713);
540
        assert_eq!(f_expm1(-0.0000004321321), -4.321320066309375e-7);
541
    }
542
}