/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.26/src/exponents/expm1.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dd_fmla, dyad_fmla, f_fmla}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use crate::exponents::fast_ldexp; |
32 | | use crate::rounding::CpuRoundTiesEven; |
33 | | use crate::shared_eval::poly_dekker_generic; |
34 | | use std::hint::black_box; |
35 | | |
36 | | static TZ: [(u64, u64); 65] = [ |
37 | | (0xbc6797d4686c5393, 0xbfcc5041854df7d4), |
38 | | (0xbc8ea1cb9d163339, 0xbfcb881a23aebb48), |
39 | | (0x3c8f483a3e8cd60f, 0xbfcabe60e1f21838), |
40 | | (0x3c7dffd920f493db, 0xbfc9f3129931fab0), |
41 | | (0xbc851bfdbb129094, 0xbfc9262c1c3430a0), |
42 | | (0x3c8cd3e5225e2206, 0xbfc857aa375db4e4), |
43 | | (0x3c5e3a6bdaece8f9, 0xbfc78789b0a5e0c0), |
44 | | (0xbc8daf2ae0c2d3d4, 0xbfc6b5c7478983d8), |
45 | | (0xbc7fd36226fadd44, 0xbfc5e25fb4fde210), |
46 | | (0x3c7d887cd0341ab0, 0xbfc50d4fab639758), |
47 | | (0xbc8676a52a1a618b, 0xbfc43693d679612c), |
48 | | (0x3c79776b420ad283, 0xbfc35e28db4ecd9c), |
49 | | (0x3c73d5fd7d70a5ed, 0xbfc2840b5836cf68), |
50 | | (0x3c5a94ad2c8fa0bf, 0xbfc1a837e4ba3760), |
51 | | (0x3c26ad4c353465b0, 0xbfc0caab118a1278), |
52 | | (0xbc78bba170e59b65, 0xbfbfd6c2d0e3d910), |
53 | | (0xbc8e1e0a76cb0685, 0xbfbe14aed893eef0), |
54 | | (0x3c8fe131f55e75f8, 0xbfbc4f1331d22d40), |
55 | | (0xbc8b5beee8bcee31, 0xbfba85e8c62d9c10), |
56 | | (0xbc77fe9b02c25e9b, 0xbfb8b92870fa2b58), |
57 | | (0xbc832ae7bdaf1116, 0xbfb6e8caff341fe8), |
58 | | (0x3c7a6cfe58cbd73b, 0xbfb514c92f634788), |
59 | | (0x3c68798de3138a56, 0xbfb33d1bb17df2e8), |
60 | | (0xbc3589321a7ef10b, 0xbfb161bb26cbb590), |
61 | | (0xbc78d0e700fcfb65, 0xbfaf0540438fd5c0), |
62 | | (0x3c8473ef07d5dd3b, 0xbfab3f864c080000), |
63 | | (0xbc838e62149c16e2, 0xbfa7723950130400), |
64 | | (0xbc508bb6309bd394, 0xbfa39d4a1a77e050), |
65 | | (0xbc8bad3fd501a227, 0xbf9f8152aee94500), |
66 | | (0x3c63d27ac39ed253, 0xbf97b88f290230e0), |
67 | | (0xbc8b60bbd08aac55, 0xbf8fc055004416c0), |
68 | | (0xbc4a00d03b3359de, 0xbf7fe0154aaeed80), |
69 | | (0x0000000000000000, 0x0000000000000000), |
70 | | (0x3c8861931c15e39b, 0x3f80100ab00222c0), |
71 | | (0x3c77ab864b3e9045, 0x3f90202ad5778e40), |
72 | | (0x3c74e5659d75e95b, 0x3f984890d9043740), |
73 | | (0x3c78e0bd083aba81, 0x3fa040ac0224fd90), |
74 | | (0x3c345cc1cf959b1b, 0x3fa465509d383eb0), |
75 | | (0xbc8eb6980ce14da7, 0x3fa89246d053d180), |
76 | | (0x3c77324137d6c342, 0x3facc79f4f5613a0), |
77 | | (0xbc45272ff30eed1b, 0x3fb082b577d34ed8), |
78 | | (0xbc81280f19dace1c, 0x3fb2a5dd543ccc50), |
79 | | (0xbc8d550af31c8ec3, 0x3fb4cd4fc989cd68), |
80 | | (0x3c87923b72aa582d, 0x3fb6f91575870690), |
81 | | (0xbc776c2e732457f1, 0x3fb92937074e0cd8), |
82 | | (0x3c881f5c92a5200f, 0x3fbb5dbd3f681220), |
83 | | (0x3c8e8ac7a4d3206c, 0x3fbd96b0eff0e790), |
84 | | (0xbc712db6f4bbe33b, 0x3fbfd41afcba45e8), |
85 | | (0xbc58c4a5df1ec7e5, 0x3fc10b022db7ae68), |
86 | | (0xbc6bd4b1c37ea8a2, 0x3fc22e3b09dc54d8), |
87 | | (0x3c85aeb9860044d0, 0x3fc353bc9fb00b20), |
88 | | (0xbc64c26602c63fda, 0x3fc47b8b853aafec), |
89 | | (0xbc87f644c1f9d314, 0x3fc5a5ac59b963cc), |
90 | | (0x3c8f5aa8ec61fc2d, 0x3fc6d223c5b10638), |
91 | | (0x3c27ab912c69ffeb, 0x3fc800f67b00d7b8), |
92 | | (0xbc5b3564bc0ec9cd, 0x3fc9322934f54148), |
93 | | (0x3c86a7062465be33, 0x3fca65c0b85ac1a8), |
94 | | (0xbc885718d2ff1bf4, 0x3fcb9bc1d3910094), |
95 | | (0xbc8045cb0c685e08, 0x3fccd4315e9e0834), |
96 | | (0xbc16e7fb859d5055, 0x3fce0f143b41a554), |
97 | | (0x3c851bbdee020603, 0x3fcf4c6f5508ee5c), |
98 | | (0x3c6e17611afc42c5, 0x3fd04623d0b0f8c8), |
99 | | (0xbc71c5b2e8735a43, 0x3fd0e7510fd7c564), |
100 | | (0xbc825fe139c4cffd, 0x3fd189c1ecaeb084), |
101 | | (0xbc789843c4964554, 0x3fd22d78f0fa061a), |
102 | | ]; |
103 | | |
104 | | pub(crate) static EXPM1_T0: [(u64, u64); 64] = [ |
105 | | (0x0000000000000000, 0x3ff0000000000000), |
106 | | (0xbc719083535b085e, 0x3ff02c9a3e778061), |
107 | | (0x3c8d73e2a475b466, 0x3ff059b0d3158574), |
108 | | (0x3c6186be4bb28500, 0x3ff0874518759bc8), |
109 | | (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f), |
110 | | (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2), |
111 | | (0xbc96c51039449b3a, 0x3ff11301d0125b51), |
112 | | (0xbc932fbf9af1369e, 0x3ff1429aaea92de0), |
113 | | (0xbc819041b9d78a76, 0x3ff172b83c7d517b), |
114 | | (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75), |
115 | | (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa), |
116 | | (0x3c8dc775814a8494, 0x3ff2063b88628cd6), |
117 | | (0x3c99b07eb6c70572, 0x3ff2387a6e756238), |
118 | | (0x3c82bd339940e9da, 0x3ff26b4565e27cdd), |
119 | | (0x3c8612e8afad1256, 0x3ff29e9df51fdee1), |
120 | | (0x3c90024754db41d4, 0x3ff2d285a6e4030b), |
121 | | (0x3c86f46ad23182e4, 0x3ff306fe0a31b715), |
122 | | (0x3c932721843659a6, 0x3ff33c08b26416ff), |
123 | | (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb), |
124 | | (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7), |
125 | | (0x3c8ada0911f09ebc, 0x3ff3dea64c123422), |
126 | | (0xbc5ef3691c309278, 0x3ff4160a21f72e2a), |
127 | | (0x3c489b7a04ef80d0, 0x3ff44e086061892d), |
128 | | (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0), |
129 | | (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27), |
130 | | (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7), |
131 | | (0xbc807abe1db13cac, 0x3ff5342b569d4f82), |
132 | | (0x3c99bb2c011d93ac, 0x3ff56f4736b527da), |
133 | | (0x3c96324c054647ac, 0x3ff5ab07dd485429), |
134 | | (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148), |
135 | | (0xbc9383c17e40b496, 0x3ff6247eb03a5585), |
136 | | (0xbc9bb60987591c34, 0x3ff6623882552225), |
137 | | (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd), |
138 | | (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f), |
139 | | (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74), |
140 | | (0xbc90245957316dd4, 0x3ff75feb564267c9), |
141 | | (0xbc841577ee049930, 0x3ff7a11473eb0187), |
142 | | (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62), |
143 | | (0xbc9d4c1dd41532d8, 0x3ff82589994cce13), |
144 | | (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed), |
145 | | (0x3c96e9f156864b26, 0x3ff8ace5422aa0db), |
146 | | (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736), |
147 | | (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5), |
148 | | (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50), |
149 | | (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090), |
150 | | (0xbc9359495d1cd532, 0x3ffa0c667b5de565), |
151 | | (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d), |
152 | | (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf), |
153 | | (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad), |
154 | | (0xbc62805e3084d708, 0x3ffb33a2b84f15fb), |
155 | | (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47), |
156 | | (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2), |
157 | | (0x3c811065895048de, 0x3ffc199bdd85529c), |
158 | | (0x3c92884dff483cac, 0x3ffc67f12e57d14b), |
159 | | (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069), |
160 | | (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c), |
161 | | (0x3c82ed02d75b3706, 0x3ffd5818dcfba487), |
162 | | (0x3c9c2300696db532, 0x3ffda9e603db3285), |
163 | | (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f), |
164 | | (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6), |
165 | | (0xbc9e9c23179c2894, 0x3ffea4afa2a490da), |
166 | | (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27), |
167 | | (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540), |
168 | | (0x3c874853f3a5931e, 0x3fffa7c1819e90d8), |
169 | | ]; |
170 | | |
171 | | pub(crate) static EXPM1_T1: [(u64, u64); 64] = [ |
172 | | (0x0000000000000000, 0x3ff0000000000000), |
173 | | (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7), |
174 | | (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052), |
175 | | (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6), |
176 | | (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf), |
177 | | (0x3c984711d4c35ea0, 0x3ff003779a95f959), |
178 | | (0xbc80484245243778, 0x3ff0042936faa3d8), |
179 | | (0xbc94b237da2025fa, 0x3ff004dadb113da0), |
180 | | (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a), |
181 | | (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473), |
182 | | (0xbc94acf197a00142, 0x3ff006eff583fc3d), |
183 | | (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca), |
184 | | (0x3c7da93f90835f76, 0x3ff0085382faef83), |
185 | | (0xbc86a79084ab093c, 0x3ff00905554425d4), |
186 | | (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b), |
187 | | (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd), |
188 | | (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf), |
189 | | (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec), |
190 | | (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02), |
191 | | (0x3c9095a56c919d02, 0x3ff00d30e4d0c483), |
192 | | (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5), |
193 | | (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0), |
194 | | (0xbc991b2060859320, 0x3ff00f4714af41d3), |
195 | | (0x3c8427068ab22306, 0x3ff00ff93412315c), |
196 | | (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11), |
197 | | (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b), |
198 | | (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63), |
199 | | (0xbc734104ee7edae8, 0x3ff012c1fecd613b), |
200 | | (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5), |
201 | | (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278), |
202 | | (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f), |
203 | | (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88), |
204 | | (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335), |
205 | | (0xbc990565902c5f44, 0x3ff016f0169949ed), |
206 | | (0x3c870fc41c5c2d54, 0x3ff017a28af25567), |
207 | | (0x3c94b9a6e145d76c, 0x3ff018550706ab62), |
208 | | (0xbc7008eff5142bfa, 0x3ff019078ad6a19f), |
209 | | (0xbc977669f033c7de, 0x3ff019ba16628de2), |
210 | | (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3), |
211 | | (0x3c9371231477ece6, 0x3ff01b1f44af9f9e), |
212 | | (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4), |
213 | | (0xbc9bc72b100828a4, 0x3ff01c8491f08f08), |
214 | | (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070), |
215 | | (0x3c816996709da2e2, 0x3ff01de9fe280ac8), |
216 | | (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef), |
217 | | (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6), |
218 | | (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35), |
219 | | (0xbc98f06d8a148a32, 0x3ff020b533856324), |
220 | | (0xbc82bf310fc54eb6, 0x3ff02168143b0281), |
221 | | (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e), |
222 | | (0xbc9491793e46834c, 0x3ff022cdece68c4f), |
223 | | (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad), |
224 | | (0xbc9314aa16278aa4, 0x3ff02433e494b755), |
225 | | (0x3c848daf888e9650, 0x3ff024e6ec0da046), |
226 | | (0x3c856dc8046821f4, 0x3ff02599fb483385), |
227 | | (0x3c945b42356b9d46, 0x3ff0264d1244c719), |
228 | | (0xbc7082ef51b61d7e, 0x3ff027003103b10e), |
229 | | (0x3c72106ed0920a34, 0x3ff027b357854772), |
230 | | (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059), |
231 | | (0xbc909f8775e78084, 0x3ff02919bbd1d1d8), |
232 | | (0x3c564cbba902ca28, 0x3ff029ccf99d720a), |
233 | | (0x3c94383ef231d206, 0x3ff02a803f2d170d), |
234 | | (0x3c94a47a505b3a46, 0x3ff02b338c811703), |
235 | | (0x3c9e471202234680, 0x3ff02be6e199c811), |
236 | | ]; |
237 | | |
238 | | static EXPM1_DD1: [(u64, u64); 11] = [ |
239 | | (0x3c65555555555554, 0x3fc5555555555555), |
240 | | (0x3c45555555555123, 0x3fa5555555555555), |
241 | | (0x3c01111111118167, 0x3f81111111111111), |
242 | | (0xbbef49f49e220cea, 0x3f56c16c16c16c17), |
243 | | (0x3b6a019eff6f919c, 0x3f2a01a01a01a01a), |
244 | | (0x3b39fcff48a75b41, 0x3efa01a01a01a01a), |
245 | | (0xbb6c14f73758cd7f, 0x3ec71de3a556c734), |
246 | | (0x3b3dfce97931018f, 0x3e927e4fb7789f5c), |
247 | | (0x3afc513da9e4c9c5, 0x3e5ae64567f544e3), |
248 | | (0x3acca00af84f2b60, 0x3e21eed8eff8d831), |
249 | | (0x3a8f27ac6000898f, 0x3de6124613a86e8f), |
250 | | ]; |
251 | | |
252 | | static EXPM1_DD2: [(u64, u64); 7] = [ |
253 | | (0x3ff0000000000000, 0x0000000000000000), |
254 | | (0x3fe0000000000000, 0x39c712f72ecec2cf), |
255 | | (0x3fc5555555555555, 0x3c65555555554d07), |
256 | | (0x3fa5555555555555, 0x3c455194d28275da), |
257 | | (0x3f81111111111111, 0x3c012faa0e1c0f7b), |
258 | | (0x3f56c16c16da6973, 0xbbf4ba45ab25d2a3), |
259 | | (0x3f2a01a019eb7f31, 0xbbc9091d845ecd36), |
260 | | ]; |
261 | | |
262 | | #[inline] |
263 | 0 | pub(crate) fn opoly_dd_generic<const N: usize>( |
264 | 0 | x: DoubleDouble, |
265 | 0 | poly: [(u64, u64); N], |
266 | 0 | ) -> DoubleDouble { |
267 | 0 | let zch = poly.last().unwrap(); |
268 | 0 | let p0 = DoubleDouble::from_exact_add(f64::from_bits(zch.0), x.lo); |
269 | 0 | let ach = p0.hi; |
270 | 0 | let acl = f64::from_bits(zch.1) + p0.lo; |
271 | 0 | let mut ch = DoubleDouble::new(acl, ach); |
272 | | |
273 | 0 | for zch in poly.iter().rev().skip(1) { |
274 | 0 | ch = DoubleDouble::quick_mult_f64(ch, x.hi); |
275 | 0 | let z0 = DoubleDouble::from_bit_pair(*zch); |
276 | 0 | ch = DoubleDouble::add(z0, ch); |
277 | 0 | } |
278 | | |
279 | 0 | ch |
280 | 0 | } |
281 | | |
282 | | #[cold] |
283 | 0 | fn as_expm1_accurate(x: f64) -> f64 { |
284 | | let mut ix; |
285 | 0 | if x.abs() < 0.25 { |
286 | | const CL: [u64; 6] = [ |
287 | | 0x3da93974a8ca5354, |
288 | | 0x3d6ae7f3e71e4908, |
289 | | 0x3d2ae7f357341648, |
290 | | 0x3ce952c7f96664cb, |
291 | | 0x3ca686f8ce633aae, |
292 | | 0x3c62f49b2fbfb5b6, |
293 | | ]; |
294 | | |
295 | 0 | let fl0 = f_fmla(x, f64::from_bits(CL[5]), f64::from_bits(CL[4])); |
296 | 0 | let fl1 = f_fmla(x, fl0, f64::from_bits(CL[3])); |
297 | 0 | let fl2 = f_fmla(x, fl1, f64::from_bits(CL[2])); |
298 | 0 | let fl3 = f_fmla(x, fl2, f64::from_bits(CL[1])); |
299 | | |
300 | 0 | let fl = x * f_fmla(x, fl3, f64::from_bits(CL[0])); |
301 | 0 | let mut f = opoly_dd_generic(DoubleDouble::new(fl, x), EXPM1_DD1); |
302 | 0 | f = DoubleDouble::quick_mult_f64(f, x); |
303 | 0 | f = DoubleDouble::quick_mult_f64(f, x); |
304 | 0 | f = DoubleDouble::quick_mult_f64(f, x); |
305 | 0 | let hx = 0.5 * x; |
306 | 0 | let dx2dd = DoubleDouble::from_exact_mult(x, hx); |
307 | 0 | f = DoubleDouble::add(dx2dd, f); |
308 | 0 | let v0 = DoubleDouble::from_exact_add(x, f.hi); |
309 | 0 | let v1 = DoubleDouble::from_exact_add(v0.lo, f.lo); |
310 | 0 | let v2 = DoubleDouble::from_exact_add(v0.hi, v1.hi); |
311 | 0 | let mut v3 = DoubleDouble::from_exact_add(v2.lo, v1.lo); |
312 | 0 | ix = v3.hi.to_bits(); |
313 | 0 | if (ix & 0x000fffffffffffff) == 0 { |
314 | 0 | let v = v3.lo.to_bits(); |
315 | 0 | let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64) |
316 | 0 | .wrapping_shl(1) |
317 | 0 | .wrapping_add(1) as i64; |
318 | 0 | ix = ix.wrapping_add(d as u64); |
319 | 0 | v3.hi = f64::from_bits(ix); |
320 | 0 | } |
321 | 0 | v3.hi + v2.hi |
322 | | } else { |
323 | | const S: f64 = f64::from_bits(0x40b71547652b82fe); |
324 | 0 | let t = (x * S).cpu_round_ties_even(); |
325 | 0 | let jt: i64 = t as i64; |
326 | 0 | let i0 = (jt >> 6) & 0x3f; |
327 | 0 | let i1 = jt & 0x3f; |
328 | 0 | let ie = jt >> 12; |
329 | 0 | let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]); |
330 | 0 | let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]); |
331 | | |
332 | 0 | let bt = DoubleDouble::quick_mult(t0, t1); |
333 | | |
334 | | const L2H: f64 = f64::from_bits(0x3f262e42ff000000); |
335 | | const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26); |
336 | | const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3); |
337 | | |
338 | 0 | let dx = f_fmla(-L2H, t, x); |
339 | 0 | let dxl = L2L * t; |
340 | 0 | let dxll = f_fmla(L2LL, t, dd_fmla(L2L, t, -dxl)); |
341 | 0 | let dxh = dx + dxl; |
342 | 0 | let dxl = (dx - dxh) + dxl + dxll; |
343 | 0 | let mut f = poly_dekker_generic(DoubleDouble::new(dxl, dxh), EXPM1_DD2); |
344 | 0 | f = DoubleDouble::quick_mult(DoubleDouble::new(dxl, dxh), f); |
345 | 0 | f = DoubleDouble::quick_mult(f, bt); |
346 | 0 | f = DoubleDouble::add(bt, f); |
347 | 0 | let off: u64 = (2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64; |
348 | | let e: f64; |
349 | 0 | if ie < 53 { |
350 | 0 | let fhz = DoubleDouble::from_exact_add(f64::from_bits(off), f.hi); |
351 | 0 | f.hi = fhz.hi; |
352 | 0 | e = fhz.lo; |
353 | 0 | } else if ie < 104 { |
354 | 0 | let fhz = DoubleDouble::from_exact_add(f.hi, f64::from_bits(off)); |
355 | 0 | f.hi = fhz.hi; |
356 | 0 | e = fhz.lo; |
357 | 0 | } else { |
358 | 0 | e = 0.; |
359 | 0 | } |
360 | 0 | f.lo += e; |
361 | 0 | let dst = DoubleDouble::from_exact_add(f.hi, f.lo); |
362 | 0 | fast_ldexp(dst.hi, ie as i32) |
363 | | } |
364 | 0 | } |
365 | | |
366 | | /// Computes e^x - 1 |
367 | | /// |
368 | | /// Max found ULP 0.5 |
369 | 0 | pub fn f_expm1(x: f64) -> f64 { |
370 | 0 | let ix = x.to_bits(); |
371 | 0 | let aix: u64 = ix & 0x7fff_ffff_ffff_ffff; |
372 | 0 | if aix < 0x3fd0000000000000u64 { |
373 | 0 | if aix < 0x3ca0000000000000u64 { |
374 | 0 | if aix == 0 { |
375 | 0 | return x; |
376 | 0 | } |
377 | 0 | return dyad_fmla(f64::from_bits(0x3c90000000000000), x.abs(), x); |
378 | 0 | } |
379 | 0 | let sx = f64::from_bits(0x4060000000000000) * x; |
380 | 0 | let fx = sx.cpu_round_ties_even(); |
381 | 0 | let z = sx - fx; |
382 | 0 | let z2 = z * z; |
383 | 0 | let i: i64 = unsafe { |
384 | 0 | fx.to_int_unchecked::<i64>() // fx is already integer, this is just a conversion |
385 | | }; |
386 | 0 | let t = DoubleDouble::from_bit_pair(TZ[i.wrapping_add(32) as usize]); |
387 | | const C: [u64; 6] = [ |
388 | | 0x3f80000000000000, |
389 | | 0x3f00000000000000, |
390 | | 0x3e755555555551ad, |
391 | | 0x3de555555555599c, |
392 | | 0x3d511111ad1ad69d, |
393 | | 0x3cb6c16c168b1fb5, |
394 | | ]; |
395 | 0 | let fh = z * f64::from_bits(C[0]); |
396 | | |
397 | 0 | let fl0 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4])); |
398 | 0 | let fl1 = f_fmla(z, f64::from_bits(C[2]), f64::from_bits(C[1])); |
399 | | |
400 | 0 | let fw0 = f_fmla(z, fl0, f64::from_bits(C[3])); |
401 | | |
402 | 0 | let fl = z2 * f_fmla(z2, fw0, fl1); |
403 | 0 | let mut f = DoubleDouble::new(fl, fh); |
404 | 0 | let e0 = f64::from_bits(0x3bea000000000000); |
405 | 0 | let eps = z2 * e0 + f64::from_bits(0x3970000000000000); |
406 | 0 | let mut r = DoubleDouble::from_exact_add(t.hi, f.hi); |
407 | 0 | r.lo += t.lo + f.lo; |
408 | 0 | f = DoubleDouble::quick_mult(t, f); |
409 | 0 | f = DoubleDouble::add(r, f); |
410 | 0 | let ub = f.hi + (f.lo + eps); |
411 | 0 | let lb = f.hi + (f.lo - eps); |
412 | 0 | if ub != lb { |
413 | 0 | return as_expm1_accurate(x); |
414 | 0 | } |
415 | 0 | lb |
416 | | } else { |
417 | 0 | if aix >= 0x40862e42fefa39f0u64 { |
418 | 0 | if aix > 0x7ff0000000000000u64 { |
419 | 0 | return x + x; |
420 | 0 | } // nan |
421 | 0 | if aix == 0x7ff0000000000000u64 { |
422 | 0 | return if (ix >> 63) != 0 { -1.0 } else { x }; |
423 | 0 | } |
424 | 0 | if (ix >> 63) == 0 { |
425 | | const Z: f64 = f64::from_bits(0x7fe0000000000000); |
426 | 0 | return black_box(Z) * black_box(Z); |
427 | 0 | } |
428 | 0 | } |
429 | 0 | if ix >= 0xc0425e4f7b2737fau64 { |
430 | 0 | if ix >= 0xc042b708872320e2u64 { |
431 | 0 | return black_box(-1.0) + black_box(f64::from_bits(0x3c80000000000000)); |
432 | 0 | } |
433 | 0 | return (f64::from_bits(0x40425e4f7b2737fa) + x + f64::from_bits(0x3cc8486612173c69)) |
434 | 0 | * f64::from_bits(0x3c971547652b82fe) |
435 | 0 | - f64::from_bits(0x3fefffffffffffff); |
436 | 0 | } |
437 | | |
438 | | const S: f64 = f64::from_bits(0x40b71547652b82fe); |
439 | 0 | let t = (x * S).cpu_round_ties_even(); |
440 | 0 | let jt: i64 = unsafe { |
441 | 0 | t.to_int_unchecked::<i64>() // t is already integer, this is just a conversion |
442 | | }; |
443 | 0 | let i0 = (jt >> 6) & 0x3f; |
444 | 0 | let i1 = jt & 0x3f; |
445 | 0 | let ie = jt >> 12; |
446 | 0 | let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]); |
447 | 0 | let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]); |
448 | | |
449 | 0 | let bt = DoubleDouble::quick_mult(t0, t1); |
450 | | |
451 | | const L2H: f64 = f64::from_bits(0x3f262e42ff000000); |
452 | | const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26); |
453 | 0 | let dx = dd_fmla(L2L, t, f_fmla(-L2H, t, x)); |
454 | 0 | let dx2 = dx * dx; |
455 | | |
456 | | const CH: [u64; 4] = [ |
457 | | 0x3ff0000000000000, |
458 | | 0x3fe0000000000000, |
459 | | 0x3fc55555557e54ff, |
460 | | 0x3fa55555553a12f4, |
461 | | ]; |
462 | | |
463 | 0 | let p0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2])); |
464 | 0 | let p1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0])); |
465 | | |
466 | 0 | let p = f_fmla(dx2, p0, p1); |
467 | 0 | let mut fh = bt.hi; |
468 | 0 | let tx = bt.hi * dx; |
469 | 0 | let mut fl = f_fmla(tx, p, bt.lo); |
470 | 0 | let eps = f64::from_bits(0x3c0833beace2b6fe) * bt.hi; |
471 | | |
472 | 0 | let off: u64 = ((2048i64 + 1023i64).wrapping_sub(ie) as u64).wrapping_shl(52); |
473 | | let e: f64; |
474 | 0 | if ie < 53 { |
475 | 0 | let flz = DoubleDouble::from_exact_add(f64::from_bits(off), fh); |
476 | 0 | e = flz.lo; |
477 | 0 | fh = flz.hi; |
478 | 0 | } else if ie < 75 { |
479 | 0 | let flz = DoubleDouble::from_exact_add(fh, f64::from_bits(off)); |
480 | 0 | e = flz.lo; |
481 | 0 | fh = flz.hi; |
482 | 0 | } else { |
483 | 0 | e = 0.; |
484 | 0 | } |
485 | 0 | fl += e; |
486 | 0 | let ub = fh + (fl + eps); |
487 | 0 | let lb = fh + (fl - eps); |
488 | 0 | if ub != lb { |
489 | 0 | return as_expm1_accurate(x); |
490 | 0 | } |
491 | 0 | fast_ldexp(lb, ie as i32) |
492 | | } |
493 | 0 | } |
494 | | |
495 | | #[cfg(test)] |
496 | | mod tests { |
497 | | use super::*; |
498 | | |
499 | | #[test] |
500 | | fn test_expm1() { |
501 | | assert_eq!(f_expm1(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864), |
502 | | 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864 ); |
503 | | assert_eq!(f_expm1(36.52188110363568), 7265264535836525.); |
504 | | assert_eq!(f_expm1(2.), 6.38905609893065); |
505 | | assert_eq!(f_expm1(0.4321321), 0.5405386068701713); |
506 | | assert_eq!(f_expm1(-0.0000004321321), -4.321320066309375e-7); |
507 | | } |
508 | | } |