/src/fast_float/include/fast_float/bigint.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef FASTFLOAT_BIGINT_H |
2 | | #define FASTFLOAT_BIGINT_H |
3 | | |
4 | | #include <algorithm> |
5 | | #include <cstdint> |
6 | | #include <climits> |
7 | | #include <cstring> |
8 | | |
9 | | #include "float_common.h" |
10 | | |
11 | | namespace fast_float { |
12 | | |
// the limb width: we want efficient multiplication of double the bits in
// limb, or for 64-bit limbs, at least 64-bit multiplication where we can
// extract the high and low parts efficiently. this is every 64-bit
// architecture except for sparc, which emulates 128-bit multiplication.
// we might have platforms where `CHAR_BIT` is not 8, so let's avoid
// doing `8 * sizeof(limb)`.
#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
#define FASTFLOAT_64BIT_LIMB 1
typedef uint64_t limb;
constexpr size_t limb_bits = 64;
#else
#define FASTFLOAT_32BIT_LIMB
typedef uint32_t limb;
constexpr size_t limb_bits = 32;
#endif

// non-owning view (pointer + length) over a contiguous run of limbs.
typedef span<limb> limb_span;

// number of bits in a bigint. this needs to be at least the number
// of bits required to store the largest bigint, which is
// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
// ~3600 bits, so we round to 4000.
constexpr size_t bigint_bits = 4000;
// number of limbs needed to hold `bigint_bits` bits.
constexpr size_t bigint_limbs = bigint_bits / limb_bits;
37 | | |
// vector-like type that is allocated on the stack. the entire
// buffer is pre-allocated, and only the length changes.
template <uint16_t size> struct stackvec {
  limb data[size];
  // we never need more than 150 limbs
  uint16_t length{0};

  stackvec() = default;
  // non-copyable and non-movable: the buffer is large, and callers
  // always pass a stackvec by reference.
  stackvec(stackvec const &) = delete;
  stackvec &operator=(stackvec const &) = delete;
  stackvec(stackvec &&) = delete;
  stackvec &operator=(stackvec &&other) = delete;

  // create stack vector from existing limb span.
  // asserts (fatally) if the span does not fit in the buffer.
  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
    FASTFLOAT_ASSERT(try_extend(s));
  }

  // unchecked mutable element access; `index` must be < len().
  FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    return data[index];
  }

  // unchecked const element access; `index` must be < len().
  FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    return data[index];
  }

  // index from the end of the container
  // rindex(0) is the last (most-significant) limb.
  FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    size_t rindex = length - index - 1;
    return data[rindex];
  }

  // set the length, without bounds checking.
  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
    length = uint16_t(len);
  }

  constexpr size_t len() const noexcept { return length; }

  constexpr bool is_empty() const noexcept { return length == 0; }

  constexpr size_t capacity() const noexcept { return size; }

  // append item to vector, without bounds checking
  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
    data[length] = value;
    length++;
  }

  // append item to vector, returning if item was added
  FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept {
    if (len() < capacity()) {
      push_unchecked(value);
      return true;
    } else {
      return false;
    }
  }

  // add items to the vector, from a span, without bounds checking
  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
    limb *ptr = data + length;
    std::copy_n(s.ptr, s.len(), ptr);
    set_len(len() + s.len());
  }

  // try to add items to the vector, returning if items were added
  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
    if (len() + s.len() <= capacity()) {
      extend_unchecked(s);
      return true;
    } else {
      return false;
    }
  }

  // resize the vector, without bounds checking
  // if the new size is longer than the vector, assign value to each
  // appended item.
  FASTFLOAT_CONSTEXPR20
  void resize_unchecked(size_t new_len, limb value) noexcept {
    if (new_len > len()) {
      size_t count = new_len - len();
      limb *first = data + len();
      limb *last = first + count;
      ::std::fill(first, last, value);
      set_len(new_len);
    } else {
      // shrinking only adjusts the length; storage is untouched.
      set_len(new_len);
    }
  }

  // try to resize the vector, returning if the vector was resized.
  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
    if (new_len > capacity()) {
      return false;
    } else {
      resize_unchecked(new_len, value);
      return true;
    }
  }

  // check if any limbs are non-zero after the given index.
  // this needs to be done in reverse order, since the index
  // is relative to the most significant limbs.
  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
    while (index < len()) {
      if (rindex(index) != 0) {
        return true;
      }
      index++;
    }
    return false;
  }

  // normalize the big integer, so most-significant zero limbs are removed.
  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
    while (len() > 0 && rindex(0) == 0) {
      length--;
    }
  }
};
163 | | |
164 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t |
165 | 0 | empty_hi64(bool &truncated) noexcept { |
166 | 0 | truncated = false; |
167 | 0 | return 0; |
168 | 0 | } |
169 | | |
170 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
171 | 230 | uint64_hi64(uint64_t r0, bool &truncated) noexcept { |
172 | 230 | truncated = false; |
173 | 230 | int shl = leading_zeroes(r0); |
174 | 230 | return r0 << shl; |
175 | 230 | } |
176 | | |
177 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
178 | 712 | uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept { |
179 | 712 | int shl = leading_zeroes(r0); |
180 | 712 | if (shl == 0) { |
181 | 189 | truncated = r1 != 0; |
182 | 189 | return r0; |
183 | 523 | } else { |
184 | 523 | int shr = 64 - shl; |
185 | 523 | truncated = (r1 << shl) != 0; |
186 | 523 | return (r0 << shl) | (r1 >> shr); |
187 | 523 | } |
188 | 712 | } |
189 | | |
190 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
191 | 0 | uint32_hi64(uint32_t r0, bool &truncated) noexcept { |
192 | 0 | return uint64_hi64(r0, truncated); |
193 | 0 | } |
194 | | |
195 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
196 | 0 | uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept { |
197 | 0 | uint64_t x0 = r0; |
198 | 0 | uint64_t x1 = r1; |
199 | 0 | return uint64_hi64((x0 << 32) | x1, truncated); |
200 | 0 | } |
201 | | |
202 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
203 | 0 | uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept { |
204 | 0 | uint64_t x0 = r0; |
205 | 0 | uint64_t x1 = r1; |
206 | 0 | uint64_t x2 = r2; |
207 | 0 | return uint64_hi64(x0, (x1 << 32) | x2, truncated); |
208 | 0 | } |
209 | | |
// add two small integers, checking for overflow.
// we want an efficient operation. for msvc, where
// we don't have built-in intrinsics, this is still
// pretty fast.
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
scalar_add(limb x, limb y, bool &overflow) noexcept {
  limb z;
// gcc and clang
#if defined(__has_builtin)
#if __has_builtin(__builtin_add_overflow)
  // the builtin is not usable in a C++20 constant-evaluated context,
  // so fall through to the portable path there.
  if (!cpp20_and_in_constexpr()) {
    overflow = __builtin_add_overflow(x, y, &z);
    return z;
  }
#endif
#endif

  // generic, this still optimizes correctly on MSVC.
  // unsigned wrap-around is well-defined: the add overflowed exactly
  // when the result is smaller than an operand.
  z = x + y;
  overflow = z < x;
  return z;
}
232 | | |
// multiply two small integers, getting both the high and low bits.
// `carry` is both an input (added into the product) and an output
// (receives the high limb of the result).
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
scalar_mul(limb x, limb y, limb &carry) noexcept {
#ifdef FASTFLOAT_64BIT_LIMB
#if defined(__SIZEOF_INT128__)
  // GCC and clang both define it as an extension.
  // x*y + carry cannot overflow 128 bits, since
  // (2^64-1)^2 + (2^64-1) < 2^128.
  __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry);
  carry = limb(z >> limb_bits);
  return limb(z);
#else
  // fallback, no native 128-bit integer multiplication with carry.
  // on msvc, this optimizes identically, somehow.
  value128 z = full_multiplication(x, y);
  bool overflow;
  z.low = scalar_add(z.low, carry, overflow);
  z.high += uint64_t(overflow); // cannot overflow
  carry = z.high;
  return z.low;
#endif
#else
  // 32-bit limbs: a plain 64-bit product holds both halves.
  uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry);
  carry = limb(z >> limb_bits);
  return limb(z);
#endif
}
258 | | |
259 | | // add scalar value to bigint starting from offset. |
260 | | // used in grade school multiplication |
261 | | template <uint16_t size> |
262 | | inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y, |
263 | 5.84k | size_t start) noexcept { |
264 | 5.84k | size_t index = start; |
265 | 5.84k | limb carry = y; |
266 | 5.84k | bool overflow; |
267 | 9.07k | while (carry != 0 && index < vec.len()) { |
268 | 3.23k | vec[index] = scalar_add(vec[index], carry, overflow); |
269 | 3.23k | carry = limb(overflow); |
270 | 3.23k | index += 1; |
271 | 3.23k | } |
272 | 5.84k | if (carry != 0) { |
273 | 2.14k | FASTFLOAT_TRY(vec.try_push(carry)); |
274 | 2.14k | } |
275 | 5.84k | return true; |
276 | 5.84k | } |
277 | | |
278 | | // add scalar value to bigint. |
279 | | template <uint16_t size> |
280 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool |
281 | 5.84k | small_add(stackvec<size> &vec, limb y) noexcept { |
282 | 5.84k | return small_add_from(vec, y, 0); |
283 | 5.84k | } |
284 | | |
285 | | // multiply bigint by scalar value. |
286 | | template <uint16_t size> |
287 | | inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec, |
288 | 11.5k | limb y) noexcept { |
289 | 11.5k | limb carry = 0; |
290 | 38.1k | for (size_t index = 0; index < vec.len(); index++) { |
291 | 26.6k | vec[index] = scalar_mul(vec[index], y, carry); |
292 | 26.6k | } |
293 | 11.5k | if (carry != 0) { |
294 | 7.46k | FASTFLOAT_TRY(vec.try_push(carry)); |
295 | 7.46k | } |
296 | 11.5k | return true; |
297 | 11.5k | } |
298 | | |
// add bigint to bigint starting from index.
// used in grade school multiplication
template <uint16_t size>
FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
                                          size_t start) noexcept {
  // the effective x buffer is from `xstart..x.len()`, so exit early
  // if we can't get that current range.
  if (x.len() < start || y.len() > x.len() - start) {
    // zero-extend x so every limb of y has a matching limb to add into.
    FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
  }

  bool carry = false;
  for (size_t index = 0; index < y.len(); index++) {
    limb xi = x[index + start];
    limb yi = y[index];
    // two separate adds: the limb sum, then the incoming carry bit.
    bool c1 = false;
    bool c2 = false;
    xi = scalar_add(xi, yi, c1);
    if (carry) {
      xi = scalar_add(xi, 1, c2);
    }
    x[index + start] = xi;
    // at most one of c1/c2 can be set, so a plain OR suffices.
    carry = c1 | c2;
  }

  // handle overflow
  // a carry past the end of y propagates through the rest of x.
  if (carry) {
    FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
  }
  return true;
}
330 | | |
331 | | // add bigint to bigint. |
332 | | template <uint16_t size> |
333 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool |
334 | | large_add_from(stackvec<size> &x, limb_span y) noexcept { |
335 | | return large_add_from(x, y, 0); |
336 | | } |
337 | | |
338 | | // grade-school multiplication algorithm |
339 | | template <uint16_t size> |
340 | 459 | FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept { |
341 | 459 | limb_span xs = limb_span(x.data, x.len()); |
342 | 459 | stackvec<size> z(xs); |
343 | 459 | limb_span zs = limb_span(z.data, z.len()); |
344 | | |
345 | 459 | if (y.len() != 0) { |
346 | 459 | limb y0 = y[0]; |
347 | 459 | FASTFLOAT_TRY(small_mul(x, y0)); |
348 | 2.29k | for (size_t index = 1; index < y.len(); index++) { |
349 | 1.83k | limb yi = y[index]; |
350 | 1.83k | stackvec<size> zi; |
351 | 1.83k | if (yi != 0) { |
352 | | // re-use the same buffer throughout |
353 | 1.83k | zi.set_len(0); |
354 | 1.83k | FASTFLOAT_TRY(zi.try_extend(zs)); |
355 | 1.83k | FASTFLOAT_TRY(small_mul(zi, yi)); |
356 | 1.83k | limb_span zis = limb_span(zi.data, zi.len()); |
357 | 1.83k | FASTFLOAT_TRY(large_add_from(x, zis, index)); |
358 | 1.83k | } |
359 | 1.83k | } |
360 | 459 | } |
361 | | |
362 | 459 | x.normalize(); |
363 | 459 | return true; |
364 | 459 | } |
365 | | |
366 | | // grade-school multiplication algorithm |
367 | | template <uint16_t size> |
368 | 459 | FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept { |
369 | 459 | if (y.len() == 1) { |
370 | 0 | FASTFLOAT_TRY(small_mul(x, y[0])); |
371 | 459 | } else { |
372 | 459 | FASTFLOAT_TRY(long_mul(x, y)); |
373 | 459 | } |
374 | 459 | return true; |
375 | 459 | } |
376 | | |
// precomputed tables for multiplying a bigint by powers of 5.
// templated on a dummy parameter so the static members can be
// defined in this header without ODR violations (pre-C++17).
template <typename = void> struct pow5_tables {
  // exponent consumed per multiplication by `large_power_of_5`.
  static constexpr uint32_t large_step = 135;
  // small_power_of_5[i] == 5**i; 5**27 is the largest that fits in 64 bits.
  static constexpr uint64_t small_power_of_5[] = {
      1UL,
      5UL,
      25UL,
      125UL,
      625UL,
      3125UL,
      15625UL,
      78125UL,
      390625UL,
      1953125UL,
      9765625UL,
      48828125UL,
      244140625UL,
      1220703125UL,
      6103515625UL,
      30517578125UL,
      152587890625UL,
      762939453125UL,
      3814697265625UL,
      19073486328125UL,
      95367431640625UL,
      476837158203125UL,
      2384185791015625UL,
      11920928955078125UL,
      59604644775390625UL,
      298023223876953125UL,
      1490116119384765625UL,
      7450580596923828125UL,
  };
// 5**large_step (5**135), stored as little-endian limbs.
#ifdef FASTFLOAT_64BIT_LIMB
  constexpr static limb large_power_of_5[] = {
      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
      10482974169319127550UL, 198276706040285095UL};
#else
  constexpr static limb large_power_of_5[] = {
      4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
      1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
#endif
};

// pre-C++17 (no inline variables): static constexpr data members still
// need an out-of-class definition when odr-used.
#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE

template <typename T> constexpr uint32_t pow5_tables<T>::large_step;

template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];

template <typename T> constexpr limb pow5_tables<T>::large_power_of_5[];

#endif
429 | | |
// big integer type. implements a small subset of big integer
// arithmetic, using simple algorithms since asymptotically
// faster algorithms are slower for a small number of limbs.
// all operations assume the big-integer is normalized.
struct bigint : pow5_tables<> {
  // storage of the limbs, in little-endian order.
  stackvec<bigint_limbs> vec;

  FASTFLOAT_CONSTEXPR20 bigint() : vec() {}

  // non-copyable and non-movable: the limb buffer is large, and
  // callers always pass bigint by reference.
  bigint(bigint const &) = delete;
  bigint &operator=(bigint const &) = delete;
  bigint(bigint &&) = delete;
  bigint &operator=(bigint &&other) = delete;

  // construct from a 64-bit scalar value.
  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() {
#ifdef FASTFLOAT_64BIT_LIMB
    vec.push_unchecked(value);
#else
    // split into two 32-bit limbs, least-significant first.
    vec.push_unchecked(uint32_t(value));
    vec.push_unchecked(uint32_t(value >> 32));
#endif
    vec.normalize();
  }

  // get the high 64 bits from the vector, and if bits were truncated.
  // this is to get the significant digits for the float.
  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept {
#ifdef FASTFLOAT_64BIT_LIMB
    if (vec.len() == 0) {
      return empty_hi64(truncated);
    } else if (vec.len() == 1) {
      return uint64_hi64(vec.rindex(0), truncated);
    } else {
      uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated);
      // any non-zero limb below the top two also counts as truncation.
      truncated |= vec.nonzero(2);
      return result;
    }
#else
    if (vec.len() == 0) {
      return empty_hi64(truncated);
    } else if (vec.len() == 1) {
      return uint32_hi64(vec.rindex(0), truncated);
    } else if (vec.len() == 2) {
      return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated);
    } else {
      uint64_t result =
          uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated);
      // any non-zero limb below the top three also counts as truncation.
      truncated |= vec.nonzero(3);
      return result;
    }
#endif
  }

  // compare two big integers, returning the large value.
  // assumes both are normalized. if the return value is
  // negative, other is larger, if the return value is
  // positive, this is larger, otherwise they are equal.
  // the limbs are stored in little-endian order, so we
  // must compare the limbs in reverse order.
  FASTFLOAT_CONSTEXPR20 int compare(bigint const &other) const noexcept {
    // normalized invariant: more limbs means strictly larger magnitude.
    if (vec.len() > other.vec.len()) {
      return 1;
    } else if (vec.len() < other.vec.len()) {
      return -1;
    } else {
      // equal lengths: compare limb-wise from most significant down.
      for (size_t index = vec.len(); index > 0; index--) {
        limb xi = vec[index - 1];
        limb yi = other.vec[index - 1];
        if (xi > yi) {
          return 1;
        } else if (xi < yi) {
          return -1;
        }
      }
      return 0;
    }
  }

  // shift left each limb n bits, carrying over to the new limb
  // returns true if we were able to shift all the digits.
  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
    // Internally, for each item, we shift left by n, and add the previous
    // right shifted limb-bits.
    // For example, we transform (for u8) shifted left 2, to:
    //      b10100100 b01000010
    //      b10 b10010001 b00001000
    // n must be in (0, limb_bits): n == 0 would make the `prev >> shr`
    // shift count equal to limb_bits, which is undefined behavior.
    FASTFLOAT_DEBUG_ASSERT(n != 0);
    FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);

    size_t shl = n;
    size_t shr = limb_bits - shl;
    limb prev = 0;
    for (size_t index = 0; index < vec.len(); index++) {
      limb xi = vec[index];
      vec[index] = (xi << shl) | (prev >> shr);
      prev = xi;
    }

    // bits shifted out of the most-significant limb become a new limb.
    limb carry = prev >> shr;
    if (carry != 0) {
      return vec.try_push(carry);
    }
    return true;
  }

  // move the limbs left by `n` limbs.
  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
    FASTFLOAT_DEBUG_ASSERT(n != 0);
    if (n + vec.len() > vec.capacity()) {
      return false;
    } else if (!vec.is_empty()) {
      // move limbs
      // copy_backward handles the overlapping src/dst ranges correctly.
      limb *dst = vec.data + n;
      limb const *src = vec.data;
      std::copy_backward(src, src + vec.len(), dst + vec.len());
      // fill in empty limbs
      limb *first = vec.data;
      limb *last = first + n;
      ::std::fill(first, last, 0);
      vec.set_len(n + vec.len());
      return true;
    } else {
      // shifting an empty (zero) bigint is a no-op.
      return true;
    }
  }

  // move the limbs left by `n` bits.
  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
    // split into a sub-limb bit shift and a whole-limb shift.
    size_t rem = n % limb_bits;
    size_t div = n / limb_bits;
    if (rem != 0) {
      FASTFLOAT_TRY(shl_bits(rem));
    }
    if (div != 0) {
      FASTFLOAT_TRY(shl_limbs(div));
    }
    return true;
  }

  // get the number of leading zeros in the bigint.
  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
    if (vec.is_empty()) {
      return 0;
    } else {
#ifdef FASTFLOAT_64BIT_LIMB
      return leading_zeroes(vec.rindex(0));
#else
      // no use defining a specialized leading_zeroes for a 32-bit type.
      uint64_t r0 = vec.rindex(0);
      return leading_zeroes(r0 << 32);
#endif
    }
  }

  // get the number of bits in the bigint.
  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
    int lz = ctlz();
    return int(limb_bits * vec.len()) - lz;
  }

  // multiply in place by a scalar limb value.
  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }

  // add a scalar limb value in place.
  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }

  // multiply as if by 2 raised to a power.
  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }

  // multiply as if by 5 raised to a power.
  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
    // multiply by a power of 5
    // first consume the exponent in chunks of large_step (5**135)...
    size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
    limb_span large = limb_span(large_power_of_5, large_length);
    while (exp >= large_step) {
      FASTFLOAT_TRY(large_mul(vec, large));
      exp -= large_step;
    }
// ...then in chunks of the largest power of 5 fitting in one limb...
#ifdef FASTFLOAT_64BIT_LIMB
    uint32_t small_step = 27;
    limb max_native = 7450580596923828125UL;
#else
    uint32_t small_step = 13;
    limb max_native = 1220703125U;
#endif
    while (exp >= small_step) {
      FASTFLOAT_TRY(small_mul(vec, max_native));
      exp -= small_step;
    }
    // ...and finish with one table lookup for the remainder.
    if (exp != 0) {
      // Work around clang bug https://godbolt.org/z/zedh7rrhc
      // This is similar to https://github.com/llvm/llvm-project/issues/47746,
      // except the workaround described there don't work here
      FASTFLOAT_TRY(small_mul(
          vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))));
    }

    return true;
  }

  // multiply as if by 10 raised to a power.
  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
    // 10**exp == 5**exp * 2**exp.
    FASTFLOAT_TRY(pow5(exp));
    return pow2(exp);
  }
};
635 | | |
636 | | } // namespace fast_float |
637 | | |
638 | | #endif |