/src/fast_float/include/fast_float/bigint.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef FASTFLOAT_BIGINT_H |
2 | | #define FASTFLOAT_BIGINT_H |
3 | | |
4 | | #include <algorithm> |
5 | | #include <cstdint> |
6 | | #include <climits> |
7 | | #include <cstring> |
8 | | |
9 | | #include "float_common.h" |
10 | | |
11 | | namespace fast_float { |
12 | | |
// the limb width: we want efficient multiplication of double the bits in
// limb, or for 64-bit limbs, at least 64-bit multiplication where we can
// extract the high and low parts efficiently. this is every 64-bit
// architecture except for sparc, which emulates 128-bit multiplication.
// we might have platforms where `CHAR_BIT` is not 8, so let's avoid
// doing `8 * sizeof(limb)`.
#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
#define FASTFLOAT_64BIT_LIMB 1
typedef uint64_t limb;
constexpr size_t limb_bits = 64;
#else
#define FASTFLOAT_32BIT_LIMB
typedef uint32_t limb;
constexpr size_t limb_bits = 32;
#endif

// non-owning view (pointer + length) over a contiguous run of limbs.
typedef span<limb> limb_span;

// number of bits in a bigint. this needs to be at least the number
// of bits required to store the largest bigint, which is
// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
// ~3600 bits, so we round to 4000.
constexpr size_t bigint_bits = 4000;
// number of limbs needed to hold `bigint_bits` bits.
constexpr size_t bigint_limbs = bigint_bits / limb_bits;
37 | | |
// vector-like type that is allocated on the stack. the entire
// buffer is pre-allocated, and only the length changes.
template <uint16_t size> struct stackvec {
  limb data[size];
  // we never need more than 150 limbs
  uint16_t length{0};

  stackvec() = default;
  // non-copyable and non-movable: the buffer is large, and callers
  // always pass a stackvec by reference.
  stackvec(stackvec const &) = delete;
  stackvec &operator=(stackvec const &) = delete;
  stackvec(stackvec &&) = delete;
  stackvec &operator=(stackvec &&other) = delete;

  // create stack vector from existing limb span.
  // asserts (fatally) if the span does not fit in the buffer.
  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
    FASTFLOAT_ASSERT(try_extend(s));
  }

  // unchecked mutable element access; `index` must be < len().
  FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    return data[index];
  }

  // unchecked const element access; `index` must be < len().
  FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    return data[index];
  }

  // index from the end of the container
  // rindex(0) is the last (most-significant) limb.
  FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    size_t rindex = length - index - 1;
    return data[rindex];
  }

  // set the length, without bounds checking.
  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
    length = uint16_t(len);
  }

  constexpr size_t len() const noexcept { return length; }

  constexpr bool is_empty() const noexcept { return length == 0; }

  constexpr size_t capacity() const noexcept { return size; }

  // append item to vector, without bounds checking
  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
    data[length] = value;
    length++;
  }

  // append item to vector, returning if item was added
  FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept {
    if (len() < capacity()) {
      push_unchecked(value);
      return true;
    } else {
      return false;
    }
  }

  // add items to the vector, from a span, without bounds checking
  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
    limb *ptr = data + length;
    std::copy_n(s.ptr, s.len(), ptr);
    set_len(len() + s.len());
  }

  // try to add items to the vector, returning if items were added
  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
    if (len() + s.len() <= capacity()) {
      extend_unchecked(s);
      return true;
    } else {
      return false;
    }
  }

  // resize the vector, without bounds checking
  // if the new size is longer than the vector, assign value to each
  // appended item.
  FASTFLOAT_CONSTEXPR20
  void resize_unchecked(size_t new_len, limb value) noexcept {
    if (new_len > len()) {
      size_t count = new_len - len();
      limb *first = data + len();
      limb *last = first + count;
      ::std::fill(first, last, value);
      set_len(new_len);
    } else {
      // shrinking only adjusts the length; storage is untouched.
      set_len(new_len);
    }
  }

  // try to resize the vector, returning if the vector was resized.
  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
    if (new_len > capacity()) {
      return false;
    } else {
      resize_unchecked(new_len, value);
      return true;
    }
  }

  // check if any limbs are non-zero after the given index.
  // this needs to be done in reverse order, since the index
  // is relative to the most significant limbs.
  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
    while (index < len()) {
      if (rindex(index) != 0) {
        return true;
      }
      index++;
    }
    return false;
  }

  // normalize the big integer, so most-significant zero limbs are removed.
  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
    while (len() > 0 && rindex(0) == 0) {
      length--;
    }
  }
};
163 | | |
164 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t |
165 | 0 | empty_hi64(bool &truncated) noexcept { |
166 | 0 | truncated = false; |
167 | 0 | return 0; |
168 | 0 | } |
169 | | |
170 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
171 | 230 | uint64_hi64(uint64_t r0, bool &truncated) noexcept { |
172 | 230 | truncated = false; |
173 | 230 | int shl = leading_zeroes(r0); |
174 | 230 | return r0 << shl; |
175 | 230 | } |
176 | | |
177 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
178 | 712 | uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept { |
179 | 712 | int shl = leading_zeroes(r0); |
180 | 712 | if (shl == 0) { |
181 | 189 | truncated = r1 != 0; |
182 | 189 | return r0; |
183 | 523 | } else { |
184 | 523 | int shr = 64 - shl; |
185 | 523 | truncated = (r1 << shl) != 0; |
186 | 523 | return (r0 << shl) | (r1 >> shr); |
187 | 523 | } |
188 | 712 | } |
189 | | |
190 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
191 | 0 | uint32_hi64(uint32_t r0, bool &truncated) noexcept { |
192 | 0 | return uint64_hi64(r0, truncated); |
193 | 0 | } |
194 | | |
195 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
196 | 0 | uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept { |
197 | 0 | uint64_t x0 = r0; |
198 | 0 | uint64_t x1 = r1; |
199 | 0 | return uint64_hi64((x0 << 32) | x1, truncated); |
200 | 0 | } |
201 | | |
202 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t |
203 | 0 | uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept { |
204 | 0 | uint64_t x0 = r0; |
205 | 0 | uint64_t x1 = r1; |
206 | 0 | uint64_t x2 = r2; |
207 | 0 | return uint64_hi64(x0, (x1 << 32) | x2, truncated); |
208 | 0 | } |
209 | | |
// add two small integers, checking for overflow.
// we want an efficient operation. for msvc, where
// we don't have built-in intrinsics, this is still
// pretty fast.
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
scalar_add(limb x, limb y, bool &overflow) noexcept {
  limb z;
// gcc and clang
#if defined(__has_builtin)
#if __has_builtin(__builtin_add_overflow)
  // the builtin is not usable in a C++20 constant-evaluated context,
  // so fall through to the portable path there.
  if (!cpp20_and_in_constexpr()) {
    overflow = __builtin_add_overflow(x, y, &z);
    return z;
  }
#endif
#endif

  // generic, this still optimizes correctly on MSVC.
  // unsigned wrap-around is well-defined: the add overflowed exactly
  // when the result is smaller than an operand.
  z = x + y;
  overflow = z < x;
  return z;
}
232 | | |
// multiply two small integers, getting both the high and low bits.
// `carry` is both an input (added into the product) and an output
// (receives the high limb of the result).
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
scalar_mul(limb x, limb y, limb &carry) noexcept {
#ifdef FASTFLOAT_64BIT_LIMB
#if defined(__SIZEOF_INT128__)
  // GCC and clang both define it as an extension.
  // x*y + carry cannot overflow 128 bits, since
  // (2^64-1)^2 + (2^64-1) < 2^128.
  __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry);
  carry = limb(z >> limb_bits);
  return limb(z);
#else
  // fallback, no native 128-bit integer multiplication with carry.
  // on msvc, this optimizes identically, somehow.
  value128 z = full_multiplication(x, y);
  bool overflow;
  z.low = scalar_add(z.low, carry, overflow);
  z.high += uint64_t(overflow); // cannot overflow
  carry = z.high;
  return z.low;
#endif
#else
  // 32-bit limbs: a plain 64-bit product holds both halves.
  uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry);
  carry = limb(z >> limb_bits);
  return limb(z);
#endif
}
258 | | |
259 | | // add scalar value to bigint starting from offset. |
260 | | // used in grade school multiplication |
261 | | template <uint16_t size> |
262 | | inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y, |
263 | 5.84k | size_t start) noexcept { |
264 | 5.84k | size_t index = start; |
265 | 5.84k | limb carry = y; |
266 | 5.84k | bool overflow; |
267 | 9.07k | while (carry != 0 && index < vec.len()) { |
268 | 3.23k | vec[index] = scalar_add(vec[index], carry, overflow); |
269 | 3.23k | carry = limb(overflow); |
270 | 3.23k | index += 1; |
271 | 3.23k | } |
272 | 5.84k | if (carry != 0) { |
273 | 2.14k | FASTFLOAT_TRY(vec.try_push(carry)); |
274 | 2.14k | } |
275 | 5.84k | return true; |
276 | 5.84k | } |
277 | | |
278 | | // add scalar value to bigint. |
279 | | template <uint16_t size> |
280 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool |
281 | 5.84k | small_add(stackvec<size> &vec, limb y) noexcept { |
282 | 5.84k | return small_add_from(vec, y, 0); |
283 | 5.84k | } |
284 | | |
285 | | // multiply bigint by scalar value. |
286 | | template <uint16_t size> |
287 | | inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec, |
288 | 11.5k | limb y) noexcept { |
289 | 11.5k | limb carry = 0; |
290 | 38.1k | for (size_t index = 0; index < vec.len(); index++) { |
291 | 26.6k | vec[index] = scalar_mul(vec[index], y, carry); |
292 | 26.6k | } |
293 | 11.5k | if (carry != 0) { |
294 | 7.46k | FASTFLOAT_TRY(vec.try_push(carry)); |
295 | 7.46k | } |
296 | 11.5k | return true; |
297 | 11.5k | } |
298 | | |
// add bigint to bigint starting from index.
// used in grade school multiplication
template <uint16_t size>
FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
                                          size_t start) noexcept {
  // the effective x buffer is from `xstart..x.len()`, so exit early
  // if we can't get that current range.
  if (x.len() < start || y.len() > x.len() - start) {
    // zero-extend x so every limb of y has a matching limb to add into.
    FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
  }

  bool carry = false;
  for (size_t index = 0; index < y.len(); index++) {
    limb xi = x[index + start];
    limb yi = y[index];
    // two separate adds: the limb sum, then the incoming carry bit.
    bool c1 = false;
    bool c2 = false;
    xi = scalar_add(xi, yi, c1);
    if (carry) {
      xi = scalar_add(xi, 1, c2);
    }
    x[index + start] = xi;
    // at most one of c1/c2 can be set, so a plain OR suffices.
    carry = c1 | c2;
  }

  // handle overflow
  // a carry past the end of y propagates through the rest of x.
  if (carry) {
    FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
  }
  return true;
}
330 | | |
331 | | // add bigint to bigint. |
332 | | template <uint16_t size> |
333 | | fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool |
334 | | large_add_from(stackvec<size> &x, limb_span y) noexcept { |
335 | | return large_add_from(x, y, 0); |
336 | | } |
337 | | |
338 | | // grade-school multiplication algorithm |
339 | | template <uint16_t size> |
340 | 459 | FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept { |
341 | 459 | limb_span xs = limb_span(x.data, x.len()); |
342 | 459 | stackvec<size> z(xs); |
343 | 459 | limb_span zs = limb_span(z.data, z.len()); |
344 | | |
345 | 459 | if (y.len() != 0) { |
346 | 459 | limb y0 = y[0]; |
347 | 459 | FASTFLOAT_TRY(small_mul(x, y0)); |
348 | 2.29k | for (size_t index = 1; index < y.len(); index++) { |
349 | 1.83k | limb yi = y[index]; |
350 | 1.83k | stackvec<size> zi; |
351 | 1.83k | if (yi != 0) { |
352 | | // re-use the same buffer throughout |
353 | 1.83k | zi.set_len(0); |
354 | 1.83k | FASTFLOAT_TRY(zi.try_extend(zs)); |
355 | 1.83k | FASTFLOAT_TRY(small_mul(zi, yi)); |
356 | 1.83k | limb_span zis = limb_span(zi.data, zi.len()); |
357 | 1.83k | FASTFLOAT_TRY(large_add_from(x, zis, index)); |
358 | 1.83k | } |
359 | 1.83k | } |
360 | 459 | } |
361 | | |
362 | 459 | x.normalize(); |
363 | 459 | return true; |
364 | 459 | } |
365 | | |
366 | | // grade-school multiplication algorithm |
367 | | template <uint16_t size> |
368 | 459 | FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept { |
369 | 459 | if (y.len() == 1) { |
370 | 0 | FASTFLOAT_TRY(small_mul(x, y[0])); |
371 | 459 | } else { |
372 | 459 | FASTFLOAT_TRY(long_mul(x, y)); |
373 | 459 | } |
374 | 459 | return true; |
375 | 459 | } |
376 | | |
// precomputed tables for multiplying a bigint by powers of 5.
// templated on a dummy parameter so the static members can be
// defined in this header without ODR violations (pre-C++17).
template <typename = void> struct pow5_tables {
  // exponent consumed per multiplication by `large_power_of_5`.
  static constexpr uint32_t large_step = 135;
  // small_power_of_5[i] == 5**i; 5**27 is the largest that fits in 64 bits.
  static constexpr uint64_t small_power_of_5[] = {
      1UL,
      5UL,
      25UL,
      125UL,
      625UL,
      3125UL,
      15625UL,
      78125UL,
      390625UL,
      1953125UL,
      9765625UL,
      48828125UL,
      244140625UL,
      1220703125UL,
      6103515625UL,
      30517578125UL,
      152587890625UL,
      762939453125UL,
      3814697265625UL,
      19073486328125UL,
      95367431640625UL,
      476837158203125UL,
      2384185791015625UL,
      11920928955078125UL,
      59604644775390625UL,
      298023223876953125UL,
      1490116119384765625UL,
      7450580596923828125UL,
  };
// 5**large_step (5**135), stored as little-endian limbs.
#ifdef FASTFLOAT_64BIT_LIMB
  constexpr static limb large_power_of_5[] = {
      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
      10482974169319127550UL, 198276706040285095UL};
#else
  constexpr static limb large_power_of_5[] = {
      4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
      1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
#endif
};

// pre-C++17 (no inline variables): static constexpr data members still
// need an out-of-class definition when odr-used.
#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE

template <typename T> constexpr uint32_t pow5_tables<T>::large_step;

template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];

template <typename T> constexpr limb pow5_tables<T>::large_power_of_5[];

#endif
429 | | |
// big integer type. implements a small subset of big integer
// arithmetic, using simple algorithms since asymptotically
// faster algorithms are slower for a small number of limbs.
// all operations assume the big-integer is normalized.
struct bigint : pow5_tables<> {
  // storage of the limbs, in little-endian order.
  stackvec<bigint_limbs> vec;

  FASTFLOAT_CONSTEXPR20 bigint() : vec() {}

  // non-copyable and non-movable: the limb buffer is large, and
  // callers always pass bigint by reference.
  bigint(bigint const &) = delete;
  bigint &operator=(bigint const &) = delete;
  bigint(bigint &&) = delete;
  bigint &operator=(bigint &&other) = delete;

  // construct from a 64-bit scalar value.
  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() {
#ifdef FASTFLOAT_64BIT_LIMB
    vec.push_unchecked(value);
#else
    // split into two 32-bit limbs, least-significant first.
    vec.push_unchecked(uint32_t(value));
    vec.push_unchecked(uint32_t(value >> 32));
#endif
    vec.normalize();
  }

  // get the high 64 bits from the vector, and if bits were truncated.
  // this is to get the significant digits for the float.
  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept {
#ifdef FASTFLOAT_64BIT_LIMB
    if (vec.len() == 0) {
      return empty_hi64(truncated);
    } else if (vec.len() == 1) {
      return uint64_hi64(vec.rindex(0), truncated);
    } else {
      uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated);
      // any non-zero limb below the top two also counts as truncation.
      truncated |= vec.nonzero(2);
      return result;
    }
#else
    if (vec.len() == 0) {
      return empty_hi64(truncated);
    } else if (vec.len() == 1) {
      return uint32_hi64(vec.rindex(0), truncated);
    } else if (vec.len() == 2) {
      return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated);
    } else {
      uint64_t result =
          uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated);
      // any non-zero limb below the top three also counts as truncation.
      truncated |= vec.nonzero(3);
      return result;
    }
#endif
  }

  // compare two big integers, returning the large value.
  // assumes both are normalized. if the return value is
  // negative, other is larger, if the return value is
  // positive, this is larger, otherwise they are equal.
  // the limbs are stored in little-endian order, so we
  // must compare the limbs in reverse order.
  FASTFLOAT_CONSTEXPR20 int compare(bigint const &other) const noexcept {
    // normalized invariant: more limbs means strictly larger magnitude.
    if (vec.len() > other.vec.len()) {
      return 1;
    } else if (vec.len() < other.vec.len()) {
      return -1;
    } else {
      // equal lengths: compare limb-wise from most significant down.
      for (size_t index = vec.len(); index > 0; index--) {
        limb xi = vec[index - 1];
        limb yi = other.vec[index - 1];
        if (xi > yi) {
          return 1;
        } else if (xi < yi) {
          return -1;
        }
      }
      return 0;
    }
  }

  // shift left each limb n bits, carrying over to the new limb
  // returns true if we were able to shift all the digits.
  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
    // Internally, for each item, we shift left by n, and add the previous
    // right shifted limb-bits.
    // For example, we transform (for u8) shifted left 2, to:
    //      b10100100 b01000010
    //      b10 b10010001 b00001000
    // n must be in (0, limb_bits): n == 0 would make the `prev >> shr`
    // shift count equal to limb_bits, which is undefined behavior.
    FASTFLOAT_DEBUG_ASSERT(n != 0);
    FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);

    size_t shl = n;
    size_t shr = limb_bits - shl;
    limb prev = 0;
    for (size_t index = 0; index < vec.len(); index++) {
      limb xi = vec[index];
      vec[index] = (xi << shl) | (prev >> shr);
      prev = xi;
    }

    // bits shifted out of the most-significant limb become a new limb.
    limb carry = prev >> shr;
    if (carry != 0) {
      return vec.try_push(carry);
    }
    return true;
  }

  // move the limbs left by `n` limbs.
  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
    FASTFLOAT_DEBUG_ASSERT(n != 0);
    if (n + vec.len() > vec.capacity()) {
      return false;
    } else if (!vec.is_empty()) {
      // move limbs
      // copy_backward handles the overlapping src/dst ranges correctly.
      limb *dst = vec.data + n;
      limb const *src = vec.data;
      std::copy_backward(src, src + vec.len(), dst + vec.len());
      // fill in empty limbs
      limb *first = vec.data;
      limb *last = first + n;
      ::std::fill(first, last, 0);
      vec.set_len(n + vec.len());
      return true;
    } else {
      // shifting an empty (zero) bigint is a no-op.
      return true;
    }
  }

  // move the limbs left by `n` bits.
  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
    // split into a sub-limb bit shift and a whole-limb shift.
    size_t rem = n % limb_bits;
    size_t div = n / limb_bits;
    if (rem != 0) {
      FASTFLOAT_TRY(shl_bits(rem));
    }
    if (div != 0) {
      FASTFLOAT_TRY(shl_limbs(div));
    }
    return true;
  }

  // get the number of leading zeros in the bigint.
  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
    if (vec.is_empty()) {
      return 0;
    } else {
#ifdef FASTFLOAT_64BIT_LIMB
      return leading_zeroes(vec.rindex(0));
#else
      // no use defining a specialized leading_zeroes for a 32-bit type.
      uint64_t r0 = vec.rindex(0);
      return leading_zeroes(r0 << 32);
#endif
    }
  }

  // get the number of bits in the bigint.
  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
    int lz = ctlz();
    return int(limb_bits * vec.len()) - lz;
  }

  // multiply in place by a scalar limb value.
  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }

  // add a scalar limb value in place.
  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }

  // multiply as if by 2 raised to a power.
  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }

  // multiply as if by 5 raised to a power.
  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
    // multiply by a power of 5
    // first consume the exponent in chunks of large_step (5**135)...
    size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
    limb_span large = limb_span(large_power_of_5, large_length);
    while (exp >= large_step) {
      FASTFLOAT_TRY(large_mul(vec, large));
      exp -= large_step;
    }
// ...then in chunks of the largest power of 5 fitting in one limb...
#ifdef FASTFLOAT_64BIT_LIMB
    uint32_t small_step = 27;
    limb max_native = 7450580596923828125UL;
#else
    uint32_t small_step = 13;
    limb max_native = 1220703125U;
#endif
    while (exp >= small_step) {
      FASTFLOAT_TRY(small_mul(vec, max_native));
      exp -= small_step;
    }
    // ...and finish with one table lookup for the remainder.
    if (exp != 0) {
      // Work around clang bug https://godbolt.org/z/zedh7rrhc
      // This is similar to https://github.com/llvm/llvm-project/issues/47746,
      // except the workaround described there don't work here
      FASTFLOAT_TRY(small_mul(
          vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))));
    }

    return true;
  }

  // multiply as if by 10 raised to a power.
  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
    // 10**exp == 5**exp * 2**exp.
    FASTFLOAT_TRY(pow5(exp));
    return pow2(exp);
  }
};
635 | | |
636 | | } // namespace fast_float |
637 | | |
638 | | #endif |