/src/simdjson/src/from_chars.cpp

Source (jump to first uncovered line)
#include <cstdint>
#include <cstring>
#include <limits>
namespace simdjson {
namespace internal {

/**
 * The code in the internal::from_chars function is meant to handle the floating-point number parsing
 * when we have more than 19 digits in the decimal mantissa. This should only be seen
 * in adversarial scenarios: we do not expect production systems to even produce
 * such floating-point numbers.
 *
 * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/)
 * who credits Ken Thompson for the design (via a reference to the Go source
 * code). See
 * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c
 * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c
 * It is probably not very fast but it is a fallback that should almost never be
 * called in real life. Google Wuffs is published under APL 2.0.
 **/

namespace {
// Capacity of decimal::digits. Digits parsed beyond this count are not stored
// and the decimal is flagged as truncated.
constexpr uint32_t max_digits{768};
// Bound on the magnitude of decimal::decimal_point: the shift routines treat
// a value whose decimal point falls outside this range as zero or infinity.
constexpr int32_t decimal_point_range{2047};
} // namespace

// Output of the decimal-to-binary conversion: the explicit mantissa bits and
// the exponent field value that from_chars shifts into place. Both start at 0.
struct adjusted_mantissa {
  uint64_t mantissa;
  int power2;
  adjusted_mantissa() : mantissa{0}, power2{0} {}
};

// Fixed-capacity decimal number used by the slow-path parser.
// The digits are read as a fraction scaled by a power of ten:
//   value = 0.d[0]d[1]...d[num_digits-1] * 10^decimal_point
// (round() takes the first decimal_point digits as the integer part).
struct decimal {
  // Number of significant entries in digits. While parsing it may count past
  // max_digits; the parser clamps it afterwards and sets truncated.
  uint32_t num_digits;
  // Power-of-ten scale of the digit string (see the formula above).
  int32_t decimal_point;
  // True when the parsed text started with '-'.
  bool negative;
  // True when non-zero digits had to be dropped for lack of room.
  bool truncated;
  // Digit values 0..9 (not ASCII characters), most significant first.
  uint8_t digits[max_digits];
};

// Compile-time constants describing the bit layout of floating-point type T.
// The primary template only declares the accessors; the values come from the
// explicit specializations below (double is the only type provided).
template <typename T> struct binary_format {
  static constexpr int mantissa_explicit_bits();
  static constexpr int minimum_exponent();
  static constexpr int infinite_power();
  static constexpr int sign_index();
};

// Number of mantissa bits stored explicitly in a double.
template <>
constexpr int binary_format<double>::mantissa_explicit_bits() { return 52; }

// Exponent offset: compute_float stores (exp2 - minimum_exponent) as the
// exponent field.
template <>
constexpr int binary_format<double>::minimum_exponent() { return -1023; }

// Exponent field value (all ones) used to encode infinity.
template <>
constexpr int binary_format<double>::infinite_power() { return 0x7FF; }

// Bit position of the sign bit within the 64-bit word.
template <>
constexpr int binary_format<double>::sign_index() { return 63; }

// Reports whether c is one of the ASCII decimal digits '0' through '9'.
bool is_integer(char c) noexcept {
  // For a non-digit, c - '0' is either negative or at least 10; once viewed
  // as unsigned both cases land at 10 or above, so one comparison suffices.
  return unsigned(c - '0') < 10u;
}

// Converts the number text at p into a decimal (sign, digit string, decimal
// point). This should always succeed since it follows a call to parse_number.
// No end bound is checked: scanning stops at the first character that cannot
// continue the number. p is advanced past everything that was consumed.
decimal parse_decimal(const char *&p) noexcept {
  decimal answer;
  answer.num_digits = 0;
  answer.decimal_point = 0;
  answer.truncated = false;

  // Optional sign.
  const char lead = *p;
  answer.negative = (lead == '-');
  if ((lead == '-') || (lead == '+')) {
    ++p;
  }

  // Leading zeros carry no information.
  for (; *p == '0'; ++p) {
  }
  // Integer part: store what fits, but keep counting every digit.
  for (; is_integer(*p); ++p) {
    if (answer.num_digits < max_digits) {
      answer.digits[answer.num_digits] = uint8_t(*p - '0');
    }
    answer.num_digits++;
  }

  if (*p == '.') {
    ++p;
    const char *const fraction_start = p;
    // With no significant digit recorded so far, zeros right after the
    // period are not significant either: consume them without storing.
    if (answer.num_digits == 0) {
      for (; *p == '0'; ++p) {
      }
    }
    for (; is_integer(*p); ++p) {
      if (answer.num_digits < max_digits) {
        answer.digits[answer.num_digits] = uint8_t(*p - '0');
      }
      answer.num_digits++;
    }
    // Minus the number of characters consumed after the period.
    answer.decimal_point = int32_t(fraction_start - p);
  }

  if (answer.num_digits != 0) {
    // Walk backwards over the zeros (and the period) that end the mantissa;
    // the walk stops at the last non-zero digit.
    uint32_t zeros_at_end = 0;
    for (const char *back = p - 1; (*back == '0') || (*back == '.'); --back) {
      if (*back == '0') {
        zeros_at_end++;
      }
    }
    answer.decimal_point += int32_t(answer.num_digits);
    answer.num_digits -= zeros_at_end;
  }

  // Anything still beyond the buffer capacity was dropped while scanning.
  if (answer.num_digits > max_digits) {
    answer.truncated = true;
    answer.num_digits = max_digits;
  }

  if ((*p == 'e') || (*p == 'E')) {
    ++p;
    const bool neg_exp = (*p == '-');
    if (neg_exp || (*p == '+')) {
      ++p;
    }
    int32_t exp_number = 0; // exponential part
    for (; is_integer(*p); ++p) {
      // Stop accumulating once the exponent is already huge; the remaining
      // digits are still consumed.
      if (exp_number < 0x10000) {
        exp_number = 10 * exp_number + uint8_t(*p - '0');
      }
    }
    answer.decimal_point += (neg_exp ? -exp_number : exp_number);
  }
  return answer;
}

// Converts the number text in [p, end) into a decimal (sign, digit string,
// decimal point). This should always succeed since it follows a call to
// parse_number.
// Will not read at or beyond the "end" pointer.
// p is advanced past everything that was consumed. When the range ends
// prematurely (empty input, nothing after '.' or after 'e'), the decimal
// built so far is returned as-is.
decimal parse_decimal(const char *&p, const char * end) noexcept {
  decimal answer;
  answer.num_digits = 0;
  answer.decimal_point = 0;
  answer.truncated = false;
  if(p == end) { return answer; } // should never happen
  answer.negative = (*p == '-');
  if ((*p == '-') || (*p == '+')) {
    ++p;
  }

  // Leading zeros carry no information.
  while ((p != end) && (*p == '0')) {
    ++p;
  }
  // Integer part: store what fits, but keep counting every digit.
  while ((p != end) && is_integer(*p)) {
    if (answer.num_digits < max_digits) {
      answer.digits[answer.num_digits] = uint8_t(*p - '0');
    }
    answer.num_digits++;
    ++p;
  }
  if ((p != end) && (*p == '.')) {
    ++p;
    if(p == end) { return answer; } // should never happen
    const char *first_after_period = p;
    // if we have not yet encountered a zero, we have to skip it as well
    if (answer.num_digits == 0) {
      // skip zeros; the bound check keeps an all-zero fraction such as "0.000"
      // from running past the end of the range
      while ((p != end) && (*p == '0')) {
        ++p;
      }
    }
    while ((p != end) && is_integer(*p)) {
      if (answer.num_digits < max_digits) {
        answer.digits[answer.num_digits] = uint8_t(*p - '0');
      }
      answer.num_digits++;
      ++p;
    }
    // Minus the number of characters consumed after the period.
    answer.decimal_point = int32_t(first_after_period - p);
  }
  if(answer.num_digits > 0) {
    // Walk backwards over the zeros (and the period) that end the mantissa;
    // the walk stops at the last non-zero digit, which exists because
    // num_digits only starts counting at a non-zero digit.
    const char *preverse = p - 1;
    int32_t trailing_zeros = 0;
    while ((*preverse == '0') || (*preverse == '.')) {
      if(*preverse == '0') { trailing_zeros++; };
      --preverse;
    }
    answer.decimal_point += int32_t(answer.num_digits);
    answer.num_digits -= uint32_t(trailing_zeros);
  }
  // Anything still beyond the buffer capacity was dropped while scanning.
  if(answer.num_digits > max_digits ) {
    answer.num_digits = max_digits;
    answer.truncated = true;
  }
  if ((p != end) && (('e' == *p) || ('E' == *p))) {
    ++p;
    if(p == end) { return answer; } // should never happen
    bool neg_exp = false;
    if ('-' == *p) {
      neg_exp = true;
      ++p;
    } else if ('+' == *p) {
      ++p;
    }
    int32_t exp_number = 0; // exponential part
    while ((p != end) && is_integer(*p)) {
      uint8_t digit = uint8_t(*p - '0');
      // Stop accumulating once the exponent is already huge; the remaining
      // digits are still consumed.
      if (exp_number < 0x10000) {
        exp_number = 10 * exp_number + digit;
      }
      ++p;
    }
    answer.decimal_point += (neg_exp ? -exp_number : exp_number);
  }
  return answer;
}

namespace {

// Drops the zero digits at the end of h by shrinking num_digits; the digit
// buffer itself is left untouched.
inline void trim(decimal &h) {
  uint32_t length = h.num_digits;
  while ((length != 0) && (h.digits[length - 1] == 0)) {
    --length;
  }
  h.num_digits = length;
}

// Returns how many decimal digits h gains when it is shifted left by `shift`
// bits, as used by decimal_left_shift to position its output.
//
// Only the low 6 bits of shift are used. Each 16-bit table entry packs two
// fields: the upper 5 bits (entry >> 11) give a candidate digit count and the
// lower 11 bits (entry & 0x7FF) give an offset into the digit table below.
// The digits between the offsets of entries shift and shift + 1 are compared
// with the leading digits of h: when h compares lower (or runs out of digits
// first) the result is the candidate count minus one, otherwise the candidate
// count itself.
uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) {
  shift &= 63;
  // 65 entries so that index shift + 1 is valid for every shift in 0..63.
  const static uint16_t number_of_digits_decimal_left_shift_table[65] = {
      0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
      0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067,
      0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF,
      0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0,
      0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA,
      0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC,
      0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C,
      0x051C, 0x051C,
  };
  uint32_t x_a = number_of_digits_decimal_left_shift_table[shift];
  uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1];
  uint32_t num_new_digits = x_a >> 11;
  uint32_t pow5_a = 0x7FF & x_a;
  uint32_t pow5_b = 0x7FF & x_b;
  // Concatenated decimal digit strings (per the table's name, of powers of 5);
  // the offsets decoded above select the slice belonging to this shift.
  const static uint8_t
      number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = {
          5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5,
          3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8,
          2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2,
          5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1,
          5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5,
          3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2,
          8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3,
          7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5,
          6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6,
          0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3,
          8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7,
          6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2,
          5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8,
          6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3,
          2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1,
          2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6,
          4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3,
          2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6,
          6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3,
          8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5,
          5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5,
          7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3,
          1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6,
          6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6,
          4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7,
          2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7,
          3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5,
          2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5,
          9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0,
          2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
          8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5,
          2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4,
          9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2,
          0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5,
          4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7,
          5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9,
          2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5,
          6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9,
          4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3,
          2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8,
          9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2,
          3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1,
          3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1,
          1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3,
          1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2,
          3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1,
          0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3,
          5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1,
          3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3,
          9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3,
          9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6,
          7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3,
          6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7,
          6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9,
          4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2,
          5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9,
          6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
      };
  const uint8_t *pow5 =
      &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a];
  uint32_t i = 0;
  // Length of the digit slice selected for this shift.
  uint32_t n = pow5_b - pow5_a;
  // Digit-by-digit comparison of h against the slice; the first difference
  // decides the result.
  for (; i < n; i++) {
    if (i >= h.num_digits) {
      // h ran out of digits before the slice did.
      return num_new_digits - 1;
    } else if (h.digits[i] == pow5[i]) {
      continue;
    } else if (h.digits[i] < pow5[i]) {
      return num_new_digits - 1;
    } else {
      return num_new_digits;
    }
  }
  // Every digit of the slice matched.
  return num_new_digits;
}

} // end of anonymous namespace

// Rounds h to the nearest unsigned integer, ties going to the even value.
// Returns 0 when h has no digits or a negative decimal point, and UINT64_MAX
// when the integer part would need more than 18 digits.
uint64_t round(decimal &h) {
  if (h.num_digits == 0) {
    return 0;
  }
  if (h.decimal_point < 0) {
    return 0;
  }
  if (h.decimal_point > 18) {
    return UINT64_MAX;
  }
  // From here on h.decimal_point is within 0..18.
  const uint32_t int_len = uint32_t(h.decimal_point);

  // Integer part: the first int_len digits, padded with zeros when h has
  // fewer digits than that.
  uint64_t value = 0;
  for (uint32_t k = 0; k < int_len; k++) {
    const uint64_t digit = uint64_t((k < h.num_digits) ? h.digits[k] : 0);
    value = (10 * value) + digit;
  }

  // No fractional digit left: nothing to round.
  if (int_len >= h.num_digits) {
    return value;
  }

  const uint8_t next = h.digits[int_len];
  bool round_up;
  if ((next == 5) && (int_len + 1 == h.num_digits)) {
    // The stored fraction is exactly one half: go up if digits were dropped
    // earlier (the true value is above one half) or if the last integer digit
    // is odd (round to even).
    round_up = h.truncated || ((int_len > 0) && (1 & h.digits[int_len - 1]));
  } else {
    round_up = next >= 5;
  }
  return round_up ? value + 1 : value;
}

// Multiplies h by 2^shift in place (h = h * 2^shift).
// The digits are processed from least to most significant; each is shifted
// left, added to the running carry, and the result is written back
// num_new_digits positions further right. Digits that would land beyond
// max_digits are dropped, setting h.truncated when a dropped digit is
// non-zero. Trailing zeros are trimmed at the end.
void decimal_left_shift(decimal &h, uint32_t shift) {
  if (h.num_digits == 0) {
    return;
  }
  uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift);
  int32_t read_index = int32_t(h.num_digits - 1);
  uint32_t write_index = h.num_digits - 1 + num_new_digits;
  uint64_t n = 0;

  // Multiply the existing digits, carrying the quotient into the next one.
  while (read_index >= 0) {
    n += uint64_t(h.digits[read_index]) << shift;
    uint64_t quotient = n / 10;
    uint64_t remainder = n - (10 * quotient);
    if (write_index < max_digits) {
      h.digits[write_index] = uint8_t(remainder);
    } else if (remainder > 0) {
      h.truncated = true;
    }
    n = quotient;
    write_index--;
    read_index--;
  }
  // Flush what is left of the carry into the new leading digits.
  // write_index is unsigned: if it steps below zero it wraps to a huge value
  // and the bounds test above simply skips the store.
  while (n > 0) {
    uint64_t quotient = n / 10;
    uint64_t remainder = n - (10 * quotient);
    if (write_index < max_digits) {
      h.digits[write_index] = uint8_t(remainder);
    } else if (remainder > 0) {
      h.truncated = true;
    }
    n = quotient;
    write_index--;
  }
  h.num_digits += num_new_digits;
  if (h.num_digits > max_digits) {
    h.num_digits = max_digits;
  }
  h.decimal_point += int32_t(num_new_digits);
  trim(h);
}

// Divides h by 2^shift in place (h = h * 2^-shift).
// Performs a long division of the digit string: n accumulates digits until
// its top part (n >> shift) is non-zero, then each step emits n >> shift as
// the next output digit and keeps the low `shift` bits as the remainder.
// If the decimal point drops below -decimal_point_range, h is reset to zero.
void decimal_right_shift(decimal &h, uint32_t shift) {
  uint32_t read_index = 0;
  uint32_t write_index = 0;

  uint64_t n = 0;

  // Consume leading digits until the first quotient digit is available.
  while ((n >> shift) == 0) {
    if (read_index < h.num_digits) {
      n = (10 * n) + h.digits[read_index++];
    } else if (n == 0) {
      // No digits at all, or only zeros: h is zero and stays unchanged.
      return;
    } else {
      // Out of stored digits: continue as if zeros followed.
      while ((n >> shift) == 0) {
        n = 10 * n;
        read_index++;
      }
      break;
    }
  }
  // Every digit consumed beyond the first moves the decimal point left.
  h.decimal_point -= int32_t(read_index - 1);
  if (h.decimal_point < -decimal_point_range) { // it is zero
    h.num_digits = 0;
    h.decimal_point = 0;
    h.negative = false;
    h.truncated = false;
    return;
  }
  // Keeps the low `shift` bits of n, i.e. the remainder of the division.
  uint64_t mask = (uint64_t(1) << shift) - 1;
  // write_index stays behind read_index here, so the store needs no bounds
  // test.
  while (read_index < h.num_digits) {
    uint8_t new_digit = uint8_t(n >> shift);
    n = (10 * (n & mask)) + h.digits[read_index++];
    h.digits[write_index++] = new_digit;
  }
  // Emit the digits produced by the remaining remainder; those that do not
  // fit are dropped and recorded in h.truncated when non-zero.
  while (n > 0) {
    uint8_t new_digit = uint8_t(n >> shift);
    n = 10 * (n & mask);
    if (write_index < max_digits) {
      h.digits[write_index++] = new_digit;
    } else if (new_digit > 0) {
      h.truncated = true;
    }
  }
  h.num_digits = write_index;
  trim(h);
}

// Converts the decimal d into the mantissa and exponent field of the binary
// format described by `binary`. d is modified in place (it is used as the
// working value). The sign is not handled here.
//
// Outline: scale d by powers of two until it lies in [1/2 ... 1], tracking
// the binary exponent in exp2; adjust for subnormals; shift left by the
// mantissa width and round to an integer to obtain the mantissa.
// Zero is returned as {mantissa 0, power2 0} and infinity as
// {mantissa 0, power2 binary::infinite_power()}.
template <typename binary> adjusted_mantissa compute_float(decimal &d) {
  adjusted_mantissa answer;
  if (d.num_digits == 0) {
    // should be zero
    answer.power2 = 0;
    answer.mantissa = 0;
    return answer;
  }
  // At this point, going further, we can assume that d.num_digits > 0.
  // We want to guard against excessive decimal point values because
  // they can result in long running times. Indeed, we do
  // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
  // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
  // fine (runs for a long time).
  //
  if(d.decimal_point < -324) {
    // We have something smaller than 1e-324 which is always zero
    // in binary64 and binary32.
    // It should be zero.
    answer.power2 = 0;
    answer.mantissa = 0;
    return answer;
  } else if(d.decimal_point >= 310) {
    // We have something at least as large as 0.1e310 which is
    // always infinite.
    answer.power2 = binary::infinite_power();
    answer.mantissa = 0;
    return answer;
  }

  static const uint32_t max_shift = 60;
  static const uint32_t num_powers = 19;
  // powers[n] is the binary shift applied when the decimal point is n places
  // away from zero (n < num_powers); larger distances use max_shift.
  static const uint8_t powers[19] = {
      0,  3,  6,  9,  13, 16, 19, 23, 26, 29, //
      33, 36, 39, 43, 46, 49, 53, 56, 59,     //
  };
  int32_t exp2 = 0;
  // Values of 1 or more: shift right until the decimal point is at or below 0.
  while (d.decimal_point > 0) {
    uint32_t n = uint32_t(d.decimal_point);
    uint32_t shift = (n < num_powers) ? powers[n] : max_shift;
    decimal_right_shift(d, shift);
    if (d.decimal_point < -decimal_point_range) {
      // should be zero
      answer.power2 = 0;
      answer.mantissa = 0;
      return answer;
    }
    exp2 += int32_t(shift);
  }
  // We shift left toward [1/2 ... 1].
  while (d.decimal_point <= 0) {
    uint32_t shift;
    if (d.decimal_point == 0) {
      if (d.digits[0] >= 5) {
        break;
      }
      shift = (d.digits[0] < 2) ? 2 : 1;
    } else {
      uint32_t n = uint32_t(-d.decimal_point);
      shift = (n < num_powers) ? powers[n] : max_shift;
    }
    decimal_left_shift(d, shift);
    if (d.decimal_point > decimal_point_range) {
      // we want to get infinity: use the format's own infinity exponent, as
      // every other infinity return in this function does (a literal 0xFF
      // would not encode infinity for binary64).
      answer.power2 = binary::infinite_power();
      answer.mantissa = 0;
      return answer;
    }
    exp2 -= int32_t(shift);
  }
  // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2].
  exp2--;
  constexpr int32_t minimum_exponent = binary::minimum_exponent();
  // Subnormal range: shift right until the exponent reaches the minimum the
  // format can represent.
  while ((minimum_exponent + 1) > exp2) {
    uint32_t n = uint32_t((minimum_exponent + 1) - exp2);
    if (n > max_shift) {
      n = max_shift;
    }
    decimal_right_shift(d, n);
    exp2 += int32_t(n);
  }
  if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
    answer.power2 = binary::infinite_power();
    answer.mantissa = 0;
    return answer;
  }

  // Scale so that the integer part of d holds the full mantissa (explicit
  // bits plus the leading bit), then round to the nearest integer.
  const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1;
  decimal_left_shift(d, mantissa_size_in_bits);

  uint64_t mantissa = round(d);
  // It is possible that we have an overflow, in which case we need
  // to shift back.
  if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) {
    decimal_right_shift(d, 1);
    exp2 += 1;
    mantissa = round(d);
    if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
      answer.power2 = binary::infinite_power();
      answer.mantissa = 0;
      return answer;
    }
  }
  answer.power2 = exp2 - binary::minimum_exponent();
  // Leading bit not set: the value is subnormal, which uses the exponent
  // field one below.
  if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) {
    answer.power2--;
  }
  // Keep only the explicitly stored mantissa bits.
  answer.mantissa =
      mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1);
  return answer;
}

// Slow-path conversion of the number text at first (no end bound): parses it
// into a decimal and converts that decimal to the `binary` format.
template <typename binary>
adjusted_mantissa parse_long_mantissa(const char *first) {
  const char *cursor = first;
  decimal parsed = parse_decimal(cursor);
  return compute_float<binary>(parsed);
}

// Slow-path conversion of the number text in [first, end): parses it into a
// decimal and converts that decimal to the `binary` format.
template <typename binary>
adjusted_mantissa parse_long_mantissa(const char *first, const char *end) {
  const char *cursor = first;
  decimal parsed = parse_decimal(cursor, end);
  return compute_float<binary>(parsed);
}

double from_chars(const char *first) noexcept {
  bool negative = first[0] == '-';
  if (negative) {
    first++;
  }
  adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first);
  uint64_t word = am.mantissa;
  word |= uint64_t(am.power2)
          << binary_format<double>::mantissa_explicit_bits();
  word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index())
                  : word;
  double value;
  std::memcpy(&value, &word, sizeof(double));
  return value;
}


// Converts the number text in [first, end) to a double using the slow decimal
// path, never reading at or beyond end. A leading '-' is consumed here and
// becomes the sign bit; the rest of the text determines the mantissa and the
// exponent field. An empty range yields +0.0.
double from_chars(const char *first, const char *end) noexcept {
  // Only inspect the first character when the range is non-empty; otherwise
  // first[0] would be read out of bounds and first could move past end.
  bool negative = (first != end) && (first[0] == '-');
  if (negative) {
    first++;
  }
  adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first, end);
  // Assemble the 64-bit pattern: mantissa in the low bits, exponent field
  // above it, sign bit on top.
  uint64_t word = am.mantissa;
  word |= uint64_t(am.power2)
          << binary_format<double>::mantissa_explicit_bits();
  word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index())
                  : word;
  // Reinterpret the bit pattern as a double.
  double value;
  std::memcpy(&value, &word, sizeof(double));
  return value;
}

} // internal
} // simdjson

Coverage Report

Created: 2023-06-07 06:07

Line	Count	Source (jump to first uncovered line)
1		#include <limits>
2		namespace simdjson {
3		namespace internal {
4
5		/**
6		* The code in the internal::from_chars function is meant to handle the floating-point number parsing
7		* when we have more than 19 digits in the decimal mantissa. This should only be seen
8		* in adversarial scenarios: we do not expect production systems to even produce
9		* such floating-point numbers.
10		*
11		* The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/)
12		* who credits Ken Thompson for the design (via a reference to the Go source
13		* code). See
14		* https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c
15		* https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c
16		* It is probably not very fast but it is a fallback that should almost never be
17		* called in real life. Google Wuffs is published under APL 2.0.
18		**/
19
20		namespace {
21		constexpr uint32_t max_digits = 768;
22		constexpr int32_t decimal_point_range = 2047;
23		} // namespace
24
25		struct adjusted_mantissa {
26		uint64_t mantissa;
27		int power2;
28	0	adjusted_mantissa() : mantissa(0), power2(0) {}
29		};
30
31		struct decimal {
32		uint32_t num_digits;
33		int32_t decimal_point;
34		bool negative;
35		bool truncated;
36		uint8_t digits[max_digits];
37		};
38
39		template <typename T> struct binary_format {
40		static constexpr int mantissa_explicit_bits();
41		static constexpr int minimum_exponent();
42		static constexpr int infinite_power();
43		static constexpr int sign_index();
44		};
45
46	0	template <> constexpr int binary_format<double>::mantissa_explicit_bits() {
47	0	return 52;
48	0	}
49
50	0	template <> constexpr int binary_format<double>::minimum_exponent() {
51	0	return -1023;
52	0	}
53	0	template <> constexpr int binary_format<double>::infinite_power() {
54	0	return 0x7FF;
55	0	}
56
57	0	template <> constexpr int binary_format<double>::sign_index() { return 63; }
58
59	0	bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); }
60
61		// This should always succeed since it follows a call to parse_number.
62	0	decimal parse_decimal(const char *&p) noexcept {
63	0	decimal answer;
64	0	answer.num_digits = 0;
65	0	answer.decimal_point = 0;
66	0	answer.truncated = false;
67	0	answer.negative = (*p == '-');
68	0	if ((*p == '-') || (*p == '+')) {
69	0	++p;
70	0	}
71
72	0	while (*p == '0') {
73	0	++p;
74	0	}
75	0	while (is_integer(*p)) {
76	0	if (answer.num_digits < max_digits) {
77	0	answer.digits[answer.num_digits] = uint8_t(*p - '0');
78	0	}
79	0	answer.num_digits++;
80	0	++p;
81	0	}
82	0	if (*p == '.') {
83	0	++p;
84	0	const char *first_after_period = p;
85		// if we have not yet encountered a zero, we have to skip it as well
86	0	if (answer.num_digits == 0) {
87		// skip zeros
88	0	while (*p == '0') {
89	0	++p;
90	0	}
91	0	}
92	0	while (is_integer(*p)) {
93	0	if (answer.num_digits < max_digits) {
94	0	answer.digits[answer.num_digits] = uint8_t(*p - '0');
95	0	}
96	0	answer.num_digits++;
97	0	++p;
98	0	}
99	0	answer.decimal_point = int32_t(first_after_period - p);
100	0	}
101	0	if(answer.num_digits > 0) {
102	0	const char *preverse = p - 1;
103	0	int32_t trailing_zeros = 0;
104	0	while ((*preverse == '0') || (*preverse == '.')) {
105	0	if(*preverse == '0') { trailing_zeros++; };
106	0	--preverse;
107	0	}
108	0	answer.decimal_point += int32_t(answer.num_digits);
109	0	answer.num_digits -= uint32_t(trailing_zeros);
110	0	}
111	0	if(answer.num_digits > max_digits ) {
112	0	answer.num_digits = max_digits;
113	0	answer.truncated = true;
114	0	}
115	0	if (('e' == *p) || ('E' == *p)) {
116	0	++p;
117	0	bool neg_exp = false;
118	0	if ('-' == *p) {
119	0	neg_exp = true;
120	0	++p;
121	0	} else if ('+' == *p) {
122	0	++p;
123	0	}
124	0	int32_t exp_number = 0; // exponential part
125	0	while (is_integer(*p)) {
126	0	uint8_t digit = uint8_t(*p - '0');
127	0	if (exp_number < 0x10000) {
128	0	exp_number = 10 * exp_number + digit;
129	0	}
130	0	++p;
131	0	}
132	0	answer.decimal_point += (neg_exp ? -exp_number : exp_number);
133	0	}
134	0	return answer;
135	0	}
136
137		// This should always succeed since it follows a call to parse_number.
138		// Will not read at or beyond the "end" pointer.
139	0	decimal parse_decimal(const char *&p, const char * end) noexcept {
140	0	decimal answer;
141	0	answer.num_digits = 0;
142	0	answer.decimal_point = 0;
143	0	answer.truncated = false;
144	0	if(p == end) { return answer; } // should never happen
145	0	answer.negative = (*p == '-');
146	0	if ((*p == '-') || (*p == '+')) {
147	0	++p;
148	0	}
149
150	0	while ((p != end) && (*p == '0')) {
151	0	++p;
152	0	}
153	0	while ((p != end) && is_integer(*p)) {
154	0	if (answer.num_digits < max_digits) {
155	0	answer.digits[answer.num_digits] = uint8_t(*p - '0');
156	0	}
157	0	answer.num_digits++;
158	0	++p;
159	0	}
160	0	if ((p != end) && (*p == '.')) {
161	0	++p;
162	0	if(p == end) { return answer; } // should never happen
163	0	const char *first_after_period = p;
164		// if we have not yet encountered a zero, we have to skip it as well
165	0	if (answer.num_digits == 0) {
166		// skip zeros
167	0	while (*p == '0') {
168	0	++p;
169	0	}
170	0	}
171	0	while ((p != end) && is_integer(*p)) {
172	0	if (answer.num_digits < max_digits) {
173	0	answer.digits[answer.num_digits] = uint8_t(*p - '0');
174	0	}
175	0	answer.num_digits++;
176	0	++p;
177	0	}
178	0	answer.decimal_point = int32_t(first_after_period - p);
179	0	}
180	0	if(answer.num_digits > 0) {
181	0	const char *preverse = p - 1;
182	0	int32_t trailing_zeros = 0;
183	0	while ((*preverse == '0') || (*preverse == '.')) {
184	0	if(*preverse == '0') { trailing_zeros++; };
185	0	--preverse;
186	0	}
187	0	answer.decimal_point += int32_t(answer.num_digits);
188	0	answer.num_digits -= uint32_t(trailing_zeros);
189	0	}
190	0	if(answer.num_digits > max_digits ) {
191	0	answer.num_digits = max_digits;
192	0	answer.truncated = true;
193	0	}
194	0	if ((p != end) && (('e' == *p) || ('E' == *p))) {
195	0	++p;
196	0	if(p == end) { return answer; } // should never happen
197	0	bool neg_exp = false;
198	0	if ('-' == *p) {
199	0	neg_exp = true;
200	0	++p;
201	0	} else if ('+' == *p) {
202	0	++p;
203	0	}
204	0	int32_t exp_number = 0; // exponential part
205	0	while ((p != end) && is_integer(*p)) {
206	0	uint8_t digit = uint8_t(*p - '0');
207	0	if (exp_number < 0x10000) {
208	0	exp_number = 10 * exp_number + digit;
209	0	}
210	0	++p;
211	0	}
212	0	answer.decimal_point += (neg_exp ? -exp_number : exp_number);
213	0	}
214	0	return answer;
215	0	}
216
217		namespace {
218
219		// remove all final zeroes
220	0	inline void trim(decimal &h) {
221	0	while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) {
222	0	h.num_digits--;
223	0	}
224	0	}
225
226	0	uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) {
227	0	shift &= 63;
228	0	const static uint16_t number_of_digits_decimal_left_shift_table[65] = {
229	0	0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
230	0	0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067,
231	0	0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF,
232	0	0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0,
233	0	0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA,
234	0	0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC,
235	0	0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C,
236	0	0x051C, 0x051C,
237	0	};
238	0	uint32_t x_a = number_of_digits_decimal_left_shift_table[shift];
239	0	uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1];
240	0	uint32_t num_new_digits = x_a >> 11;
241	0	uint32_t pow5_a = 0x7FF & x_a;
242	0	uint32_t pow5_b = 0x7FF & x_b;
243	0	const static uint8_t
244	0	number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = {
245	0	5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5,
246	0	3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8,
247	0	2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2,
248	0	5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1,
249	0	5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5,
250	0	3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2,
251	0	8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3,
252	0	7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5,
253	0	6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6,
254	0	0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3,
255	0	8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7,
256	0	6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2,
257	0	5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8,
258	0	6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3,
259	0	2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1,
260	0	2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6,
261	0	4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3,
262	0	2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6,
263	0	6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3,
264	0	8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5,
265	0	5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5,
266	0	7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3,
267	0	1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6,
268	0	6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6,
269	0	4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7,
270	0	2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7,
271	0	3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5,
272	0	2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5,
273	0	9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0,
274	0	2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
275	0	8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5,
276	0	2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4,
277	0	9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2,
278	0	0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5,
279	0	4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7,
280	0	5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9,
281	0	2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5,
282	0	6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9,
283	0	4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3,
284	0	2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8,
285	0	9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2,
286	0	3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1,
287	0	3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1,
288	0	1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3,
289	0	1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2,
290	0	3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1,
291	0	0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3,
292	0	5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1,
293	0	3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3,
294	0	9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3,
295	0	9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6,
296	0	7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3,
297	0	6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7,
298	0	6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9,
299	0	4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2,
300	0	5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9,
301	0	6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
302	0	};
303	0	const uint8_t *pow5 =
304	0	&number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a];
305	0	uint32_t i = 0;
306	0	uint32_t n = pow5_b - pow5_a;
307	0	for (; i < n; i++) {
308	0	if (i >= h.num_digits) {
309	0	return num_new_digits - 1;
310	0	} else if (h.digits[i] == pow5[i]) {
311	0	continue;
312	0	} else if (h.digits[i] < pow5[i]) {
313	0	return num_new_digits - 1;
314	0	} else {
315	0	return num_new_digits;
316	0	}
317	0	}
318	0	return num_new_digits;
319	0	}
320
321		} // end of anonymous namespace
322
323	0	uint64_t round(decimal &h) {
324	0	if ((h.num_digits == 0) \|\| (h.decimal_point < 0)) {
325	0	return 0;
326	0	} else if (h.decimal_point > 18) {
327	0	return UINT64_MAX;
328	0	}
329		// at this point, we know that h.decimal_point >= 0
330	0	uint32_t dp = uint32_t(h.decimal_point);
331	0	uint64_t n = 0;
332	0	for (uint32_t i = 0; i < dp; i++) {
333	0	n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0);
334	0	}
335	0	bool round_up = false;
336	0	if (dp < h.num_digits) {
337	0	round_up = h.digits[dp] >= 5; // normally, we round up
338		// but we may need to round to even!
339	0	if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) {
340	0	round_up = h.truncated \|\| ((dp > 0) && (1 & h.digits[dp - 1]));
341	0	}
342	0	}
343	0	if (round_up) {
344	0	n++;
345	0	}
346	0	return n;
347	0	}
348
349		// computes h * 2^-shift
350	0	void decimal_left_shift(decimal &h, uint32_t shift) {
351	0	if (h.num_digits == 0) {
352	0	return;
353	0	}
354	0	uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift);
355	0	int32_t read_index = int32_t(h.num_digits - 1);
356	0	uint32_t write_index = h.num_digits - 1 + num_new_digits;
357	0	uint64_t n = 0;
358
359	0	while (read_index >= 0) {
360	0	n += uint64_t(h.digits[read_index]) << shift;
361	0	uint64_t quotient = n / 10;
362	0	uint64_t remainder = n - (10 * quotient);
363	0	if (write_index < max_digits) {
364	0	h.digits[write_index] = uint8_t(remainder);
365	0	} else if (remainder > 0) {
366	0	h.truncated = true;
367	0	}
368	0	n = quotient;
369	0	write_index--;
370	0	read_index--;
371	0	}
372	0	while (n > 0) {
373	0	uint64_t quotient = n / 10;
374	0	uint64_t remainder = n - (10 * quotient);
375	0	if (write_index < max_digits) {
376	0	h.digits[write_index] = uint8_t(remainder);
377	0	} else if (remainder > 0) {
378	0	h.truncated = true;
379	0	}
380	0	n = quotient;
381	0	write_index--;
382	0	}
383	0	h.num_digits += num_new_digits;
384	0	if (h.num_digits > max_digits) {
385	0	h.num_digits = max_digits;
386	0	}
387	0	h.decimal_point += int32_t(num_new_digits);
388	0	trim(h);
389	0	}
390
391		// computes h * 2^shift
392	0	void decimal_right_shift(decimal &h, uint32_t shift) {
393	0	uint32_t read_index = 0;
394	0	uint32_t write_index = 0;
395
396	0	uint64_t n = 0;
397
398	0	while ((n >> shift) == 0) {
399	0	if (read_index < h.num_digits) {
400	0	n = (10 * n) + h.digits[read_index++];
401	0	} else if (n == 0) {
402	0	return;
403	0	} else {
404	0	while ((n >> shift) == 0) {
405	0	n = 10 * n;
406	0	read_index++;
407	0	}
408	0	break;
409	0	}
410	0	}
411	0	h.decimal_point -= int32_t(read_index - 1);
412	0	if (h.decimal_point < -decimal_point_range) { // it is zero
413	0	h.num_digits = 0;
414	0	h.decimal_point = 0;
415	0	h.negative = false;
416	0	h.truncated = false;
417	0	return;
418	0	}
419	0	uint64_t mask = (uint64_t(1) << shift) - 1;
420	0	while (read_index < h.num_digits) {
421	0	uint8_t new_digit = uint8_t(n >> shift);
422	0	n = (10 * (n & mask)) + h.digits[read_index++];
423	0	h.digits[write_index++] = new_digit;
424	0	}
425	0	while (n > 0) {
426	0	uint8_t new_digit = uint8_t(n >> shift);
427	0	n = 10 * (n & mask);
428	0	if (write_index < max_digits) {
429	0	h.digits[write_index++] = new_digit;
430	0	} else if (new_digit > 0) {
431	0	h.truncated = true;
432	0	}
433	0	}
434	0	h.num_digits = write_index;
435	0	trim(h);
436	0	}
437
438	0	template <typename binary> adjusted_mantissa compute_float(decimal &d) {
439	0	adjusted_mantissa answer;
440	0	if (d.num_digits == 0) {
441		// should be zero
442	0	answer.power2 = 0;
443	0	answer.mantissa = 0;
444	0	return answer;
445	0	}
446		// At this point, going further, we can assume that d.num_digits > 0.
447		// We want to guard against excessive decimal point values because
448		// they can result in long running times. Indeed, we do
449		// shifts by at most 60 bits. We have that log(10400)/log(260) ~= 22
450		// which is fine, but log(10299995)/log(260) ~= 16609 which is not
451		// fine (runs for a long time).
452		//
453	0	if(d.decimal_point < -324) {
454		// We have something smaller than 1e-324 which is always zero
455		// in binary64 and binary32.
456		// It should be zero.
457	0	answer.power2 = 0;
458	0	answer.mantissa = 0;
459	0	return answer;
460	0	} else if(d.decimal_point >= 310) {
461		// We have something at least as large as 0.1e310 which is
462		// always infinite.
463	0	answer.power2 = binary::infinite_power();
464	0	answer.mantissa = 0;
465	0	return answer;
466	0	}
467
468	0	static const uint32_t max_shift = 60;
469	0	static const uint32_t num_powers = 19;
470	0	static const uint8_t powers[19] = {
471	0	0, 3, 6, 9, 13, 16, 19, 23, 26, 29, //
472	0	33, 36, 39, 43, 46, 49, 53, 56, 59, //
473	0	};
474	0	int32_t exp2 = 0;
475	0	while (d.decimal_point > 0) {
476	0	uint32_t n = uint32_t(d.decimal_point);
477	0	uint32_t shift = (n < num_powers) ? powers[n] : max_shift;
478	0	decimal_right_shift(d, shift);
479	0	if (d.decimal_point < -decimal_point_range) {
480		// should be zero
481	0	answer.power2 = 0;
482	0	answer.mantissa = 0;
483	0	return answer;
484	0	}
485	0	exp2 += int32_t(shift);
486	0	}
487		// We shift left toward [1/2 ... 1].
488	0	while (d.decimal_point <= 0) {
489	0	uint32_t shift;
490	0	if (d.decimal_point == 0) {
491	0	if (d.digits[0] >= 5) {
492	0	break;
493	0	}
494	0	shift = (d.digits[0] < 2) ? 2 : 1;
495	0	} else {
496	0	uint32_t n = uint32_t(-d.decimal_point);
497	0	shift = (n < num_powers) ? powers[n] : max_shift;
498	0	}
499	0	decimal_left_shift(d, shift);
500	0	if (d.decimal_point > decimal_point_range) {
501		// we want to get infinity:
502	0	answer.power2 = 0xFF;
503	0	answer.mantissa = 0;
504	0	return answer;
505	0	}
506	0	exp2 -= int32_t(shift);
507	0	}
508		// We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2].
509	0	exp2--;
510	0	constexpr int32_t minimum_exponent = binary::minimum_exponent();
511	0	while ((minimum_exponent + 1) > exp2) {
512	0	uint32_t n = uint32_t((minimum_exponent + 1) - exp2);
513	0	if (n > max_shift) {
514	0	n = max_shift;
515	0	}
516	0	decimal_right_shift(d, n);
517	0	exp2 += int32_t(n);
518	0	}
519	0	if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
520	0	answer.power2 = binary::infinite_power();
521	0	answer.mantissa = 0;
522	0	return answer;
523	0	}
524
525	0	const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1;
526	0	decimal_left_shift(d, mantissa_size_in_bits);
527
528	0	uint64_t mantissa = round(d);
529		// It is possible that we have an overflow, in which case we need
530		// to shift back.
531	0	if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) {
532	0	decimal_right_shift(d, 1);
533	0	exp2 += 1;
534	0	mantissa = round(d);
535	0	if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
536	0	answer.power2 = binary::infinite_power();
537	0	answer.mantissa = 0;
538	0	return answer;
539	0	}
540	0	}
541	0	answer.power2 = exp2 - binary::minimum_exponent();
542	0	if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) {
543	0	answer.power2--;
544	0	}
545	0	answer.mantissa =
546	0	mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1);
547	0	return answer;
548	0	}
549
550		template <typename binary>
551	0	adjusted_mantissa parse_long_mantissa(const char *first) {
552	0	decimal d = parse_decimal(first);
553	0	return compute_float<binary>(d);
554	0	}
555
556		template <typename binary>
557	0	adjusted_mantissa parse_long_mantissa(const char first, const char end) {
558	0	decimal d = parse_decimal(first, end);
559	0	return compute_float<binary>(d);
560	0	}
561
562	0	double from_chars(const char *first) noexcept {
563	0	bool negative = first[0] == '-';
564	0	if (negative) {
565	0	first++;
566	0	}
567	0	adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first);
568	0	uint64_t word = am.mantissa;
569	0	word \|= uint64_t(am.power2)
570	0	<< binary_format<double>::mantissa_explicit_bits();
571	0	word = negative ? word \| (uint64_t(1) << binary_format<double>::sign_index())
572	0	: word;
573	0	double value;
574	0	std::memcpy(&value, &word, sizeof(double));
575	0	return value;
576	0	}
577
578
579	0	double from_chars(const char first, const char end) noexcept {
580	0	bool negative = first[0] == '-';
581	0	if (negative) {
582	0	first++;
583	0	}
584	0	adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first, end);
585	0	uint64_t word = am.mantissa;
586	0	word \|= uint64_t(am.power2)
587	0	<< binary_format<double>::mantissa_explicit_bits();
588	0	word = negative ? word \| (uint64_t(1) << binary_format<double>::sign_index())
589	0	: word;
590	0	double value;
591	0	std::memcpy(&value, &word, sizeof(double));
592	0	return value;
593	0	}
594
595		} // internal
596		} // simdjson