/src/boost/boost/json/detail/charconv/detail/fast_float/parse_number.hpp

Source (jump to first uncovered line)
// Copyright 2020-2023 Daniel Lemire
// Copyright 2023 Matt Borland
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
//
// Derivative of: https://github.com/fastfloat/fast_float

#ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_PARSE_NUMBER_HPP
#define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_PARSE_NUMBER_HPP

#include <boost/json/detail/charconv/detail/fast_float/ascii_number.hpp>
#include <boost/json/detail/charconv/detail/fast_float/decimal_to_binary.hpp>
#include <boost/json/detail/charconv/detail/fast_float/digit_comparison.hpp>
#include <boost/json/detail/charconv/detail/fast_float/float_common.hpp>

#include <cmath>
#include <cstring>
#include <limits>
#include <system_error>

namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float {


namespace detail {
/**
 * Special case inf, -inf, nan, -nan, infinity, -infinity (a leading '+' is not accepted).
 * The case comparisons could be made much faster given that we know that the
 * strings are null-free and fixed.
 **/

#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif

template <typename T, typename UC>
from_chars_result_t<UC> BOOST_JSON_CXX14_CONSTEXPR
parse_infnan(UC const * first, UC const * last, T &value)  noexcept  {
  // Recognises the textual special values "nan", "nan(<chars>)", "inf" and the
  // eight-character long form of infinity, each optionally preceded by '-'.
  // Matching is done through fastfloat_strncasecmp against the str_const_*
  // constants. On success `value` is set and the result's ptr points just past
  // the consumed text; otherwise ec is invalid_argument, ptr is the original
  // `first` and `value` is left untouched.
  from_chars_result_t<UC> result{};
  result.ptr = first;
  result.ec = std::errc(); // success unless we fall through to the end

  // Assumes first < last (not checked here), so the dereference is unguarded.
  // Only '-' is treated as a sign: C++17 20.19.3.(7.1) explicitly forbids '+'.
  bool const negative = (*first == UC('-'));
  if (negative) {
    ++first;
  }

  // Every accepted spelling is at least three characters long.
  if (last - first < 3) {
    result.ec = std::errc::invalid_argument;
    return result;
  }

  if (fastfloat_strncasecmp(first, str_const_nan<UC>(), 3)) {
    first += 3;
    result.ptr = first;
    value = negative ? -std::numeric_limits<T>::quiet_NaN() : std::numeric_limits<T>::quiet_NaN();
    // Optional nan(n-char-seq-opt) suffix, C++17 20.19.3.7, C11 7.20.1.3.3.
    // At least MSVC produces nan(ind) and nan(snan).
    if (first != last && *first == UC('(')) {
      UC const * cursor = first + 1;
      while (cursor != last) {
        UC const ch = *cursor;
        if (ch == UC(')')) {
          result.ptr = cursor + 1; // well-formed suffix: consume it too
          break;
        }
        bool const lower = UC('a') <= ch && ch <= UC('z');
        bool const upper = UC('A') <= ch && ch <= UC('Z');
        bool const digit = UC('0') <= ch && ch <= UC('9');
        if (!lower && !upper && !digit && ch != UC('_')) {
          break; // illegal character: leave ptr right after "nan"
        }
        ++cursor;
      }
      // Reaching `last` without a ')' also leaves ptr right after "nan".
    }
    return result;
  }

  if (fastfloat_strncasecmp(first, str_const_inf<UC>(), 3)) {
    // Prefer the long spelling (three matched characters plus the next five of
    // str_const_inf) when enough input remains and it matches.
    bool const long_form = (last - first >= 8)
        && fastfloat_strncasecmp(first + 3, str_const_inf<UC>() + 3, 5);
    result.ptr = first + (long_form ? 8 : 3);
    value = negative ? -std::numeric_limits<T>::infinity() : std::numeric_limits<T>::infinity();
    return result;
  }

  result.ec = std::errc::invalid_argument;
  return result;
}

#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
# pragma GCC diagnostic pop
#endif

/**
 * Returns true if the floating-point rounding mode is to 'nearest'.
 * It is the default on most systems. This function is meant to be inexpensive.
 * Credit : @mwalcott3
 */
BOOST_FORCEINLINE bool rounds_to_nearest() noexcept {
  // https://lemire.me/blog/2020/06/26/gcc-not-nearest/
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
  // With any other evaluation method we conservatively report "not nearest".
  return false;
#else
  // Background:
  // A fast function to check your floating-point rounding mode
  // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
  //
  // The intent is to give the same answer as
  //   #include <cfenv>
  //   return fegetround() == FE_TONEAREST;
  // while being much cheaper than the fegetround() function call.
  //
  // The probe is read through a volatile object so that the compiler cannot
  // evaluate the comparison at compile time (other techniques, e.g. asm,
  // might work as well). The probe does not have to be
  // std::numeric_limits<float>::min(): any value small enough that 1 + x
  // should round to 1 would do (after accounting for excess precision, as in
  // 387 instructions).
  static volatile float tiny_source = (std::numeric_limits<float>::min)();
  float const tiny = tiny_source; // copy once so the volatile is loaded at most once
  //
  // Why the comparison works:
  // tiny + 1.0f == 1.0f - tiny holds only when fegetround() == FE_TONEAREST.
  //
  //   FE_UPWARD:
  //     tiny + 1.0f > 1
  //     1.0f - tiny == 1
  //
  //   FE_DOWNWARD or FE_TOWARDZERO:
  //     tiny + 1.0f == 1
  //     1.0f - tiny < 1
  //
  // Caveat: with fast-math enabled the rounding conventions may not apply,
  // so the answer may be inaccurate.
  #ifdef BOOST_JSON_FASTFLOAT_VISUAL_STUDIO
  #   pragma warning(push)
  //  todo: is there a VS warning?
  //  see https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013
  #elif defined(__clang__)
  #   pragma clang diagnostic push
  #   pragma clang diagnostic ignored "-Wfloat-equal"
  #elif defined(__GNUC__)
  #   pragma GCC diagnostic push
  #   pragma GCC diagnostic ignored "-Wfloat-equal"
  #endif
  return (tiny + 1.0f == 1.0f - tiny);
  #ifdef BOOST_JSON_FASTFLOAT_VISUAL_STUDIO
  #   pragma warning(pop)
  #elif defined(__clang__)
  #   pragma clang diagnostic pop
  #elif defined(__GNUC__)
  #   pragma GCC diagnostic pop
  #endif
#endif
}

} // namespace detail

template<typename T, typename UC>
BOOST_JSON_FASTFLOAT_CONSTEXPR20
from_chars_result_t<UC> from_chars(UC const * first, UC const * last,
                             T &value, chars_format fmt /*= chars_format::general*/)  noexcept  {
  // Convenience entry point: build a parse_options_t<UC> from `fmt` alone and
  // delegate all parsing work to from_chars_advanced, returning its result.
  parse_options_t<UC> const options{fmt};
  return from_chars_advanced(first, last, value, options);
}

template<typename T, typename UC>
BOOST_JSON_FASTFLOAT_CONSTEXPR20
from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
                                      T &value, parse_options_t<UC> options)  noexcept  {
  // Parses the character range [first, last) into `value` (float or double).
  //
  // Result:
  //   - empty range                      -> ec = invalid_argument, ptr = first
  //   - not a number per parse_number_string -> whatever detail::parse_infnan returns
  //   - otherwise ptr = end of the matched number; ec is success, or
  //     result_out_of_range when the conversion over/underflowed.

  static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
  static_assert (std::is_same<UC, char>::value ||
                 std::is_same<UC, wchar_t>::value ||
                 std::is_same<UC, char16_t>::value ||
                 std::is_same<UC, char32_t>::value , "only char, wchar_t, char16_t and char32_t are supported");

  using traits = binary_format<T>;

  from_chars_result_t<UC> result;
  if (first == last) {
    result.ptr = first;
    result.ec = std::errc::invalid_argument;
    return result;
  }

  parsed_number_string_t<UC> parsed = parse_number_string<UC>(first, last, options);
  if (!parsed.valid) {
    // Not a decimal number: it may still be one of the inf/nan spellings.
    return detail::parse_infnan(first, last, value);
  }

  result.ptr = parsed.lastmatch;
  result.ec = std::errc(); // success until an over/underflow is detected below

  // Clinger's fast path. Its implementation is convoluted because we want
  // round-to-nearest results in all cases, irrespective of the rounding mode
  // selected on the thread. We proceed optimistically, assuming that
  // detail::rounds_to_nearest() returns true.
  bool const exponent_in_range = traits::min_exponent_fast_path() <= parsed.exponent
                              && parsed.exponent <= traits::max_exponent_fast_path();
  if (exponent_in_range && !parsed.too_many_digits) {
    // The conventional fast path is only valid when the system rounds to the
    // nearest float. That is expected to be the common case; the check is
    // deliberately made here, after the range test, rather than before it.
    // The short-circuit keeps rounds_to_nearest() out of constant evaluation.
    if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
      // fegetround() == FE_TONEAREST: plain Clinger fast path.
      if (parsed.mantissa <= traits::max_mantissa_fast_path()) {
        value = T(parsed.mantissa);
        if (parsed.exponent < 0) {
          value = value / traits::exact_power_of_ten(-parsed.exponent);
        } else {
          value = value * traits::exact_power_of_ten(parsed.exponent);
        }
        if (parsed.negative) {
          value = -value;
        }
        return result;
      }
    } else {
      // fegetround() != FE_TONEAREST (or constant evaluation): modified
      // Clinger fast path, inspired by Jakub Jelínek's proposal. Only
      // non-negative exponents with a mantissa small enough for that exponent
      // qualify.
      if (parsed.exponent >= 0 && parsed.mantissa <= traits::max_mantissa_fast_path(parsed.exponent)) {
#if defined(__clang__)
        // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
        if (parsed.mantissa == 0) {
          value = parsed.negative ? -0. : 0.;
          return result;
        }
#endif
        value = T(parsed.mantissa) * traits::exact_power_of_ten(parsed.exponent);
        if (parsed.negative) {
          value = -value;
        }
        return result;
      }
    }
    // Neither fast path applied: fall through to the general algorithm.
  }

  adjusted_mantissa approx = compute_float<traits>(parsed.exponent, parsed.mantissa);
  // When the digit string was flagged as having too many digits, the result is
  // only trusted if mantissa and mantissa + 1 yield the same answer; otherwise
  // switch to the compute_error result.
  if (parsed.too_many_digits
      && approx.power2 >= 0
      && approx != compute_float<traits>(parsed.exponent, parsed.mantissa + 1)) {
    approx = compute_error<traits>(parsed.exponent, parsed.mantissa);
  }
  // A negative power2 marks an invalid power, so we need to go the long way
  // around (digit comparison). This is very uncommon.
  if (approx.power2 < 0) {
    approx = digit_comp<T>(parsed, approx);
  }
  to_float(parsed.negative, approx, value);

  // Test for over/underflow: a non-zero input that collapsed to an all-zero
  // representation, or a result carrying the infinite exponent.
  bool const underflowed = parsed.mantissa != 0 && approx.mantissa == 0 && approx.power2 == 0;
  if (underflowed || approx.power2 == traits::infinite_power()) {
    result.ec = std::errc::result_out_of_range;
  }
  return result;
}

}}}}}} // namespace fast_float

#endif

Coverage Report

Created: 2023-11-19 06:56

Line	Count	Source (jump to first uncovered line)
1		// Copyright 2020-2023 Daniel Lemire
2		// Copyright 2023 Matt Borland
3		// Distributed under the Boost Software License, Version 1.0.
4		// https://www.boost.org/LICENSE_1_0.txt
5		//
6		// Derivative of: https://github.com/fastfloat/fast_float
7
8		#ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_PARSE_NUMBER_HPP
9		#define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_PARSE_NUMBER_HPP
10
11		#include <boost/json/detail/charconv/detail/fast_float/ascii_number.hpp>
12		#include <boost/json/detail/charconv/detail/fast_float/decimal_to_binary.hpp>
13		#include <boost/json/detail/charconv/detail/fast_float/digit_comparison.hpp>
14		#include <boost/json/detail/charconv/detail/fast_float/float_common.hpp>
15
16		#include <cmath>
17		#include <cstring>
18		#include <limits>
19		#include <system_error>
20
21		namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float {
22
23
24		namespace detail {
25		/**
26		* Special case +inf, -inf, nan, infinity, -infinity.
27		* The case comparisons could be made much faster given that we know that the
28		* strings a null-free and fixed.
29		**/
30
31		#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
32		# pragma GCC diagnostic push
33		# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
34		#endif
35
36		template <typename T, typename UC>
37		from_chars_result_t<UC> BOOST_JSON_CXX14_CONSTEXPR
38	0	parse_infnan(UC const * first, UC const * last, T &value) noexcept {
39	0	from_chars_result_t<UC> answer{};
40	0	answer.ptr = first;
41	0	answer.ec = std::errc(); // be optimistic
42	0	bool minusSign = false;
43	0	if (*first == UC('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
44	0	minusSign = true;
45	0	++first;
46	0	}
47	0	if (last - first >= 3) {
48	0	if (fastfloat_strncasecmp(first, str_const_nan<UC>(), 3)) {
49	0	answer.ptr = (first += 3);
50	0	value = minusSign ? -std::numeric_limits<T>::quiet_NaN() : std::numeric_limits<T>::quiet_NaN();
51		// Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
52	0	if(first != last && *first == UC('(')) {
53	0	for(UC const * ptr = first + 1; ptr != last; ++ptr) {
54	0	if (*ptr == UC(')')) {
55	0	answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
56	0	break;
57	0	}
58	0	else if(!((UC('a') <= *ptr && *ptr <= UC('z')) || (UC('A') <= *ptr && *ptr <= UC('Z')) || (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_')))
59	0	break; // forbidden char, not nan(n-char-seq-opt)
60	0	}
61	0	}
62	0	return answer;
63	0	}
64	0	if (fastfloat_strncasecmp(first, str_const_inf<UC>(), 3)) {
65	0	if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf<UC>() + 3, 5)) {
66	0	answer.ptr = first + 8;
67	0	} else {
68	0	answer.ptr = first + 3;
69	0	}
70	0	value = minusSign ? -std::numeric_limits<T>::infinity() : std::numeric_limits<T>::infinity();
71	0	return answer;
72	0	}
73	0	}
74	0	answer.ec = std::errc::invalid_argument;
75	0	return answer;
76	0	}
77
78		#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
79		# pragma GCC diagnostic pop
80		#endif
81
82		/**
83		* Returns true if the floating-pointing rounding mode is to 'nearest'.
84		* It is the default on most system. This function is meant to be inexpensive.
85		* Credit : @mwalcott3
86		*/
87	0	BOOST_FORCEINLINE bool rounds_to_nearest() noexcept {
88		// https://lemire.me/blog/2020/06/26/gcc-not-nearest/
89		#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
90		return false;
91		#endif
92		// See
93		// A fast function to check your floating-point rounding mode
94		// https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
95		//
96		// This function is meant to be equivalent to :
97		// prior: #include <cfenv>
98		// return fegetround() == FE_TONEAREST;
99		// However, it is expected to be much faster than the fegetround()
100		// function call.
101		//
102		// The volatile keywoard prevents the compiler from computing the function
103		// at compile-time.
104		// There might be other ways to prevent compile-time optimizations (e.g., asm).
105		// The value does not need to be std::numeric_limits<float>::min(), any small
106		// value so that 1 + x should round to 1 would do (after accounting for excess
107		// precision, as in 387 instructions).
108	0	static volatile float fmin = (std::numeric_limits<float>::min)();
109	0	float fmini = fmin; // we copy it so that it gets loaded at most once.
110		//
111		// Explanation:
112		// Only when fegetround() == FE_TONEAREST do we have that
113		// fmin + 1.0f == 1.0f - fmin.
114		//
115		// FE_UPWARD:
116		// fmin + 1.0f > 1
117		// 1.0f - fmin == 1
118		//
119		// FE_DOWNWARD or FE_TOWARDZERO:
120		// fmin + 1.0f == 1
121		// 1.0f - fmin < 1
122		//
123		// Note: This may fail to be accurate if fast-math has been
124		// enabled, as rounding conventions may not apply.
125		#ifdef BOOST_JSON_FASTFLOAT_VISUAL_STUDIO
126		# pragma warning(push)
127		// todo: is there a VS warning?
128		// see https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013
129		#elif defined(__clang__)
130	0	# pragma clang diagnostic push
131	0	# pragma clang diagnostic ignored "-Wfloat-equal"
132		#elif defined(__GNUC__)
133		# pragma GCC diagnostic push
134		# pragma GCC diagnostic ignored "-Wfloat-equal"
135		#endif
136	0	return (fmini + 1.0f == 1.0f - fmini);
137		#ifdef BOOST_JSON_FASTFLOAT_VISUAL_STUDIO
138		# pragma warning(pop)
139		#elif defined(__clang__)
140	0	# pragma clang diagnostic pop
141		#elif defined(__GNUC__)
142		# pragma GCC diagnostic pop
143		#endif
144	0	}
145
146		} // namespace detail
147
148		template<typename T, typename UC>
149		BOOST_JSON_FASTFLOAT_CONSTEXPR20
150		from_chars_result_t<UC> from_chars(UC const * first, UC const * last,
151	0	T &value, chars_format fmt /*= chars_format::general*/) noexcept {
152	0	return from_chars_advanced(first, last, value, parse_options_t<UC>{fmt});
153	0	}
154
155		template<typename T, typename UC>
156		BOOST_JSON_FASTFLOAT_CONSTEXPR20
157		from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
158	0	T &value, parse_options_t<UC> options) noexcept {
159
160	0	static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
161	0	static_assert (std::is_same<UC, char>::value ||
162	0	std::is_same<UC, wchar_t>::value ||
163	0	std::is_same<UC, char16_t>::value ||
164	0	std::is_same<UC, char32_t>::value , "only char, wchar_t, char16_t and char32_t are supported");
165
166	0	from_chars_result_t<UC> answer;
167	0	if (first == last) {
168	0	answer.ec = std::errc::invalid_argument;
169	0	answer.ptr = first;
170	0	return answer;
171	0	}
172	0	parsed_number_string_t<UC> pns = parse_number_string<UC>(first, last, options);
173	0	if (!pns.valid) {
174	0	return detail::parse_infnan(first, last, value);
175	0	}
176	0	answer.ec = std::errc(); // be optimistic
177	0	answer.ptr = pns.lastmatch;
178		// The implementation of the Clinger's fast path is convoluted because
179		// we want round-to-nearest in all cases, irrespective of the rounding mode
180		// selected on the thread.
181		// We proceed optimistically, assuming that detail::rounds_to_nearest() returns
182		// true.
183	0	if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && !pns.too_many_digits) {
184		// Unfortunately, the conventional Clinger's fast path is only possible
185		// when the system rounds to the nearest float.
186		//
187		// We expect the next branch to almost always be selected.
188		// We could check it first (before the previous branch), but
189		// there might be performance advantages at having the check
190		// be last.
191	0	if(!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
192		// We have that fegetround() == FE_TONEAREST.
193		// Next is Clinger's fast path.
194	0	if (pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
195	0	value = T(pns.mantissa);
196	0	if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
197	0	else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
198	0	if (pns.negative) { value = -value; }
199	0	return answer;
200	0	}
201	0	} else {
202		// We do not have that fegetround() == FE_TONEAREST.
203		// Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal
204	0	if (pns.exponent >= 0 && pns.mantissa <=binary_format<T>::max_mantissa_fast_path(pns.exponent)) {
205	0	#if defined(__clang__)
206		// Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
207	0	if(pns.mantissa == 0) {
208	0	value = pns.negative ? -0. : 0.;
209	0	return answer;
210	0	}
211	0	#endif
212	0	value = T(pns.mantissa) * binary_format<T>::exact_power_of_ten(pns.exponent);
213	0	if (pns.negative) { value = -value; }
214	0	return answer;
215	0	}
216	0	}
217	0	}
218	0	adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
219	0	if(pns.too_many_digits && am.power2 >= 0) {
220	0	if(am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
221	0	am = compute_error<binary_format<T>>(pns.exponent, pns.mantissa);
222	0	}
223	0	}
224		// If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
225		// then we need to go the long way around again. This is very uncommon.
226	0	if(am.power2 < 0) { am = digit_comp<T>(pns, am); }
227	0	to_float(pns.negative, am, value);
228		// Test for over/underflow.
229	0	if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == binary_format<T>::infinite_power()) {
230	0	answer.ec = std::errc::result_out_of_range;
231	0	}
232	0	return answer;
233	0	}
234
235		}}}}}} // namespace fast_float
236
237		#endif