Coverage Report

Created: 2025-10-12 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/abseil-cpp/absl/strings/internal/charconv_parse.cc
Line
Count
Source
1
// Copyright 2018 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "absl/strings/internal/charconv_parse.h"
16
#include "absl/strings/charconv.h"
17
18
#include <cassert>
19
#include <cstdint>
20
#include <limits>
21
22
#include "absl/strings/internal/memutil.h"
23
24
namespace absl {
25
ABSL_NAMESPACE_BEGIN
26
namespace {
27
28
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
29
// This number was chosen for multiple reasons.
30
//
31
// (a) First, for whatever integer type we choose to represent the mantissa, we
32
// want to choose the largest possible number of decimal digits for that integer
33
// type.  We are using uint64_t, which can express any 19-digit unsigned
34
// integer.
35
//
36
// (b) Second, we need to parse enough digits that the binary value of any
37
// mantissa we capture has more bits of resolution than the mantissa
38
// representation in the target float.  Our algorithm requires at least 3 bits
39
// of headway, but 19 decimal digits give a little more than that.
40
//
41
// The static assertions that follow verify claims (a) and (b) at compile time.
42
constexpr int kDecimalMantissaDigitsMax = 19;
43
44
static_assert(std::numeric_limits<uint64_t>::digits10 ==
45
                  kDecimalMantissaDigitsMax,
46
              "(a) above");
47
48
// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
49
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
50
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
51
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");
52
53
// The lowest valued 19-digit decimal mantissa we can read still contains
54
// sufficient information to reconstruct a binary mantissa.
//
// The left-hand side is 10^18, the smallest 19-digit value; the right-hand
// side is 2^(53 + 3), i.e. the 53 mantissa bits of a double plus the 3 bits
// of headway required by (b).
55
static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above");
56
57
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
58
//
59
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
60
// to maximize the number of scanned hex digits to improve our conversion.  What
61
// is required is to scan two more bits than the mantissa can represent, so that
62
// we always round correctly.
63
//
64
// (One extra bit does not suffice to perform correct rounding, since a number
65
// exactly halfway between two representable floats has unique rounding rules,
66
// so we need to differentiate between a "halfway between" number and a "closer
67
// to the larger value" number.)
68
constexpr int kHexadecimalMantissaDigitsMax = 15;
69
70
// The minimum number of significant bits that will be read from
71
// kHexadecimalMantissaDigitsMax hex digits.  We must subtract three, since
72
// the most significant digit can be a "1", which only contributes a single
73
// significant bit (rather than the four bits a hex digit can carry).
74
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
75
    4 * kHexadecimalMantissaDigitsMax - 3;
76
77
// With 15 hex digits the guaranteed precision is 4 * 15 - 3 = 57 bits, which
// must exceed the double mantissa width plus the two extra rounding bits
// described above.
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
78
                  std::numeric_limits<double>::digits + 2,
79
              "kHexadecimalMantissaDigitsMax too small");
80
81
// We also impose a limit on the number of significant digits we will read from
82
// an exponent, to avoid having to deal with integer overflow.  We use 9 for
83
// this purpose.
84
//
85
// If we read a 9 digit exponent, the end result of the conversion will
86
// necessarily be infinity or zero, depending on the sign of the exponent.
87
// Therefore we can just drop extra digits on the floor without any extra
88
// logic.
89
constexpr int kDecimalExponentDigitsMax = 9;
90
// The exponent is accumulated in an `int`, so `int` must be able to hold any
// value with kDecimalExponentDigitsMax decimal digits.
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
91
              "int type too small");
92
93
// To avoid incredibly large inputs causing integer overflow for our exponent,
94
// we impose an arbitrary but very large limit on the number of significant
95
// digits we will accept.  The implementation refuses to match a string whose
96
// run of consecutive mantissa digits reaches this limit (the checks use `>=`).
97
constexpr int kDecimalDigitLimit = 50000000;
98
99
// Corresponding limit for hexadecimal digit inputs.  This is one fourth of
100
// kDecimalDigitLimit, since each dropped hexadecimal digit requires
101
// a binary exponent adjustment of 4.
102
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
103
104
// The largest exponent we can read is 999999999 (per
105
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
106
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
107
// comfortably fits in an integer.
108
//
109
// We count kDecimalDigitLimit twice because there are independent limits for
110
// numbers before and after the decimal point.  (In the case where there are no
111
// significant digits before the decimal point, there are independent limits for
112
// post-decimal-point leading zeroes and for significant digits.)
113
static_assert(999999999 + 2 * kDecimalDigitLimit <
114
                  std::numeric_limits<int>::max(),
115
              "int type too small");
116
// The same bound for hexadecimal input, where every digit counted against
// kHexadecimalDigitLimit moves the binary exponent by 4.
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
117
                  std::numeric_limits<int>::max(),
118
              "int type too small");
119
120
// Returns true if the provided bitfield allows parsing an exponent value
121
// (e.g., "1.5e100").
//
// An exponent is allowed when the `scientific` bit is set, or when the `fixed`
// bit is not set.  It is disallowed only when `flags` has `fixed` set without
// `scientific`.
122
9.93M
bool AllowExponent(chars_format flags) {
123
9.93M
  bool fixed = (flags & chars_format::fixed) == chars_format::fixed;
124
9.93M
  bool scientific =
125
9.93M
      (flags & chars_format::scientific) == chars_format::scientific;
126
9.93M
  return scientific || !fixed;
127
9.93M
}
128
129
// Returns true if the provided bitfield requires an exponent value be present.
//
// An exponent is mandatory only when the `scientific` bit is set and the
// `fixed` bit is not; when both bits are set the exponent is optional.
130
8.03M
bool RequireExponent(chars_format flags) {
131
8.03M
  bool fixed = (flags & chars_format::fixed) == chars_format::fixed;
132
8.03M
  bool scientific =
133
8.03M
      (flags & chars_format::scientific) == chars_format::scientific;
134
8.03M
  return scientific && !fixed;
135
8.03M
}
136
137
// Lookup table, indexed by a character converted to `unsigned char`, giving
// that character's value as a hexadecimal digit: '0'-'9' map to 0-9, and both
// 'A'-'F' and 'a'-'f' map to 10-15.  Every other entry is -1, which marks the
// character as not being a hex digit.
const int8_t kAsciiToInt[256] = {
138
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
139
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
140
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,
141
    9,  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
142
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
143
    -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
144
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
145
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
146
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
147
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
148
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
149
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
150
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
151
    -1, -1, -1, -1, -1, -1, -1, -1, -1};
152
153
// The templates declared here are only declared, not defined, in their primary
// form; this file provides explicit specializations for base 10 and base 16.

// Returns true if `ch` is a digit in the given base.
154
template <int base>
155
bool IsDigit(char ch);
156
157
// Converts a valid `ch` to its digit value in the given base.  The caller is
// expected to have checked `ch` with IsDigit<base>() first.
158
template <int base>
159
unsigned ToDigit(char ch);
160
161
// Returns true if `ch` is the exponent delimiter for the given base.
162
template <int base>
163
bool IsExponentCharacter(char ch);
164
165
// Returns the maximum number of significant digits we will read for a float
166
// in the given base.
167
template <int base>
168
constexpr int MantissaDigitsMax();
169
170
// Returns the largest consecutive run of digits we will accept when parsing a
171
// number in the given base.
172
template <int base>
173
constexpr int DigitLimit();
174
175
// Returns the amount the exponent must be adjusted by for each dropped digit.
176
// (For decimal this is 1, since the digits are in base 10 and the exponent base
177
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
178
// the exponent base is 2.)
179
template <int base>
180
constexpr int DigitMagnitude();
181
182
// Base 10: true only for the characters '0' through '9'.
template <>
183
35.5M
bool IsDigit<10>(char ch) {
184
35.5M
  return ch >= '0' && ch <= '9';
185
35.5M
}
186
// Base 16: true when kAsciiToInt holds a non-negative value for `ch`.  The
// cast to unsigned char keeps the table index non-negative even where `char`
// is a signed type.
template <>
187
9.54M
bool IsDigit<16>(char ch) {
188
9.54M
  return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0;
189
9.54M
}
190
191
// Base 10: returns the offset of `ch` from '0'.  No validation is done here,
// so the result is only meaningful when IsDigit<10>(ch) is true.
template <>
192
21.2M
unsigned ToDigit<10>(char ch) {
193
21.2M
  return static_cast<unsigned>(ch - '0');
194
21.2M
}
195
// Base 16: returns the kAsciiToInt entry for `ch`.  For a character that is
// not a hex digit the table holds -1, which this cast turns into a large
// unsigned value, so callers must check IsDigit<16>(ch) first.
template <>
196
71.9k
unsigned ToDigit<16>(char ch) {
197
71.9k
  return static_cast<unsigned>(kAsciiToInt[static_cast<unsigned char>(ch)]);
198
71.9k
}
199
200
// Base 10: the exponent delimiter is 'e' or 'E'.
template <>
201
1.89M
bool IsExponentCharacter<10>(char ch) {
202
1.89M
  return ch == 'e' || ch == 'E';
203
1.89M
}
204
205
// Base 16: the exponent delimiter is 'p' or 'P' ('e' is itself a hex digit).
template <>
206
4.25k
bool IsExponentCharacter<16>(char ch) {
207
4.25k
  return ch == 'p' || ch == 'P';
208
4.25k
}
209
210
// Base 10: at most kDecimalMantissaDigitsMax significant digits are read.
template <>
211
29.7M
constexpr int MantissaDigitsMax<10>() {
212
29.7M
  return kDecimalMantissaDigitsMax;
213
29.7M
}
214
// Base 16: at most kHexadecimalMantissaDigitsMax significant digits are read.
template <>
215
34.8k
constexpr int MantissaDigitsMax<16>() {
216
34.8k
  return kHexadecimalMantissaDigitsMax;
217
34.8k
}
218
219
// Base 10: digit runs of kDecimalDigitLimit or more are rejected by ParseFloat.
template <>
220
10.0M
constexpr int DigitLimit<10>() {
221
10.0M
  return kDecimalDigitLimit;
222
10.0M
}
223
// Base 16: digit runs of kHexadecimalDigitLimit or more are rejected by
// ParseFloat.
template <>
224
18.2k
constexpr int DigitLimit<16>() {
225
18.2k
  return kHexadecimalDigitLimit;
226
18.2k
}
227
228
// Base 10: one decimal digit shifts the (base-10) exponent by 1.
template <>
229
9.75M
constexpr int DigitMagnitude<10>() {
230
9.75M
  return 1;
231
9.75M
}
232
// Base 16: one hex digit shifts the (base-2) exponent by 4.
template <>
233
9.97k
constexpr int DigitMagnitude<16>() {
234
9.97k
  return 4;
235
9.97k
}
236
237
// Reads digits in base `base` from [begin, end) and folds them into *out,
238
// starting from the value *out already holds (each digit read performs
// *out = *out * base + digit).  Returns the number of characters consumed,
// which includes skipped leading zeros and digits read past max_digits.
239
//
240
// After max_digits has been read, keeps consuming characters, but no longer
241
// adjusts *out.  If a nonzero digit is dropped this way, *dropped_nonzero_digit
242
// is set; otherwise, it is left unmodified.  dropped_nonzero_digit may be
// null, in which case dropped digits are not reported.
243
//
244
// If no digits are matched, returns 0 and leaves *out unchanged.
245
//
246
// ConsumeDigits does not protect against overflow on *out; max_digits must
247
// be chosen with respect to type T to avoid the possibility of overflow.
248
template <int base, typename T>
249
int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out,
250
11.9M
                  bool* dropped_nonzero_digit) {
251
11.9M
  // Debug-only check that max_digits digits of this base always fit in T.
  if (base == 10) {
252
11.8M
    assert(max_digits <= std::numeric_limits<T>::digits10);
253
11.8M
  } else if (base == 16) {
254
16.4k
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
255
16.4k
  }
256
11.9M
  const char* const original_begin = begin;
257
258
  // Skip leading zeros, but only if *out is zero.
259
  // They don't cause an overflow so we don't have to count them for
260
  // `max_digits`.
261
12.0M
  while (!*out && end != begin && *begin == '0') ++begin;
262
263
11.9M
  T accumulator = *out;
264
11.9M
  // Digits are accumulated only up to this position: max_digits characters
  // past the leading zeros, or `end` if the input is shorter than that.
  const char* significant_digits_end =
265
11.9M
      (end - begin > max_digits) ? begin + max_digits : end;
266
33.2M
  while (begin < significant_digits_end && IsDigit<base>(*begin)) {
267
    // Do not guard against *out overflow; max_digits was chosen to avoid this.
268
    // Do assert against it, to detect problems in debug builds.
269
21.3M
    auto digit = static_cast<T>(ToDigit<base>(*begin));
270
21.3M
    assert(accumulator * base >= accumulator);
271
21.3M
    accumulator *= base;
272
21.3M
    assert(accumulator + digit >= accumulator);
273
21.3M
    accumulator += digit;
274
21.3M
    ++begin;
275
21.3M
  }
276
11.9M
  // Consume any remaining digits without accumulating them, remembering only
  // whether one of them was something other than '0'.
  bool dropped_nonzero = false;
277
31.7M
  while (begin < end && IsDigit<base>(*begin)) {
278
19.8M
    dropped_nonzero = dropped_nonzero || (*begin != '0');
279
19.8M
    ++begin;
280
19.8M
  }
281
11.9M
  // The flag is only ever set here, never cleared, so a value set by an
  // earlier call is preserved.
  if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
282
31.5k
    *dropped_nonzero_digit = true;
283
31.5k
  }
284
11.9M
  *out = accumulator;
285
11.9M
  return static_cast<int>(begin - original_begin);
286
11.9M
}
charconv_parse.cc:int absl::(anonymous namespace)::ConsumeDigits<10, unsigned long>(char const*, char const*, int, unsigned long*, bool*)
Line
Count
Source
250
9.98M
                  bool* dropped_nonzero_digit) {
251
9.98M
  if (base == 10) {
252
9.98M
    assert(max_digits <= std::numeric_limits<T>::digits10);
253
9.98M
  } else if (base == 16) {
254
0
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
255
0
  }
256
9.98M
  const char* const original_begin = begin;
257
258
  // Skip leading zeros, but only if *out is zero.
259
  // They don't cause an overflow so we don't have to count them for
260
  // `max_digits`.
261
9.98M
  while (!*out && end != begin && *begin == '0') ++begin;
262
263
9.98M
  T accumulator = *out;
264
9.98M
  const char* significant_digits_end =
265
9.98M
      (end - begin > max_digits) ? begin + max_digits : end;
266
26.7M
  while (begin < significant_digits_end && IsDigit<base>(*begin)) {
267
    // Do not guard against *out overflow; max_digits was chosen to avoid this.
268
    // Do assert against it, to detect problems in debug builds.
269
16.7M
    auto digit = static_cast<T>(ToDigit<base>(*begin));
270
16.7M
    assert(accumulator * base >= accumulator);
271
16.7M
    accumulator *= base;
272
16.7M
    assert(accumulator + digit >= accumulator);
273
16.7M
    accumulator += digit;
274
16.7M
    ++begin;
275
16.7M
  }
276
9.98M
  bool dropped_nonzero = false;
277
20.1M
  while (begin < end && IsDigit<base>(*begin)) {
278
10.1M
    dropped_nonzero = dropped_nonzero || (*begin != '0');
279
10.1M
    ++begin;
280
10.1M
  }
281
9.98M
  if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
282
29.4k
    *dropped_nonzero_digit = true;
283
29.4k
  }
284
9.98M
  *out = accumulator;
285
9.98M
  return static_cast<int>(begin - original_begin);
286
9.98M
}
charconv_parse.cc:int absl::(anonymous namespace)::ConsumeDigits<10, int>(char const*, char const*, int, int*, bool*)
Line
Count
Source
250
1.90M
                  bool* dropped_nonzero_digit) {
251
1.90M
  if (base == 10) {
252
1.90M
    assert(max_digits <= std::numeric_limits<T>::digits10);
253
1.90M
  } else if (base == 16) {
254
0
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
255
0
  }
256
1.90M
  const char* const original_begin = begin;
257
258
  // Skip leading zeros, but only if *out is zero.
259
  // They don't cause an overflow so we don't have to count them for
260
  // `max_digits`.
261
2.00M
  while (!*out && end != begin && *begin == '0') ++begin;
262
263
1.90M
  T accumulator = *out;
264
1.90M
  const char* significant_digits_end =
265
1.90M
      (end - begin > max_digits) ? begin + max_digits : end;
266
6.45M
  while (begin < significant_digits_end && IsDigit<base>(*begin)) {
267
    // Do not guard against *out overflow; max_digits was chosen to avoid this.
268
    // Do assert against it, to detect problems in debug builds.
269
4.55M
    auto digit = static_cast<T>(ToDigit<base>(*begin));
270
4.55M
    assert(accumulator * base >= accumulator);
271
4.55M
    accumulator *= base;
272
4.55M
    assert(accumulator + digit >= accumulator);
273
4.55M
    accumulator += digit;
274
4.55M
    ++begin;
275
4.55M
  }
276
1.90M
  bool dropped_nonzero = false;
277
2.12M
  while (begin < end && IsDigit<base>(*begin)) {
278
227k
    dropped_nonzero = dropped_nonzero || (*begin != '0');
279
227k
    ++begin;
280
227k
  }
281
1.90M
  if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
282
0
    *dropped_nonzero_digit = true;
283
0
  }
284
1.90M
  *out = accumulator;
285
1.90M
  return static_cast<int>(begin - original_begin);
286
1.90M
}
charconv_parse.cc:int absl::(anonymous namespace)::ConsumeDigits<16, unsigned long>(char const*, char const*, int, unsigned long*, bool*)
Line
Count
Source
250
16.4k
                  bool* dropped_nonzero_digit) {
251
16.4k
  if (base == 10) {
252
0
    assert(max_digits <= std::numeric_limits<T>::digits10);
253
16.4k
  } else if (base == 16) {
254
16.4k
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
255
16.4k
  }
256
16.4k
  const char* const original_begin = begin;
257
258
  // Skip leading zeros, but only if *out is zero.
259
  // They don't cause an overflow so we don't have to count them for
260
  // `max_digits`.
261
16.4k
  while (!*out && end != begin && *begin == '0') ++begin;
262
263
16.4k
  T accumulator = *out;
264
16.4k
  const char* significant_digits_end =
265
16.4k
      (end - begin > max_digits) ? begin + max_digits : end;
266
88.4k
  while (begin < significant_digits_end && IsDigit<base>(*begin)) {
267
    // Do not guard against *out overflow; max_digits was chosen to avoid this.
268
    // Do assert against it, to detect problems in debug builds.
269
71.9k
    auto digit = static_cast<T>(ToDigit<base>(*begin));
270
71.9k
    assert(accumulator * base >= accumulator);
271
71.9k
    accumulator *= base;
272
71.9k
    assert(accumulator + digit >= accumulator);
273
71.9k
    accumulator += digit;
274
71.9k
    ++begin;
275
71.9k
  }
276
16.4k
  bool dropped_nonzero = false;
277
9.47M
  while (begin < end && IsDigit<base>(*begin)) {
278
9.45M
    dropped_nonzero = dropped_nonzero || (*begin != '0');
279
9.45M
    ++begin;
280
9.45M
  }
281
16.4k
  if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
282
2.09k
    *dropped_nonzero_digit = true;
283
2.09k
  }
284
16.4k
  *out = accumulator;
285
16.4k
  return static_cast<int>(begin - original_begin);
286
16.4k
}
287
288
// Returns true if `v` is one of the chars allowed inside parentheses following
289
// a NaN: an underscore, a decimal digit, or an ASCII letter of either case.
290
257k
bool IsNanChar(char v) {
291
257k
  return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') ||
292
208k
         (v >= 'A' && v <= 'Z');
293
257k
}
294
295
// Checks the range [begin, end) for a strtod()-formatted infinity or NaN.  If
296
// one is found, sets `*out` appropriately and returns true.
//
// On a match this writes out->type and out->end (one past the last matched
// character); for a NaN with a parenthesized payload it also writes
// out->subrange_begin / out->subrange_end.  On a failed match `*out` is not
// modified and false is returned.  Only a prefix of the range has to match.
297
bool ParseInfinityOrNan(const char* begin, const char* end,
298
9.93M
                        strings_internal::ParsedFloat* out) {
299
9.93M
  // Both "inf" and "nan" need at least three characters.
  if (end - begin < 3) {
300
6.45M
    return false;
301
6.45M
  }
302
3.48M
  switch (*begin) {
303
10
    case 'i':
304
218
    case 'I': {
305
      // An infinity string consists of the characters "inf" or "infinity",
306
      // case insensitive.
307
218
      if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
308
19
        return false;
309
19
      }
310
199
      out->type = strings_internal::FloatType::kInfinity;
311
199
      // Prefer the long spelling when all eight characters are present;
      // otherwise consume just the three characters of "inf".
      if (end - begin >= 8 &&
312
21
          strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) {
313
0
        out->end = begin + 8;
314
199
      } else {
315
199
        out->end = begin + 3;
316
199
      }
317
199
      return true;
318
218
    }
319
2.85k
    case 'n':
320
2.96k
    case 'N': {
321
      // A NaN consists of the characters "nan", case insensitive, optionally
322
      // followed by a parenthesized sequence of zero or more alphanumeric
323
      // characters and/or underscores.
324
2.96k
      if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
325
27
        return false;
326
27
      }
327
2.93k
      out->type = strings_internal::FloatType::kNan;
328
2.93k
      out->end = begin + 3;
329
      // NaN is allowed to be followed by a parenthesized string, consisting of
330
      // only the characters [a-zA-Z0-9_].  Match that if it's present.
331
2.93k
      begin += 3;
332
2.93k
      if (begin < end && *begin == '(') {
333
2.52k
        const char* nan_begin = begin + 1;
334
257k
        while (nan_begin < end && IsNanChar(*nan_begin)) {
335
255k
          ++nan_begin;
336
255k
        }
337
2.52k
        // Without a closing ')' the parenthesized part is not consumed and
        // out->end stays just past "nan".
        if (nan_begin < end && *nan_begin == ')') {
338
          // We found an extra NaN specifier range
339
2.46k
          out->subrange_begin = begin + 1;
340
2.46k
          out->subrange_end = nan_begin;
341
2.46k
          out->end = nan_begin + 1;
342
2.46k
        }
343
2.52k
      }
344
2.93k
      return true;
345
2.96k
    }
346
3.48M
    default:
347
3.48M
      return false;
348
3.48M
  }
349
3.48M
}
350
}  // namespace
351
352
namespace strings_internal {
353
354
// Parses the characters in [begin, end) as a floating-point literal whose
// mantissa digits are in base `base`, and returns the pieces as a ParsedFloat.
//
// Accepted forms are an infinity or NaN (see ParseInfinityOrNan) or a run of
// digits with an optional '.' and an optional exponent.  `format_flags`
// decides whether an exponent may appear (AllowExponent) and whether one must
// appear (RequireExponent).  This function does not look for a leading sign
// on the mantissa.
// NOTE(review): presumably the caller strips the sign and any "0x" prefix
// before calling -- confirm against the call sites.
//
// On success result.type is kNumber, result.mantissa holds up to
// MantissaDigitsMax<base>() significant digits, result.exponent holds the
// literal exponent corrected for the position of the decimal point and for
// dropped digits, and result.end points one past the last character consumed.
//
// On failure `result` is returned without its `type` being set to kNumber.
// NOTE(review): the initial value of `type` comes from ParsedFloat's
// declaration in the header, which is not visible here.
template <int base>
355
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
356
9.93M
                                         chars_format format_flags) {
357
9.93M
  strings_internal::ParsedFloat result;
358
359
  // Exit early if we're given an empty range.
360
9.93M
  if (begin == end) return result;
361
362
  // Handle the infinity and NaN cases.
363
9.93M
  if (ParseInfinityOrNan(begin, end, &result)) {
364
3.13k
    return result;
365
3.13k
  }
366
367
9.93M
  const char* const mantissa_begin = begin;
368
10.3M
  while (begin < end && *begin == '0') {
369
413k
    ++begin;  // skip leading zeros
370
413k
  }
371
9.93M
  uint64_t mantissa = 0;
372
373
9.93M
  // Net number of digit positions by which `mantissa` must be scaled:
  // positive for integer digits that were dropped, negative for fraction
  // digits (and skipped fraction zeros) that were absorbed.
  int exponent_adjustment = 0;
374
9.93M
  bool mantissa_is_inexact = false;
375
9.93M
  int pre_decimal_digits = ConsumeDigits<base>(
376
9.93M
      begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
377
9.93M
  begin += pre_decimal_digits;
378
9.93M
  // How many more significant digits may still be read after the point.
  int digits_left;
379
9.93M
  if (pre_decimal_digits >= DigitLimit<base>()) {
380
    // refuse to parse pathological inputs
381
0
    return result;
382
9.93M
  } else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
383
    // We dropped some non-fraction digits on the floor.  Adjust our exponent
384
    // to compensate.
385
14.9k
    exponent_adjustment =
386
14.9k
        static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
387
14.9k
    digits_left = 0;
388
9.92M
  } else {
389
9.92M
    digits_left =
390
9.92M
        static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
391
9.92M
  }
392
9.93M
  if (begin < end && *begin == '.') {
393
68.1k
    ++begin;
394
68.1k
    if (mantissa == 0) {
395
      // If we haven't seen any nonzero digits yet, keep skipping zeros.  We
396
      // have to adjust the exponent to reflect the changed place value.
397
29.2k
      const char* begin_zeros = begin;
398
673k
      while (begin < end && *begin == '0') {
399
643k
        ++begin;
400
643k
      }
401
29.2k
      int zeros_skipped = static_cast<int>(begin - begin_zeros);
402
29.2k
      if (zeros_skipped >= DigitLimit<base>()) {
403
        // refuse to parse pathological inputs
404
0
        return result;
405
0
      }
406
29.2k
      exponent_adjustment -= static_cast<int>(zeros_skipped);
407
29.2k
    }
408
68.1k
    int post_decimal_digits = ConsumeDigits<base>(
409
68.1k
        begin, end, digits_left, &mantissa, &mantissa_is_inexact);
410
68.1k
    begin += post_decimal_digits;
411
412
    // Since `mantissa` is an integer, each significant digit we read after
413
    // the decimal point requires an adjustment to the exponent. "1.23e0" will
414
    // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
415
    // "123e-2").
    //
    // Only digits that were actually folded into `mantissa` count, so the
    // adjustment is capped at digits_left.
416
68.1k
    if (post_decimal_digits >= DigitLimit<base>()) {
417
      // refuse to parse pathological inputs
418
0
      return result;
419
68.1k
    } else if (post_decimal_digits > digits_left) {
420
18.6k
      exponent_adjustment -= digits_left;
421
49.4k
    } else {
422
49.4k
      exponent_adjustment -= post_decimal_digits;
423
49.4k
    }
424
68.1k
  }
425
  // If we've found no mantissa whatsoever, this isn't a number.
426
9.93M
  if (mantissa_begin == begin) {
427
157
    return result;
428
157
  }
429
  // A bare "." doesn't count as a mantissa either.
430
9.93M
  if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
431
22
    return result;
432
22
  }
433
434
9.93M
  if (mantissa_is_inexact) {
435
    // We dropped significant digits on the floor.  Handle this appropriately.
436
30.8k
    if (base == 10) {
437
      // If we truncated significant decimal digits, store the full range of the
438
      // mantissa for future big integer math for exact rounding.
439
28.9k
      result.subrange_begin = mantissa_begin;
440
28.9k
      result.subrange_end = begin;
441
28.9k
    } else if (base == 16) {
442
      // If we truncated hex digits, reflect this fact by setting the low
443
      // ("sticky") bit.  This allows for correct rounding in all cases.
444
1.90k
      mantissa |= 1;
445
1.90k
    }
446
30.8k
  }
447
9.93M
  result.mantissa = mantissa;
448
449
9.93M
  // Remembered so that `begin` can be rewound if what follows turns out not
  // to be a complete exponent.
  const char* const exponent_begin = begin;
450
9.93M
  result.literal_exponent = 0;
451
9.93M
  bool found_exponent = false;
452
9.93M
  if (AllowExponent(format_flags) && begin < end &&
453
1.90M
      IsExponentCharacter<base>(*begin)) {
454
1.90M
    bool negative_exponent = false;
455
1.90M
    ++begin;
456
1.90M
    if (begin < end && *begin == '-') {
457
124k
      negative_exponent = true;
458
124k
      ++begin;
459
1.77M
    } else if (begin < end && *begin == '+') {
460
1.11k
      ++begin;
461
1.11k
    }
462
1.90M
    const char* const exponent_digits_begin = begin;
463
    // Exponent is always expressed in decimal, even for hexadecimal floats.
    // Passing nullptr means dropped exponent digits are not reported.
464
1.90M
    begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
465
1.90M
                               &result.literal_exponent, nullptr);
466
1.90M
    if (begin == exponent_digits_begin) {
467
      // There were no digits where we expected an exponent.  We failed to read
468
      // an exponent and should not consume the 'e' after all.  Rewind 'begin'.
469
65
      found_exponent = false;
470
65
      begin = exponent_begin;
471
1.90M
    } else {
472
1.90M
      found_exponent = true;
473
1.90M
      if (negative_exponent) {
474
124k
        result.literal_exponent = -result.literal_exponent;
475
124k
      }
476
1.90M
    }
477
1.90M
  }
478
479
9.93M
  if (!found_exponent && RequireExponent(format_flags)) {
480
    // Provided flags required an exponent, but none was found.  This results
481
    // in a failure to scan.
482
0
    return result;
483
0
  }
484
485
  // Success!
486
9.93M
  result.type = strings_internal::FloatType::kNumber;
487
9.93M
  // A zero mantissa is zero regardless of exponent, so the exponent is
  // normalized to 0 in that case.
  if (result.mantissa > 0) {
488
9.76M
    result.exponent = result.literal_exponent +
489
9.76M
                      (DigitMagnitude<base>() * exponent_adjustment);
490
9.76M
  } else {
491
168k
    result.exponent = 0;
492
168k
  }
493
9.93M
  result.end = begin;
494
9.93M
  return result;
495
9.93M
}
absl::strings_internal::ParsedFloat absl::strings_internal::ParseFloat<10>(char const*, char const*, absl::chars_format)
Line
Count
Source
356
9.92M
                                         chars_format format_flags) {
357
9.92M
  strings_internal::ParsedFloat result;
358
359
  // Exit early if we're given an empty range.
360
9.92M
  if (begin == end) return result;
361
362
  // Handle the infinity and NaN cases.
363
9.92M
  if (ParseInfinityOrNan(begin, end, &result)) {
364
3.13k
    return result;
365
3.13k
  }
366
367
9.92M
  const char* const mantissa_begin = begin;
368
10.3M
  while (begin < end && *begin == '0') {
369
401k
    ++begin;  // skip leading zeros
370
401k
  }
371
9.92M
  uint64_t mantissa = 0;
372
373
9.92M
  int exponent_adjustment = 0;
374
9.92M
  bool mantissa_is_inexact = false;
375
9.92M
  int pre_decimal_digits = ConsumeDigits<base>(
376
9.92M
      begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
377
9.92M
  begin += pre_decimal_digits;
378
9.92M
  int digits_left;
379
9.92M
  if (pre_decimal_digits >= DigitLimit<base>()) {
380
    // refuse to parse pathological inputs
381
0
    return result;
382
9.92M
  } else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
383
    // We dropped some non-fraction digits on the floor.  Adjust our exponent
384
    // to compensate.
385
14.0k
    exponent_adjustment =
386
14.0k
        static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
387
14.0k
    digits_left = 0;
388
9.90M
  } else {
389
9.90M
    digits_left =
390
9.90M
        static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
391
9.90M
  }
392
9.92M
  if (begin < end && *begin == '.') {
393
63.3k
    ++begin;
394
63.3k
    if (mantissa == 0) {
395
      // If we haven't seen any nonzero digits yet, keep skipping zeros.  We
396
      // have to adjust the exponent to reflect the changed place value.
397
27.4k
      const char* begin_zeros = begin;
398
650k
      while (begin < end && *begin == '0') {
399
623k
        ++begin;
400
623k
      }
401
27.4k
      int zeros_skipped = static_cast<int>(begin - begin_zeros);
402
27.4k
      if (zeros_skipped >= DigitLimit<base>()) {
403
        // refuse to parse pathological inputs
404
0
        return result;
405
0
      }
406
27.4k
      exponent_adjustment -= static_cast<int>(zeros_skipped);
407
27.4k
    }
408
63.3k
    int post_decimal_digits = ConsumeDigits<base>(
409
63.3k
        begin, end, digits_left, &mantissa, &mantissa_is_inexact);
410
63.3k
    begin += post_decimal_digits;
411
412
    // Since `mantissa` is an integer, each significant digit we read after
413
    // the decimal point requires an adjustment to the exponent. "1.23e0" will
414
    // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
415
    // "123e-2").
416
63.3k
    if (post_decimal_digits >= DigitLimit<base>()) {
417
      // refuse to parse pathological inputs
418
0
      return result;
419
63.3k
    } else if (post_decimal_digits > digits_left) {
420
17.3k
      exponent_adjustment -= digits_left;
421
46.0k
    } else {
422
46.0k
      exponent_adjustment -= post_decimal_digits;
423
46.0k
    }
424
63.3k
  }
425
  // If we've found no mantissa whatsoever, this isn't a number.
426
9.92M
  if (mantissa_begin == begin) {
427
147
    return result;
428
147
  }
429
  // A bare "." doesn't count as a mantissa either.
430
9.92M
  if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
431
15
    return result;
432
15
  }
433
434
9.92M
  if (mantissa_is_inexact) {
435
    // We dropped significant digits on the floor.  Handle this appropriately.
436
28.9k
    if (base == 10) {
437
      // If we truncated significant decimal digits, store the full range of the
438
      // mantissa for future big integer math for exact rounding.
439
28.9k
      result.subrange_begin = mantissa_begin;
440
28.9k
      result.subrange_end = begin;
441
28.9k
    } else if (base == 16) {
442
      // If we truncated hex digits, reflect this fact by setting the low
443
      // ("sticky") bit.  This allows for correct rounding in all cases.
444
0
      mantissa |= 1;
445
0
    }
446
28.9k
  }
447
9.92M
  result.mantissa = mantissa;
448
449
9.92M
  const char* const exponent_begin = begin;
450
9.92M
  result.literal_exponent = 0;
451
9.92M
  bool found_exponent = false;
452
9.92M
  if (AllowExponent(format_flags) && begin < end &&
453
1.89M
      IsExponentCharacter<base>(*begin)) {
454
1.89M
    bool negative_exponent = false;
455
1.89M
    ++begin;
456
1.89M
    if (begin < end && *begin == '-') {
457
120k
      negative_exponent = true;
458
120k
      ++begin;
459
1.77M
    } else if (begin < end && *begin == '+') {
460
827
      ++begin;
461
827
    }
462
1.89M
    const char* const exponent_digits_begin = begin;
463
    // Exponent is always expressed in decimal, even for hexadecimal floats.
464
1.89M
    begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
465
1.89M
                               &result.literal_exponent, nullptr);
466
1.89M
    if (begin == exponent_digits_begin) {
467
      // there were no digits where we expected an exponent.  We failed to read
468
      // an exponent and should not consume the 'e' after all.  Rewind 'begin'.
469
51
      found_exponent = false;
470
51
      begin = exponent_begin;
471
1.89M
    } else {
472
1.89M
      found_exponent = true;
473
1.89M
      if (negative_exponent) {
474
120k
        result.literal_exponent = -result.literal_exponent;
475
120k
      }
476
1.89M
    }
477
1.89M
  }
478
479
9.92M
  if (!found_exponent && RequireExponent(format_flags)) {
480
    // Provided flags required an exponent, but none was found.  This results
481
    // in a failure to scan.
482
0
    return result;
483
0
  }
484
485
  // Success!
486
9.92M
  result.type = strings_internal::FloatType::kNumber;
487
9.92M
  if (result.mantissa > 0) {
488
9.75M
    result.exponent = result.literal_exponent +
489
9.75M
                      (DigitMagnitude<base>() * exponent_adjustment);
490
9.75M
  } else {
491
166k
    result.exponent = 0;
492
166k
  }
493
9.92M
  result.end = begin;
494
9.92M
  return result;
495
9.92M
}
absl::strings_internal::ParsedFloat absl::strings_internal::ParseFloat<16>(char const*, char const*, absl::chars_format)
Line
Count
Source
356
11.6k
                                         chars_format format_flags) {
357
11.6k
  strings_internal::ParsedFloat result;
358
359
  // Exit early if we're given an empty range.
360
11.6k
  if (begin == end) return result;
361
362
  // Handle the infinity and NaN cases.
363
11.6k
  if (ParseInfinityOrNan(begin, end, &result)) {
364
3
    return result;
365
3
  }
366
367
11.6k
  const char* const mantissa_begin = begin;
368
23.2k
  while (begin < end && *begin == '0') {
369
11.5k
    ++begin;  // skip leading zeros
370
11.5k
  }
371
11.6k
  uint64_t mantissa = 0;
372
373
11.6k
  int exponent_adjustment = 0;
374
11.6k
  bool mantissa_is_inexact = false;
375
11.6k
  int pre_decimal_digits = ConsumeDigits<base>(
376
11.6k
      begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
377
11.6k
  begin += pre_decimal_digits;
378
11.6k
  int digits_left;
379
11.6k
  if (pre_decimal_digits >= DigitLimit<base>()) {
380
    // refuse to parse pathological inputs
381
0
    return result;
382
11.6k
  } else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
383
    // We dropped some non-fraction digits on the floor.  Adjust our exponent
384
    // to compensate.
385
847
    exponent_adjustment =
386
847
        static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
387
847
    digits_left = 0;
388
10.7k
  } else {
389
10.7k
    digits_left =
390
10.7k
        static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
391
10.7k
  }
392
11.6k
  if (begin < end && *begin == '.') {
393
4.80k
    ++begin;
394
4.80k
    if (mantissa == 0) {
395
      // If we haven't seen any nonzero digits yet, keep skipping zeros.  We
396
      // have to adjust the exponent to reflect the changed place value.
397
1.79k
      const char* begin_zeros = begin;
398
22.4k
      while (begin < end && *begin == '0') {
399
20.6k
        ++begin;
400
20.6k
      }
401
1.79k
      int zeros_skipped = static_cast<int>(begin - begin_zeros);
402
1.79k
      if (zeros_skipped >= DigitLimit<base>()) {
403
        // refuse to parse pathological inputs
404
0
        return result;
405
0
      }
406
1.79k
      exponent_adjustment -= static_cast<int>(zeros_skipped);
407
1.79k
    }
408
4.80k
    int post_decimal_digits = ConsumeDigits<base>(
409
4.80k
        begin, end, digits_left, &mantissa, &mantissa_is_inexact);
410
4.80k
    begin += post_decimal_digits;
411
412
    // Since `mantissa` is an integer, each significant digit we read after
413
    // the decimal point requires an adjustment to the exponent. "1.23e0" will
414
    // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
415
    // "123e-2").
416
4.80k
    if (post_decimal_digits >= DigitLimit<base>()) {
417
      // refuse to parse pathological inputs
418
0
      return result;
419
4.80k
    } else if (post_decimal_digits > digits_left) {
420
1.34k
      exponent_adjustment -= digits_left;
421
3.46k
    } else {
422
3.46k
      exponent_adjustment -= post_decimal_digits;
423
3.46k
    }
424
4.80k
  }
425
  // If we've found no mantissa whatsoever, this isn't a number.
426
11.6k
  if (mantissa_begin == begin) {
427
10
    return result;
428
10
  }
429
  // A bare "." doesn't count as a mantissa either.
430
11.6k
  if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
431
7
    return result;
432
7
  }
433
434
11.6k
  if (mantissa_is_inexact) {
435
    // We dropped significant digits on the floor.  Handle this appropriately.
436
1.90k
    if (base == 10) {
437
      // If we truncated significant decimal digits, store the full range of the
438
      // mantissa for future big integer math for exact rounding.
439
0
      result.subrange_begin = mantissa_begin;
440
0
      result.subrange_end = begin;
441
1.90k
    } else if (base == 16) {
442
      // If we truncated hex digits, reflect this fact by setting the low
443
      // ("sticky") bit.  This allows for correct rounding in all cases.
444
1.90k
      mantissa |= 1;
445
1.90k
    }
446
1.90k
  }
447
11.6k
  result.mantissa = mantissa;
448
449
11.6k
  const char* const exponent_begin = begin;
450
11.6k
  result.literal_exponent = 0;
451
11.6k
  bool found_exponent = false;
452
11.6k
  if (AllowExponent(format_flags) && begin < end &&
453
4.25k
      IsExponentCharacter<base>(*begin)) {
454
4.17k
    bool negative_exponent = false;
455
4.17k
    ++begin;
456
4.17k
    if (begin < end && *begin == '-') {
457
3.43k
      negative_exponent = true;
458
3.43k
      ++begin;
459
3.43k
    } else if (begin < end && *begin == '+') {
460
289
      ++begin;
461
289
    }
462
4.17k
    const char* const exponent_digits_begin = begin;
463
    // Exponent is always expressed in decimal, even for hexadecimal floats.
464
4.17k
    begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
465
4.17k
                               &result.literal_exponent, nullptr);
466
4.17k
    if (begin == exponent_digits_begin) {
467
      // there were no digits where we expected an exponent.  We failed to read
468
      // an exponent and should not consume the 'e' after all.  Rewind 'begin'.
469
14
      found_exponent = false;
470
14
      begin = exponent_begin;
471
4.16k
    } else {
472
4.16k
      found_exponent = true;
473
4.16k
      if (negative_exponent) {
474
3.43k
        result.literal_exponent = -result.literal_exponent;
475
3.43k
      }
476
4.16k
    }
477
4.17k
  }
478
479
11.6k
  if (!found_exponent && RequireExponent(format_flags)) {
480
    // Provided flags required an exponent, but none was found.  This results
481
    // in a failure to scan.
482
0
    return result;
483
0
  }
484
485
  // Success!
486
11.6k
  result.type = strings_internal::FloatType::kNumber;
487
11.6k
  if (result.mantissa > 0) {
488
9.97k
    result.exponent = result.literal_exponent +
489
9.97k
                      (DigitMagnitude<base>() * exponent_adjustment);
490
9.97k
  } else {
491
1.62k
    result.exponent = 0;
492
1.62k
  }
493
11.6k
  result.end = begin;
494
11.6k
  return result;
495
11.6k
}
496
497
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
498
                                    chars_format format_flags);
499
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
500
                                    chars_format format_flags);
501
502
}  // namespace strings_internal
503
ABSL_NAMESPACE_END
504
}  // namespace absl