/proc/self/cwd/external/com_google_absl/absl/strings/internal/charconv_parse.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2018 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/strings/internal/charconv_parse.h" |
16 | | #include "absl/strings/charconv.h" |
17 | | |
18 | | #include <cassert> |
19 | | #include <cstdint> |
20 | | #include <limits> |
21 | | |
22 | | #include "absl/strings/internal/memutil.h" |
23 | | |
24 | | namespace absl { |
25 | | ABSL_NAMESPACE_BEGIN |
26 | | namespace { |
27 | | |
28 | | // ParseFloat<10> will read the first 19 significant digits of the mantissa. |
29 | | // This number was chosen for multiple reasons. |
30 | | // |
31 | | // (a) First, for whatever integer type we choose to represent the mantissa, we |
32 | | // want to choose the largest possible number of decimal digits for that integer |
33 | | // type. We are using uint64_t, which can express any 19-digit unsigned |
34 | | // integer. |
35 | | // |
36 | | // (b) Second, we need to parse enough digits that the binary value of any |
37 | | // mantissa we capture has more bits of resolution than the mantissa |
38 | | // representation in the target float. Our algorithm requires at least 3 bits |
39 | | // of headway, but 19 decimal digits give a little more than that. |
40 | | // |
41 | | // The following static assertions verify the above comments: |
42 | | constexpr int kDecimalMantissaDigitsMax = 19; |
43 | | |
44 | | static_assert(std::numeric_limits<uint64_t>::digits10 == |
45 | | kDecimalMantissaDigitsMax, |
46 | | "(a) above"); |
47 | | |
48 | | // IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. |
49 | | static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed"); |
50 | | static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact"); |
51 | | static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); |
52 | | |
53 | | // The lowest valued 19-digit decimal mantissa we can read still contains |
54 | | // sufficient information to reconstruct a binary mantissa. |
55 | | static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above"); |
56 | | |
57 | | // ParseFloat<16> will read the first 15 significant digits of the mantissa. |
58 | | // |
59 | | // Because a base-16-to-base-2 conversion can be done exactly, we do not need |
60 | | // to maximize the number of scanned hex digits to improve our conversion. What |
61 | | // is required is to scan two more bits than the mantissa can represent, so that |
62 | | // we always round correctly. |
63 | | // |
64 | | // (One extra bit does not suffice to perform correct rounding, since a number |
65 | | // exactly halfway between two representable floats has unique rounding rules, |
66 | | // so we need to differentiate between a "halfway between" number and a "closer |
67 | | // to the larger value" number.) |
68 | | constexpr int kHexadecimalMantissaDigitsMax = 15; |
69 | | |
70 | | // The minimum number of significant bits that will be read from |
71 | | // kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since |
72 | | // the most significant digit can be a "1", which only contributes a single |
73 | | // significant bit. |
74 | | constexpr int kGuaranteedHexadecimalMantissaBitPrecision = |
75 | | 4 * kHexadecimalMantissaDigitsMax - 3; |
76 | | |
77 | | static_assert(kGuaranteedHexadecimalMantissaBitPrecision > |
78 | | std::numeric_limits<double>::digits + 2, |
79 | | "kHexadecimalMantissaDigitsMax too small"); |
80 | | |
81 | | // We also impose a limit on the number of significant digits we will read from |
82 | | // an exponent, to avoid having to deal with integer overflow. We use 9 for |
83 | | // this purpose. |
84 | | // |
85 | | // If we read a 9 digit exponent, the end result of the conversion will |
86 | | // necessarily be infinity or zero, depending on the sign of the exponent. |
87 | | // Therefore we can just drop extra digits on the floor without any extra |
88 | | // logic. |
89 | | constexpr int kDecimalExponentDigitsMax = 9; |
90 | | static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax, |
91 | | "int type too small"); |
92 | | |
93 | | // To avoid incredibly large inputs causing integer overflow for our exponent, |
94 | | // we impose an arbitrary but very large limit on the number of significant |
95 | | // digits we will accept. The implementation refuses to match a string with |
96 | | // more consecutive significant mantissa digits than this. |
97 | | constexpr int kDecimalDigitLimit = 50000000; |
98 | | |
99 | | // Corresponding limit for hexadecimal digit inputs. This is one fourth the |
100 | | // amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires |
101 | | // a binary exponent adjustment of 4. |
102 | | constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4; |
103 | | |
104 | | // The largest exponent we can read is 999999999 (per |
105 | | // kDecimalExponentDigitsMax), and the largest exponent adjustment we can get |
106 | | // from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these |
107 | | // comfortably fits in an integer. |
108 | | // |
109 | | // We count kDecimalDigitLimit twice because there are independent limits for |
110 | | // numbers before and after the decimal point. (In the case where there are no |
111 | | // significant digits before the decimal point, there are independent limits for |
112 | | // post-decimal-point leading zeroes and for significant digits.) |
113 | | static_assert(999999999 + 2 * kDecimalDigitLimit < |
114 | | std::numeric_limits<int>::max(), |
115 | | "int type too small"); |
116 | | static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) < |
117 | | std::numeric_limits<int>::max(), |
118 | | "int type too small"); |
119 | | |
120 | | // Returns true if the provided bitfield allows parsing an exponent value |
121 | | // (e.g., "1.5e100"). |
122 | 2.48k | bool AllowExponent(chars_format flags) { |
123 | 2.48k | bool fixed = (flags & chars_format::fixed) == chars_format::fixed; |
124 | 2.48k | bool scientific = |
125 | 2.48k | (flags & chars_format::scientific) == chars_format::scientific; |
126 | 2.48k | return scientific || !fixed; |
127 | 2.48k | } |
128 | | |
129 | | // Returns true if the provided bitfield requires an exponent value be present. |
130 | 37 | bool RequireExponent(chars_format flags) { |
131 | 37 | bool fixed = (flags & chars_format::fixed) == chars_format::fixed; |
132 | 37 | bool scientific = |
133 | 37 | (flags & chars_format::scientific) == chars_format::scientific; |
134 | 37 | return scientific && !fixed; |
135 | 37 | } |
136 | | |
137 | | const int8_t kAsciiToInt[256] = { |
138 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
139 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
140 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, |
141 | | 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, |
142 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
143 | | -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
144 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
145 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
146 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
147 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
148 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
149 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
150 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
151 | | -1, -1, -1, -1, -1, -1, -1, -1, -1}; |
152 | | |
153 | | // Returns true if `ch` is a digit in the given base |
154 | | template <int base> |
155 | | bool IsDigit(char ch); |
156 | | |
157 | | // Converts a valid `ch` to its digit value in the given base. |
158 | | template <int base> |
159 | | unsigned ToDigit(char ch); |
160 | | |
161 | | // Returns true if `ch` is the exponent delimiter for the given base. |
162 | | template <int base> |
163 | | bool IsExponentCharacter(char ch); |
164 | | |
165 | | // Returns the maximum number of significant digits we will read for a float |
166 | | // in the given base. |
167 | | template <int base> |
168 | | constexpr int MantissaDigitsMax(); |
169 | | |
170 | | // Returns the largest consecutive run of digits we will accept when parsing a |
171 | | // number in the given base. |
172 | | template <int base> |
173 | | constexpr int DigitLimit(); |
174 | | |
175 | | // Returns the amount the exponent must be adjusted by for each dropped digit. |
176 | | // (For decimal this is 1, since the digits are in base 10 and the exponent base |
177 | | // is also 10, but for hexadecimal this is 4, since the digits are base 16 but |
178 | | // the exponent base is 2.) |
179 | | template <int base> |
180 | | constexpr int DigitMagnitude(); |
181 | | |
182 | | template <> |
183 | 29.0k | bool IsDigit<10>(char ch) { |
184 | 29.0k | return ch >= '0' && ch <= '9'; |
185 | 29.0k | } |
186 | | template <> |
187 | 0 | bool IsDigit<16>(char ch) { |
188 | 0 | return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0; |
189 | 0 | } |
190 | | |
191 | | template <> |
192 | 19.5k | unsigned ToDigit<10>(char ch) { |
193 | 19.5k | return static_cast<unsigned>(ch - '0'); |
194 | 19.5k | } |
195 | | template <> |
196 | 0 | unsigned ToDigit<16>(char ch) { |
197 | 0 | return static_cast<unsigned>(kAsciiToInt[static_cast<unsigned char>(ch)]); |
198 | 0 | } |
199 | | |
200 | | template <> |
201 | 2.44k | bool IsExponentCharacter<10>(char ch) { |
202 | 2.44k | return ch == 'e' || ch == 'E'; |
203 | 2.44k | } |
204 | | |
205 | | template <> |
206 | 0 | bool IsExponentCharacter<16>(char ch) { |
207 | 0 | return ch == 'p' || ch == 'P'; |
208 | 0 | } |
209 | | |
210 | | template <> |
211 | 7.44k | constexpr int MantissaDigitsMax<10>() { |
212 | 7.44k | return kDecimalMantissaDigitsMax; |
213 | 7.44k | } |
214 | | template <> |
215 | 0 | constexpr int MantissaDigitsMax<16>() { |
216 | 0 | return kHexadecimalMantissaDigitsMax; |
217 | 0 | } |
218 | | |
219 | | template <> |
220 | 4.75k | constexpr int DigitLimit<10>() { |
221 | 4.75k | return kDecimalDigitLimit; |
222 | 4.75k | } |
223 | | template <> |
224 | 0 | constexpr int DigitLimit<16>() { |
225 | 0 | return kHexadecimalDigitLimit; |
226 | 0 | } |
227 | | |
228 | | template <> |
229 | 2.44k | constexpr int DigitMagnitude<10>() { |
230 | 2.44k | return 1; |
231 | 2.44k | } |
232 | | template <> |
233 | 0 | constexpr int DigitMagnitude<16>() { |
234 | 0 | return 4; |
235 | 0 | } |
236 | | |
237 | | // Reads decimal digits from [begin, end) into *out. Returns the number of |
238 | | // digits consumed. |
239 | | // |
240 | | // After max_digits has been read, keeps consuming characters, but no longer |
241 | | // adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit |
242 | | // is set; otherwise, it is left unmodified. |
243 | | // |
244 | | // If no digits are matched, returns 0 and leaves *out unchanged. |
245 | | // |
246 | | // ConsumeDigits does not protect against overflow on *out; max_digits must |
247 | | // be chosen with respect to type T to avoid the possibility of overflow. |
248 | | template <int base, typename T> |
249 | | int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out, |
250 | 7.20k | bool* dropped_nonzero_digit) { |
251 | 7.20k | if (base == 10) { |
252 | 7.20k | assert(max_digits <= std::numeric_limits<T>::digits10); |
253 | 7.20k | } else if (base == 16) { |
254 | 0 | assert(max_digits * 4 <= std::numeric_limits<T>::digits); |
255 | 0 | } |
256 | 7.20k | const char* const original_begin = begin; |
257 | | |
258 | | // Skip leading zeros, but only if *out is zero. |
259 | | // They don't cause an overflow so we don't have to count them for |
260 | | // `max_digits`. |
261 | 7.20k | while (!*out && end != begin && *begin == '0') ++begin; |
262 | | |
263 | 7.20k | T accumulator = *out; |
264 | 7.20k | const char* significant_digits_end = |
265 | 7.20k | (end - begin > max_digits) ? begin + max_digits : end; |
266 | 26.7k | while (begin < significant_digits_end && IsDigit<base>(*begin)) { |
267 | | // Do not guard against *out overflow; max_digits was chosen to avoid this. |
268 | | // Do assert against it, to detect problems in debug builds. |
269 | 19.5k | auto digit = static_cast<T>(ToDigit<base>(*begin)); |
270 | 19.5k | assert(accumulator * base >= accumulator); |
271 | 19.5k | accumulator *= base; |
272 | 19.5k | assert(accumulator + digit >= accumulator); |
273 | 19.5k | accumulator += digit; |
274 | 19.5k | ++begin; |
275 | 19.5k | } |
276 | 7.20k | bool dropped_nonzero = false; |
277 | 7.20k | while (begin < end && IsDigit<base>(*begin)) { |
278 | 0 | dropped_nonzero = dropped_nonzero || (*begin != '0'); |
279 | 0 | ++begin; |
280 | 0 | } |
281 | 7.20k | if (dropped_nonzero && dropped_nonzero_digit != nullptr) { |
282 | 0 | *dropped_nonzero_digit = true; |
283 | 0 | } |
284 | 7.20k | *out = accumulator; |
285 | 7.20k | return static_cast<int>(begin - original_begin); |
286 | 7.20k | } charconv_parse.cc:int absl::(anonymous namespace)::ConsumeDigits<10, unsigned long>(char const*, char const*, int, unsigned long*, bool*) Line | Count | Source | 250 | 4.75k | bool* dropped_nonzero_digit) { | 251 | 4.75k | if (base == 10) { | 252 | 4.75k | assert(max_digits <= std::numeric_limits<T>::digits10); | 253 | 4.75k | } else if (base == 16) { | 254 | 0 | assert(max_digits * 4 <= std::numeric_limits<T>::digits); | 255 | 0 | } | 256 | 4.75k | const char* const original_begin = begin; | 257 | | | 258 | | // Skip leading zeros, but only if *out is zero. | 259 | | // They don't cause an overflow so we don't have to count them for | 260 | | // `max_digits`. | 261 | 4.75k | while (!*out && end != begin && *begin == '0') ++begin; | 262 | | | 263 | 4.75k | T accumulator = *out; | 264 | 4.75k | const char* significant_digits_end = | 265 | 4.75k | (end - begin > max_digits) ? begin + max_digits : end; | 266 | 18.2k | while (begin < significant_digits_end && IsDigit<base>(*begin)) { | 267 | | // Do not guard against *out overflow; max_digits was chosen to avoid this. | 268 | | // Do assert against it, to detect problems in debug builds. | 269 | 13.4k | auto digit = static_cast<T>(ToDigit<base>(*begin)); | 270 | 13.4k | assert(accumulator * base >= accumulator); | 271 | 13.4k | accumulator *= base; | 272 | 13.4k | assert(accumulator + digit >= accumulator); | 273 | 13.4k | accumulator += digit; | 274 | 13.4k | ++begin; | 275 | 13.4k | } | 276 | 4.75k | bool dropped_nonzero = false; | 277 | 4.75k | while (begin < end && IsDigit<base>(*begin)) { | 278 | 0 | dropped_nonzero = dropped_nonzero || (*begin != '0'); | 279 | 0 | ++begin; | 280 | 0 | } | 281 | 4.75k | if (dropped_nonzero && dropped_nonzero_digit != nullptr) { | 282 | 0 | *dropped_nonzero_digit = true; | 283 | 0 | } | 284 | 4.75k | *out = accumulator; | 285 | 4.75k | return static_cast<int>(begin - original_begin); | 286 | 4.75k | } |
charconv_parse.cc:int absl::(anonymous namespace)::ConsumeDigits<10, int>(char const*, char const*, int, int*, bool*) Line | Count | Source | 250 | 2.44k | bool* dropped_nonzero_digit) { | 251 | 2.44k | if (base == 10) { | 252 | 2.44k | assert(max_digits <= std::numeric_limits<T>::digits10); | 253 | 2.44k | } else if (base == 16) { | 254 | 0 | assert(max_digits * 4 <= std::numeric_limits<T>::digits); | 255 | 0 | } | 256 | 2.44k | const char* const original_begin = begin; | 257 | | | 258 | | // Skip leading zeros, but only if *out is zero. | 259 | | // They don't cause an overflow so we don't have to count them for | 260 | | // `max_digits`. | 261 | 2.44k | while (!*out && end != begin && *begin == '0') ++begin; | 262 | | | 263 | 2.44k | T accumulator = *out; | 264 | 2.44k | const char* significant_digits_end = | 265 | 2.44k | (end - begin > max_digits) ? begin + max_digits : end; | 266 | 8.56k | while (begin < significant_digits_end && IsDigit<base>(*begin)) { | 267 | | // Do not guard against *out overflow; max_digits was chosen to avoid this. | 268 | | // Do assert against it, to detect problems in debug builds. | 269 | 6.11k | auto digit = static_cast<T>(ToDigit<base>(*begin)); | 270 | 6.11k | assert(accumulator * base >= accumulator); | 271 | 6.11k | accumulator *= base; | 272 | 6.11k | assert(accumulator + digit >= accumulator); | 273 | 6.11k | accumulator += digit; | 274 | 6.11k | ++begin; | 275 | 6.11k | } | 276 | 2.44k | bool dropped_nonzero = false; | 277 | 2.44k | while (begin < end && IsDigit<base>(*begin)) { | 278 | 0 | dropped_nonzero = dropped_nonzero || (*begin != '0'); | 279 | 0 | ++begin; | 280 | 0 | } | 281 | 2.44k | if (dropped_nonzero && dropped_nonzero_digit != nullptr) { | 282 | 0 | *dropped_nonzero_digit = true; | 283 | 0 | } | 284 | 2.44k | *out = accumulator; | 285 | 2.44k | return static_cast<int>(begin - original_begin); | 286 | 2.44k | } |
Unexecuted instantiation: charconv_parse.cc:int absl::(anonymous namespace)::ConsumeDigits<16, unsigned long>(char const*, char const*, int, unsigned long*, bool*) |
287 | | |
288 | | // Returns true if `v` is one of the chars allowed inside parentheses following |
289 | | // a NaN. |
290 | 0 | bool IsNanChar(char v) { |
291 | 0 | return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') || |
292 | 0 | (v >= 'A' && v <= 'Z'); |
293 | 0 | } |
294 | | |
295 | | // Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If |
296 | | // one is found, sets `out` appropriately and returns true. |
297 | | bool ParseInfinityOrNan(const char* begin, const char* end, |
298 | 2.48k | strings_internal::ParsedFloat* out) { |
299 | 2.48k | if (end - begin < 3) { |
300 | 37 | return false; |
301 | 37 | } |
302 | 2.44k | switch (*begin) { |
303 | 0 | case 'i': |
304 | 0 | case 'I': { |
305 | | // An infinity string consists of the characters "inf" or "infinity", |
306 | | // case insensitive. |
307 | 0 | if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { |
308 | 0 | return false; |
309 | 0 | } |
310 | 0 | out->type = strings_internal::FloatType::kInfinity; |
311 | 0 | if (end - begin >= 8 && |
312 | 0 | strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) { |
313 | 0 | out->end = begin + 8; |
314 | 0 | } else { |
315 | 0 | out->end = begin + 3; |
316 | 0 | } |
317 | 0 | return true; |
318 | 0 | } |
319 | 0 | case 'n': |
320 | 0 | case 'N': { |
321 | | // A NaN consists of the characters "nan", case insensitive, optionally |
322 | | // followed by a parenthesized sequence of zero or more alphanumeric |
323 | | // characters and/or underscores. |
324 | 0 | if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) { |
325 | 0 | return false; |
326 | 0 | } |
327 | 0 | out->type = strings_internal::FloatType::kNan; |
328 | 0 | out->end = begin + 3; |
329 | | // NaN is allowed to be followed by a parenthesized string, consisting of |
330 | | // only the characters [a-zA-Z0-9_]. Match that if it's present. |
331 | 0 | begin += 3; |
332 | 0 | if (begin < end && *begin == '(') { |
333 | 0 | const char* nan_begin = begin + 1; |
334 | 0 | while (nan_begin < end && IsNanChar(*nan_begin)) { |
335 | 0 | ++nan_begin; |
336 | 0 | } |
337 | 0 | if (nan_begin < end && *nan_begin == ')') { |
338 | | // We found an extra NaN specifier range |
339 | 0 | out->subrange_begin = begin + 1; |
340 | 0 | out->subrange_end = nan_begin; |
341 | 0 | out->end = nan_begin + 1; |
342 | 0 | } |
343 | 0 | } |
344 | 0 | return true; |
345 | 0 | } |
346 | 2.44k | default: |
347 | 2.44k | return false; |
348 | 2.44k | } |
349 | 2.44k | } |
350 | | } // namespace |
351 | | |
352 | | namespace strings_internal { |
353 | | |
354 | | template <int base> |
355 | | strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, |
356 | 2.48k | chars_format format_flags) { |
357 | 2.48k | strings_internal::ParsedFloat result; |
358 | | |
359 | | // Exit early if we're given an empty range. |
360 | 2.48k | if (begin == end) return result; |
361 | | |
362 | | // Handle the infinity and NaN cases. |
363 | 2.48k | if (ParseInfinityOrNan(begin, end, &result)) { |
364 | 0 | return result; |
365 | 0 | } |
366 | | |
367 | 2.48k | const char* const mantissa_begin = begin; |
368 | 2.51k | while (begin < end && *begin == '0') { |
369 | 37 | ++begin; // skip leading zeros |
370 | 37 | } |
371 | 2.48k | uint64_t mantissa = 0; |
372 | | |
373 | 2.48k | int exponent_adjustment = 0; |
374 | 2.48k | bool mantissa_is_inexact = false; |
375 | 2.48k | int pre_decimal_digits = ConsumeDigits<base>( |
376 | 2.48k | begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); |
377 | 2.48k | begin += pre_decimal_digits; |
378 | 2.48k | int digits_left; |
379 | 2.48k | if (pre_decimal_digits >= DigitLimit<base>()) { |
380 | | // refuse to parse pathological inputs |
381 | 0 | return result; |
382 | 2.48k | } else if (pre_decimal_digits > MantissaDigitsMax<base>()) { |
383 | | // We dropped some non-fraction digits on the floor. Adjust our exponent |
384 | | // to compensate. |
385 | 0 | exponent_adjustment = |
386 | 0 | static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>()); |
387 | 0 | digits_left = 0; |
388 | 2.48k | } else { |
389 | 2.48k | digits_left = |
390 | 2.48k | static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits); |
391 | 2.48k | } |
392 | 2.48k | if (begin < end && *begin == '.') { |
393 | 2.27k | ++begin; |
394 | 2.27k | if (mantissa == 0) { |
395 | | // If we haven't seen any nonzero digits yet, keep skipping zeros. We |
396 | | // have to adjust the exponent to reflect the changed place value. |
397 | 0 | const char* begin_zeros = begin; |
398 | 0 | while (begin < end && *begin == '0') { |
399 | 0 | ++begin; |
400 | 0 | } |
401 | 0 | int zeros_skipped = static_cast<int>(begin - begin_zeros); |
402 | 0 | if (zeros_skipped >= DigitLimit<base>()) { |
403 | | // refuse to parse pathological inputs |
404 | 0 | return result; |
405 | 0 | } |
406 | 0 | exponent_adjustment -= static_cast<int>(zeros_skipped); |
407 | 0 | } |
408 | 2.27k | int post_decimal_digits = ConsumeDigits<base>( |
409 | 2.27k | begin, end, digits_left, &mantissa, &mantissa_is_inexact); |
410 | 2.27k | begin += post_decimal_digits; |
411 | | |
412 | | // Since `mantissa` is an integer, each significant digit we read after |
413 | | // the decimal point requires an adjustment to the exponent. "1.23e0" will |
414 | | // be stored as `mantissa` == 123 and `exponent` == -2 (that is, |
415 | | // "123e-2"). |
416 | 2.27k | if (post_decimal_digits >= DigitLimit<base>()) { |
417 | | // refuse to parse pathological inputs |
418 | 0 | return result; |
419 | 2.27k | } else if (post_decimal_digits > digits_left) { |
420 | 0 | exponent_adjustment -= digits_left; |
421 | 2.27k | } else { |
422 | 2.27k | exponent_adjustment -= post_decimal_digits; |
423 | 2.27k | } |
424 | 2.27k | } |
425 | | // If we've found no mantissa whatsoever, this isn't a number. |
426 | 2.48k | if (mantissa_begin == begin) { |
427 | 0 | return result; |
428 | 0 | } |
429 | | // A bare "." doesn't count as a mantissa either. |
430 | 2.48k | if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { |
431 | 0 | return result; |
432 | 0 | } |
433 | | |
434 | 2.48k | if (mantissa_is_inexact) { |
435 | | // We dropped significant digits on the floor. Handle this appropriately. |
436 | 0 | if (base == 10) { |
437 | | // If we truncated significant decimal digits, store the full range of the |
438 | | // mantissa for future big integer math for exact rounding. |
439 | 0 | result.subrange_begin = mantissa_begin; |
440 | 0 | result.subrange_end = begin; |
441 | 0 | } else if (base == 16) { |
442 | | // If we truncated hex digits, reflect this fact by setting the low |
443 | | // ("sticky") bit. This allows for correct rounding in all cases. |
444 | 0 | mantissa |= 1; |
445 | 0 | } |
446 | 0 | } |
447 | 2.48k | result.mantissa = mantissa; |
448 | | |
449 | 2.48k | const char* const exponent_begin = begin; |
450 | 2.48k | result.literal_exponent = 0; |
451 | 2.48k | bool found_exponent = false; |
452 | 2.48k | if (AllowExponent(format_flags) && begin < end && |
453 | 2.48k | IsExponentCharacter<base>(*begin)) { |
454 | 2.44k | bool negative_exponent = false; |
455 | 2.44k | ++begin; |
456 | 2.44k | if (begin < end && *begin == '-') { |
457 | 0 | negative_exponent = true; |
458 | 0 | ++begin; |
459 | 2.44k | } else if (begin < end && *begin == '+') { |
460 | 2.44k | ++begin; |
461 | 2.44k | } |
462 | 2.44k | const char* const exponent_digits_begin = begin; |
463 | | // Exponent is always expressed in decimal, even for hexadecimal floats. |
464 | 2.44k | begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, |
465 | 2.44k | &result.literal_exponent, nullptr); |
466 | 2.44k | if (begin == exponent_digits_begin) { |
467 | | // there were no digits where we expected an exponent. We failed to read |
468 | | // an exponent and should not consume the 'e' after all. Rewind 'begin'. |
469 | 0 | found_exponent = false; |
470 | 0 | begin = exponent_begin; |
471 | 2.44k | } else { |
472 | 2.44k | found_exponent = true; |
473 | 2.44k | if (negative_exponent) { |
474 | 0 | result.literal_exponent = -result.literal_exponent; |
475 | 0 | } |
476 | 2.44k | } |
477 | 2.44k | } |
478 | | |
479 | 2.48k | if (!found_exponent && RequireExponent(format_flags)) { |
480 | | // Provided flags required an exponent, but none was found. This results |
481 | | // in a failure to scan. |
482 | 0 | return result; |
483 | 0 | } |
484 | | |
485 | | // Success! |
486 | 2.48k | result.type = strings_internal::FloatType::kNumber; |
487 | 2.48k | if (result.mantissa > 0) { |
488 | 2.44k | result.exponent = result.literal_exponent + |
489 | 2.44k | (DigitMagnitude<base>() * exponent_adjustment); |
490 | 2.44k | } else { |
491 | 37 | result.exponent = 0; |
492 | 37 | } |
493 | 2.48k | result.end = begin; |
494 | 2.48k | return result; |
495 | 2.48k | } absl::strings_internal::ParsedFloat absl::strings_internal::ParseFloat<10>(char const*, char const*, absl::chars_format) Line | Count | Source | 356 | 2.48k | chars_format format_flags) { | 357 | 2.48k | strings_internal::ParsedFloat result; | 358 | | | 359 | | // Exit early if we're given an empty range. | 360 | 2.48k | if (begin == end) return result; | 361 | | | 362 | | // Handle the infinity and NaN cases. | 363 | 2.48k | if (ParseInfinityOrNan(begin, end, &result)) { | 364 | 0 | return result; | 365 | 0 | } | 366 | | | 367 | 2.48k | const char* const mantissa_begin = begin; | 368 | 2.51k | while (begin < end && *begin == '0') { | 369 | 37 | ++begin; // skip leading zeros | 370 | 37 | } | 371 | 2.48k | uint64_t mantissa = 0; | 372 | | | 373 | 2.48k | int exponent_adjustment = 0; | 374 | 2.48k | bool mantissa_is_inexact = false; | 375 | 2.48k | int pre_decimal_digits = ConsumeDigits<base>( | 376 | 2.48k | begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); | 377 | 2.48k | begin += pre_decimal_digits; | 378 | 2.48k | int digits_left; | 379 | 2.48k | if (pre_decimal_digits >= DigitLimit<base>()) { | 380 | | // refuse to parse pathological inputs | 381 | 0 | return result; | 382 | 2.48k | } else if (pre_decimal_digits > MantissaDigitsMax<base>()) { | 383 | | // We dropped some non-fraction digits on the floor. Adjust our exponent | 384 | | // to compensate. | 385 | 0 | exponent_adjustment = | 386 | 0 | static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>()); | 387 | 0 | digits_left = 0; | 388 | 2.48k | } else { | 389 | 2.48k | digits_left = | 390 | 2.48k | static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits); | 391 | 2.48k | } | 392 | 2.48k | if (begin < end && *begin == '.') { | 393 | 2.27k | ++begin; | 394 | 2.27k | if (mantissa == 0) { | 395 | | // If we haven't seen any nonzero digits yet, keep skipping zeros. We | 396 | | // have to adjust the exponent to reflect the changed place value. | 397 | 0 | const char* begin_zeros = begin; | 398 | 0 | while (begin < end && *begin == '0') { | 399 | 0 | ++begin; | 400 | 0 | } | 401 | 0 | int zeros_skipped = static_cast<int>(begin - begin_zeros); | 402 | 0 | if (zeros_skipped >= DigitLimit<base>()) { | 403 | | // refuse to parse pathological inputs | 404 | 0 | return result; | 405 | 0 | } | 406 | 0 | exponent_adjustment -= static_cast<int>(zeros_skipped); | 407 | 0 | } | 408 | 2.27k | int post_decimal_digits = ConsumeDigits<base>( | 409 | 2.27k | begin, end, digits_left, &mantissa, &mantissa_is_inexact); | 410 | 2.27k | begin += post_decimal_digits; | 411 | | | 412 | | // Since `mantissa` is an integer, each significant digit we read after | 413 | | // the decimal point requires an adjustment to the exponent. "1.23e0" will | 414 | | // be stored as `mantissa` == 123 and `exponent` == -2 (that is, | 415 | | // "123e-2"). | 416 | 2.27k | if (post_decimal_digits >= DigitLimit<base>()) { | 417 | | // refuse to parse pathological inputs | 418 | 0 | return result; | 419 | 2.27k | } else if (post_decimal_digits > digits_left) { | 420 | 0 | exponent_adjustment -= digits_left; | 421 | 2.27k | } else { | 422 | 2.27k | exponent_adjustment -= post_decimal_digits; | 423 | 2.27k | } | 424 | 2.27k | } | 425 | | // If we've found no mantissa whatsoever, this isn't a number. | 426 | 2.48k | if (mantissa_begin == begin) { | 427 | 0 | return result; | 428 | 0 | } | 429 | | // A bare "." doesn't count as a mantissa either. | 430 | 2.48k | if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { | 431 | 0 | return result; | 432 | 0 | } | 433 | | | 434 | 2.48k | if (mantissa_is_inexact) { | 435 | | // We dropped significant digits on the floor. Handle this appropriately. | 436 | 0 | if (base == 10) { | 437 | | // If we truncated significant decimal digits, store the full range of the | 438 | | // mantissa for future big integer math for exact rounding. | 439 | 0 | result.subrange_begin = mantissa_begin; | 440 | 0 | result.subrange_end = begin; | 441 | 0 | } else if (base == 16) { | 442 | | // If we truncated hex digits, reflect this fact by setting the low | 443 | | // ("sticky") bit. This allows for correct rounding in all cases. | 444 | 0 | mantissa |= 1; | 445 | 0 | } | 446 | 0 | } | 447 | 2.48k | result.mantissa = mantissa; | 448 | | | 449 | 2.48k | const char* const exponent_begin = begin; | 450 | 2.48k | result.literal_exponent = 0; | 451 | 2.48k | bool found_exponent = false; | 452 | 2.48k | if (AllowExponent(format_flags) && begin < end && | 453 | 2.48k | IsExponentCharacter<base>(*begin)) { | 454 | 2.44k | bool negative_exponent = false; | 455 | 2.44k | ++begin; | 456 | 2.44k | if (begin < end && *begin == '-') { | 457 | 0 | negative_exponent = true; | 458 | 0 | ++begin; | 459 | 2.44k | } else if (begin < end && *begin == '+') { | 460 | 2.44k | ++begin; | 461 | 2.44k | } | 462 | 2.44k | const char* const exponent_digits_begin = begin; | 463 | | // Exponent is always expressed in decimal, even for hexadecimal floats. | 464 | 2.44k | begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, | 465 | 2.44k | &result.literal_exponent, nullptr); | 466 | 2.44k | if (begin == exponent_digits_begin) { | 467 | | // there were no digits where we expected an exponent. We failed to read | 468 | | // an exponent and should not consume the 'e' after all. Rewind 'begin'. | 469 | 0 | found_exponent = false; | 470 | 0 | begin = exponent_begin; | 471 | 2.44k | } else { | 472 | 2.44k | found_exponent = true; | 473 | 2.44k | if (negative_exponent) { | 474 | 0 | result.literal_exponent = -result.literal_exponent; | 475 | 0 | } | 476 | 2.44k | } | 477 | 2.44k | } | 478 | | | 479 | 2.48k | if (!found_exponent && RequireExponent(format_flags)) { | 480 | | // Provided flags required an exponent, but none was found. This results | 481 | | // in a failure to scan. | 482 | 0 | return result; | 483 | 0 | } | 484 | | | 485 | | // Success! | 486 | 2.48k | result.type = strings_internal::FloatType::kNumber; | 487 | 2.48k | if (result.mantissa > 0) { | 488 | 2.44k | result.exponent = result.literal_exponent + | 489 | 2.44k | (DigitMagnitude<base>() * exponent_adjustment); | 490 | 2.44k | } else { | 491 | 37 | result.exponent = 0; | 492 | 37 | } | 493 | 2.48k | result.end = begin; | 494 | 2.48k | return result; | 495 | 2.48k | } |
Unexecuted instantiation: absl::strings_internal::ParsedFloat absl::strings_internal::ParseFloat<16>(char const*, char const*, absl::chars_format) |
496 | | |
497 | | template ParsedFloat ParseFloat<10>(const char* begin, const char* end, |
498 | | chars_format format_flags); |
499 | | template ParsedFloat ParseFloat<16>(const char* begin, const char* end, |
500 | | chars_format format_flags); |
501 | | |
502 | | } // namespace strings_internal |
503 | | ABSL_NAMESPACE_END |
504 | | } // namespace absl |