Coverage Report

Created: 2023-03-26 06:03

/src/simdjson/include/simdjson/generic/numberparsing.h
Line
Count
Source (jump to first uncovered line)
1
#include "simdjson/internal/numberparsing_tables.h"
2
#include <limits>
3
4
namespace simdjson {
5
namespace SIMDJSON_IMPLEMENTATION {
6
7
namespace ondemand {
/**
 * Classifies a JSON number by the representation it is parsed into.
 */
enum class number_type {
    floating_point_number = 1, ///< a binary64 number
    signed_integer = 2,        ///< a signed integer that fits in a 64-bit word using two's complement
    unsigned_integer = 3       ///< a positive integer larger or equal to 1<<63
};
} // namespace ondemand
17
18
namespace {
19
/// @private
20
namespace numberparsing {
21
22
23
24
// Hooks used by the number parser to report its results.
//
// When JSON_TEST_NUMBERS is defined, every macro first calls a found_*
// callback (declared elsewhere) with the value and/or the source pointer and
// then, through the comma operator, evaluates to the same expression as the
// regular variant. This lets a test harness observe each number that is parsed.
#ifdef JSON_TEST_NUMBERS
25
#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
26
#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
27
#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
28
#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
29
// Regular build: SRC is ignored; INVALID_NUMBER is just NUMBER_ERROR and the
// WRITE_* macros forward VALUE to the matching append_* method of WRITER.
#else
30
#define INVALID_NUMBER(SRC) (NUMBER_ERROR)
31
#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE))
32
#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE))
33
#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
34
#endif
35
36
namespace {
37
// Assemble an IEEE binary64 value from its three fields.
//  - mantissa: expected in [0, 1<<53); the bit at position 52 (the implicit
//    leading one) is cleared before packing.
//  - real_exponent: the biased exponent, expected in [0, 2046]
//    (2047 would technically be accepted as well).
//  - negative: becomes the sign bit.
simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
  const uint64_t implicit_bit = uint64_t(1) << 52;
  const uint64_t sign_field = static_cast<uint64_t>(negative) << 63;
  const uint64_t bits = (mantissa & ~implicit_bit) | (real_exponent << 52) | sign_field;
  // Copy the bit pattern into a double without violating aliasing rules.
  double result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::to_double(unsigned long, unsigned long, bool)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::to_double(unsigned long, unsigned long, bool)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::to_double(unsigned long, unsigned long, bool)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::to_double(unsigned long, unsigned long, bool)
48
}
49
// Attempts to compute i * 10^(power) exactly; and if "negative" is
50
// true, negate the result.
51
// This function will only work in some cases, when it does not work, success is
52
// set to false. This should work *most of the time* (like 99% of the time).
53
// We assume that power is in the [smallest_power,
54
// largest_power] interval: the caller is responsible for this check.
55
0
simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) {
56
0
  // we start with a fast path
57
0
  // It was described in
58
0
  // Clinger WD. How to read floating point numbers accurately.
59
0
  // ACM SIGPLAN Notices. 1990
60
0
#ifndef FLT_EVAL_METHOD
61
0
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
62
0
#endif
63
0
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
64
0
  // We cannot be certain that x/y is rounded to nearest.
65
0
  if (0 <= power && power <= 22 && i <= 9007199254740991) {
66
0
#else
67
0
  if (-22 <= power && power <= 22 && i <= 9007199254740991) {
68
0
#endif
69
0
    // convert the integer into a double. This is lossless since
70
0
    // 0 <= i <= 2^53 - 1.
71
0
    d = double(i);
72
0
    //
73
0
    // The general idea is as follows.
74
0
    // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then
75
0
    // 1) Both s and p can be represented exactly as 64-bit floating-point
76
0
    // values
77
0
    // (binary64).
78
0
    // 2) Because s and p can be represented exactly as floating-point values,
79
0
    // then s * p
80
0
    // and s / p will produce correctly rounded values.
81
0
    //
82
0
    if (power < 0) {
83
0
      d = d / simdjson::internal::power_of_ten[-power];
84
0
    } else {
85
0
      d = d * simdjson::internal::power_of_ten[power];
86
0
    }
87
0
    if (negative) {
88
0
      d = -d;
89
0
    }
90
0
    return true;
91
0
  }
92
0
  // When 22 < power && power <  22 + 16, we could
93
0
  // hope for another, secondary fast path.  It was
94
0
  // described by David M. Gay in  "Correctly rounded
95
0
  // binary-decimal and decimal-binary conversions." (1990)
96
0
  // If you need to compute i * 10^(22 + x) for x < 16,
97
0
  // first compute i * 10^x, if you know that result is exact
98
0
  // (e.g., when i * 10^x < 2^53),
99
0
  // then you can still proceed and do (i * 10^x) * 10^22.
100
0
  // Is this worth your time?
101
0
  // You need  22 < power *and* power <  22 + 16 *and* (i * 10^(x-22) < 2^53)
102
0
  // for this second fast path to work.
103
0
  // If you you have 22 < power *and* power <  22 + 16, and then you
104
0
  // optimistically compute "i * 10^(x-22)", there is still a chance that you
105
0
  // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of
106
0
  // this optimization maybe less common than we would like. Source:
107
0
  // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
108
0
  // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html
109
0
110
0
  // The fast path has now failed, so we are failing back on the slower path.
111
0
112
0
  // In the slow path, we need to adjust i so that it is > 1<<63 which is always
113
0
  // possible, except if i == 0, so we handle i == 0 separately.
114
0
  if(i == 0) {
115
0
    d = negative ? -0.0 : 0.0;
116
0
    return true;
117
0
  }
118
0
119
0
120
0
  // The exponent is 1024 + 63 + power
121
0
  //     + floor(log(5**power)/log(2)).
122
0
  // The 1024 comes from the ieee64 standard.
123
0
  // The 63 comes from the fact that we use a 64-bit word.
124
0
  //
125
0
  // Computing floor(log(5**power)/log(2)) could be
126
0
  // slow. Instead we use a fast function.
127
0
  //
128
0
  // For power in (-400,350), we have that
129
0
  // (((152170 + 65536) * power ) >> 16);
130
0
  // is equal to
131
0
  //  floor(log(5**power)/log(2)) + power when power >= 0
132
0
  // and it is equal to
133
0
  //  ceil(log(5**-power)/log(2)) + power when power < 0
134
0
  //
135
0
  // The 65536 is (1<<16) and corresponds to
136
0
  // (65536 * power) >> 16 ---> power
137
0
  //
138
0
  // ((152170 * power ) >> 16) is equal to
139
0
  // floor(log(5**power)/log(2))
140
0
  //
141
0
  // Note that this is not magic: 152170/(1<<16) is
142
0
  // approximatively equal to log(5)/log(2).
143
0
  // The 1<<16 value is a power of two; we could use a
144
0
  // larger power of 2 if we wanted to.
145
0
  //
146
0
  int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63;
147
0
148
0
149
0
  // We want the most significant bit of i to be 1. Shift if needed.
150
0
  int lz = leading_zeroes(i);
151
0
  i <<= lz;
152
0
153
0
154
0
  // We are going to need to do some 64-bit arithmetic to get a precise product.
155
0
  // We use a table lookup approach.
156
0
  // It is safe because
157
0
  // power >= smallest_power
158
0
  // and power <= largest_power
159
0
  // We recover the mantissa of the power, it has a leading 1. It is always
160
0
  // rounded down.
161
0
  //
162
0
  // We want the most significant 64 bits of the product. We know
163
0
  // this will be non-zero because the most significant bit of i is
164
0
  // 1.
165
0
  const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
166
0
  // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
167
0
  //
168
0
  // The full_multiplication function computes the 128-bit product of two 64-bit words
169
0
  // with a returned value of type value128 with a "low component" corresponding to the
170
0
  // 64-bit least significant bits of the product and with a "high component" corresponding
171
0
  // to the 64-bit most significant bits of the product.
172
0
  simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]);
173
0
  // Both i and power_of_five_128[index] have their most significant bit set to 1 which
174
0
  // implies that the either the most or the second most significant bit of the product
175
0
  // is 1. We pack values in this manner for efficiency reasons: it maximizes the use
176
0
  // we make of the product. It also makes it easy to reason about the product: there
177
0
  // is 0 or 1 leading zero in the product.
178
0
179
0
  // Unless the least significant 9 bits of the high (64-bit) part of the full
180
0
  // product are all 1s, then we know that the most significant 55 bits are
181
0
  // exact and no further work is needed. Having 55 bits is necessary because
182
0
  // we need 53 bits for the mantissa but we have to have one rounding bit and
183
0
  // we can waste a bit if the most significant bit of the product is zero.
184
0
  if((firstproduct.high & 0x1FF) == 0x1FF) {
185
0
    // We want to compute i * 5^q, but only care about the top 55 bits at most.
186
0
    // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
187
0
    // the full computation is wasteful. So we do what is called a "truncated
188
0
    // multiplication".
189
0
    // We take the most significant 64-bits, and we put them in
190
0
    // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q
191
0
    // to the desired approximation using one multiplication. Sometimes it does not suffice.
192
0
    // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
193
0
    // then we get a better approximation to i * 5^q. In very rare cases, even that
194
0
    // will not suffice, though it is seemingly very hard to find such a scenario.
195
0
    //
196
0
    // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
197
0
    // more complicated.
198
0
    //
199
0
    // There is an extra layer of complexity in that we need more than 55 bits of
200
0
    // accuracy in the round-to-even scenario.
201
0
    //
202
0
    // The full_multiplication function computes the 128-bit product of two 64-bit words
203
0
    // with a returned value of type value128 with a "low component" corresponding to the
204
0
    // 64-bit least significant bits of the product and with a "high component" corresponding
205
0
    // to the 64-bit most significant bits of the product.
206
0
    simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]);
207
0
    firstproduct.low += secondproduct.high;
208
0
    if(secondproduct.high > firstproduct.low) { firstproduct.high++; }
209
0
    // At this point, we might need to add at most one to firstproduct, but this
210
0
    // can only change the value of firstproduct.high if firstproduct.low is maximal.
211
0
    if(simdjson_unlikely(firstproduct.low  == 0xFFFFFFFFFFFFFFFF)) {
212
0
      // This is very unlikely, but if so, we need to do much more work!
213
0
      return false;
214
0
    }
215
0
  }
216
0
  uint64_t lower = firstproduct.low;
217
0
  uint64_t upper = firstproduct.high;
218
0
  // The final mantissa should be 53 bits with a leading 1.
219
0
  // We shift it so that it occupies 54 bits with a leading 1.
220
0
  ///////
221
0
  uint64_t upperbit = upper >> 63;
222
0
  uint64_t mantissa = upper >> (upperbit + 9);
223
0
  lz += int(1 ^ upperbit);
224
0
225
0
  // Here we have mantissa < (1<<54).
226
0
  int64_t real_exponent = exponent - lz;
227
0
  if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal?
228
0
    // Here have that real_exponent <= 0 so -real_exponent >= 0
229
0
    if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
230
0
      d = negative ? -0.0 : 0.0;
231
0
      return true;
232
0
    }
233
0
    // next line is safe because -real_exponent + 1 < 0
234
0
    mantissa >>= -real_exponent + 1;
235
0
    // Thankfully, we can't have both "round-to-even" and subnormals because
236
0
    // "round-to-even" only occurs for powers close to 0.
237
0
    mantissa += (mantissa & 1); // round up
238
0
    mantissa >>= 1;
239
0
    // There is a weird scenario where we don't have a subnormal but just.
240
0
    // Suppose we start with 2.2250738585072013e-308, we end up
241
0
    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
242
0
    // whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
243
0
    // up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
244
0
    // subnormal, but we can only know this after rounding.
245
0
    // So we only declare a subnormal if we are smaller than the threshold.
246
0
    real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
247
0
    d = to_double(mantissa, real_exponent, negative);
248
0
    return true;
249
0
  }
250
0
  // We have to round to even. The "to even" part
251
0
  // is only a problem when we are right in between two floats
252
0
  // which we guard against.
253
0
  // If we have lots of trailing zeros, we may fall right between two
254
0
  // floating-point values.
255
0
  //
256
0
  // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
257
0
  // times a power of two. That is, it is right between a number with binary significand
258
0
  // m and another number with binary significand m+1; and it must be the case
259
0
  // that it cannot be represented by a float itself.
260
0
  //
261
0
  // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p.
262
0
  // Recall that 10^q = 5^q * 2^q.
263
0
  // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that
264
0
  //  5^23 <=  2^54 and it is the last power of five to qualify, so q <= 23.
265
0
  // When q<0, we have  w  >=  (2m+1) x 5^{-q}.  We must have that w<2^{64} so
266
0
  // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
267
0
  // 2^{53} x 5^{-q} < 2^{64}.
268
0
  // Hence we have 5^{-q} < 2^{11}$ or q>= -4.
269
0
  //
270
0
  // We require lower <= 1 and not lower == 0 because we could not prove that
271
0
  // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
272
0
  if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
273
0
    if((mantissa  << (upperbit + 64 - 53 - 2)) ==  upper) {
274
0
      mantissa &= ~1;             // flip it so that we do not round up
275
0
    }
276
0
  }
277
0
278
0
  mantissa += mantissa & 1;
279
0
  mantissa >>= 1;
280
0
281
0
  // Here we have mantissa < (1<<53), unless there was an overflow
282
0
  if (mantissa >= (1ULL << 53)) {
283
0
    //////////
284
0
    // This will happen when parsing values such as 7.2057594037927933e+16
285
0
    ////////
286
0
    mantissa = (1ULL << 52);
287
0
    real_exponent++;
288
0
  }
289
0
  mantissa &= ~(1ULL << 52);
290
0
  // we have to check that real_exponent is in range, otherwise we bail out
291
0
  if (simdjson_unlikely(real_exponent > 2046)) {
292
0
    // We have an infinite value!!! We could actually throw an error here if we could.
293
0
    return false;
294
0
  }
295
0
  d = to_double(mantissa, real_exponent, negative);
296
0
  return true;
297
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::compute_float_64(long, unsigned long, bool, double&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::compute_float_64(long, unsigned long, bool, double&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::compute_float_64(long, unsigned long, bool, double&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::compute_float_64(long, unsigned long, bool, double&)
298
299
// We call a fallback floating-point parser that might be slow. Note
300
// it will accept JSON numbers, but the JSON spec. is more restrictive so
301
// before you call parse_float_fallback, you need to have validated the input
302
// string with the JSON grammar.
303
// It will return an error (false) if the parsed number is infinite.
304
// The string parsing itself always succeeds. We know that there is at least
305
// one digit.
306
0
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
307
0
  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr));
308
0
  // We do not accept infinite values.
309
0
310
0
  // Detecting finite values in a portable manner is ridiculously hard, ideally
311
0
  // we would want to do:
312
0
  // return !std::isfinite(*outDouble);
313
0
  // but that mysteriously fails under legacy/old libc++ libraries, see
314
0
  // https://github.com/simdjson/simdjson/issues/1286
315
0
  //
316
0
  // Therefore, fall back to this solution (the extra parens are there
317
0
  // to handle that max may be a macro on windows).
318
0
  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
319
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, double*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, double*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, double*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, double*)
320
0
static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) {
321
0
  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr), reinterpret_cast<const char *>(end_ptr));
322
0
  // We do not accept infinite values.
323
0
324
0
  // Detecting finite values in a portable manner is ridiculously hard, ideally
325
0
  // we would want to do:
326
0
  // return !std::isfinite(*outDouble);
327
0
  // but that mysteriously fails under legacy/old libc++ libraries, see
328
0
  // https://github.com/simdjson/simdjson/issues/1286
329
0
  //
330
0
  // Therefore, fall back to this solution (the extra parens are there
331
0
  // to handle that max may be a macro on windows).
332
0
  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
333
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, unsigned char const*, double*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, unsigned char const*, double*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, unsigned char const*, double*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::parse_float_fallback(unsigned char const*, unsigned char const*, double*)
334
335
// Quickly check whether the 8 bytes starting at chars are all ASCII digits.
// At a glance, it looks better than Mula's
// http://0x80.pl/articles/swar-digits-validate.html
//
// This can read up to 7 bytes beyond the buffer size, which is fine because
// SIMDJSON_PADDING bytes of padding are required.
simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
  static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7");
  constexpr uint64_t high_nibbles = 0xF0F0F0F0F0F0F0F0;
  uint64_t word;
  std::memcpy(&word, chars, sizeof(word));
  // Each byte must have high nibble 3 ('0'..'9' are 0x30..0x39), and must keep
  // high nibble 3 after adding 6 (which rules out 0x3A..0x3F).
  // A branchy alternative that might be faster:
  //   (word & 0xF0F0F0F0F0F0F0F0) == 0x3030303030303030
  //   && ((word + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) == 0x3030303030303030
  const uint64_t original_high = word & high_nibbles;
  const uint64_t bumped_high = ((word + 0x0606060606060606) & high_nibbles) >> 4;
  return (original_high | bumped_high) == 0x3333333333333333;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::is_made_of_eight_digits_fast(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::is_made_of_eight_digits_fast(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::is_made_of_eight_digits_fast(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::is_made_of_eight_digits_fast(unsigned char const*)
352
353
template<typename W>
354
0
error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) {
355
0
  double d;
356
0
  if (parse_float_fallback(src, &d)) {
357
0
    writer.append_double(d);
358
0
    return SUCCESS;
359
0
  }
360
0
  return INVALID_NUMBER(src);
361
0
}
362
363
// If c is an ASCII digit, append it to the decimal number accumulated in i
// (i = 10 * i + digit) and return true; otherwise leave i untouched and
// return false.
template<typename I>
SIMDJSON_NO_SANITIZE_UNDEFINED // Overflow is deliberately allowed here and checked later by callers
simdjson_inline bool parse_digit(const uint8_t c, I &i) {
  // Non-digits wrap around to a value above 9 in the unsigned subtraction.
  const uint8_t value = static_cast<uint8_t>(c - '0');
  const bool is_digit = (value <= 9);
  if (is_digit) {
    // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
    i = 10 * i + value; // might overflow, the overflow is handled later
  }
  return is_digit;
}
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::fallback::(anonymous namespace)::numberparsing::parse_digit<unsigned long>(unsigned char, unsigned long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::fallback::(anonymous namespace)::numberparsing::parse_digit<long>(unsigned char, long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::icelake::(anonymous namespace)::numberparsing::parse_digit<unsigned long>(unsigned char, unsigned long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::icelake::(anonymous namespace)::numberparsing::parse_digit<long>(unsigned char, long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::haswell::(anonymous namespace)::numberparsing::parse_digit<unsigned long>(unsigned char, unsigned long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::haswell::(anonymous namespace)::numberparsing::parse_digit<long>(unsigned char, long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::westmere::(anonymous namespace)::numberparsing::parse_digit<unsigned long>(unsigned char, unsigned long&)
Unexecuted instantiation: fuzz_padded.cpp:bool simdjson::westmere::(anonymous namespace)::numberparsing::parse_digit<long>(unsigned char, long&)
374
375
0
// Parse the digits that follow the decimal point.
//
// On entry p points just after the '.'; on exit it points at the first
// non-digit. The digits are appended to the integer i (which may overflow;
// that is handled later), and exponent is set to minus the number of digits
// consumed. Returns the INVALID_NUMBER error when there is no digit after the
// period ("123." is illegal), SUCCESS otherwise.
simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
  // We continue with the fiction that we have an integer. If the floating
  // point number is representable as x * 10^z for some integer x that fits in
  // 53 bits, the integer can be converted back into a float losslessly.
  const uint8_t *const fraction_begin = p;

#if defined(SIMDJSON_SWAR_NUMBER_PARSING) && SIMDJSON_SWAR_NUMBER_PARSING
  // Consume eight digits at once; this helps when there are lots of decimals,
  // which turns out to be frequent enough.
  if (is_made_of_eight_digits_fast(p)) {
    i = i * 100000000 + parse_eight_digits_unrolled(p);
    p += 8;
  }
#endif // SIMDJSON_SWAR_NUMBER_PARSING
  // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
  if (parse_digit(*p, i)) { ++p; }
  while (parse_digit(*p, i)) { ++p; }
  exponent = fraction_begin - p;
  const bool no_fraction_digits = (exponent == 0);
  if (no_fraction_digits) {
    return INVALID_NUMBER(src);
  }
  return SUCCESS;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::parse_decimal(unsigned char const*, unsigned char const*&, unsigned long&, long&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::parse_decimal(unsigned char const*, unsigned char const*&, unsigned long&, long&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::parse_decimal(unsigned char const*, unsigned char const*&, unsigned long&, long&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::parse_decimal(unsigned char const*, unsigned char const*&, unsigned long&, long&)
402
403
0
// Parse the exponent part of a number, e.g. the "-78" of -123.456e-78.
//
// On entry p points just after the 'e'/'E'; on exit it points at the first
// character after the exponent digits. The parsed (signed) exponent is added
// to `exponent`. Returns the INVALID_NUMBER error when no digit is present,
// SUCCESS otherwise.
simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
  // Optional sign: -123.456e[-]78 ('+' is skipped as well).
  const bool negative_exponent = ('-' == *p);
  if (negative_exponent || '+' == *p) { ++p; }

  // Digits: -123.456e-[78]
  const uint8_t *digits_begin = p;
  int64_t magnitude = 0;
  while (parse_digit(*p, magnitude)) { ++p; }
  // parse_digit may have overflowed, possibly all the way to INT64_MIN, which
  // cannot be negated. The overflow must thus be dealt with before negating.
  //
  // Performance note: merging the two simdjson_unlikely checks below into one
  // may look faster, but the compiler may generate two distinct paths anyway.
  // Testing uint64_t(p - digits_begin - 1) >= 18 might trade instructions for
  // a simdjson_likely branch, an inconclusive gain.

  // No digit at all is an error.
  if (simdjson_unlikely(digits_begin == p)) {
    return INVALID_NUMBER(src);
  }

  // With more than 18 digits the integer may have overflowed.
  if (simdjson_unlikely(p > digits_begin + 18)) {
    // Leading zeroes do not count: 1e000000000000000000001 is technically
    // valid and does not overflow.
    while (*digits_begin == '0') { ++digits_begin; }
    // 19 significant digits could overflow int64_t and is absurd anyway, so
    // the exponent is truncated to 999,999,999,999,999,999. That is assuredly
    // too large: the largest finite ieee64 value is ~1.8e308 and the smallest
    // subnormal is ~5e-324, so truncating at 324 would even suffice. There is
    // no reason to fail here: 0e999999999999999999999 is a fine number.
    if (p > digits_begin + 18) { magnitude = 999999999999999999; }
  }
  // magnitude is now a sane non-negative integer <= 999,999,999,999,999,999.
  // As long as 'exponent' is in [-8223372036854775808, 8223372036854775808]
  // the addition cannot overflow, and 'exponent' is bounded in magnitude by
  // the size of the JSON input, so the next line should never overflow.
  exponent += (negative_exponent ? -magnitude : magnitude);
  return SUCCESS;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::parse_exponent(unsigned char const*, unsigned char const*&, long&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::parse_exponent(unsigned char const*, unsigned char const*&, long&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::parse_exponent(unsigned char const*, unsigned char const*&, long&)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::parse_exponent(unsigned char const*, unsigned char const*&, long&)
453
454
0
// Return digit_count minus the number of leading '0' and '.' characters found
// at start_digits. This is used when the integer may have overflowed, to
// handle inputs of the form 0.0000somenumber whose leading zeroes carry no
// information.
simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) {
  size_t skipped = 0;
  for (;;) {
    const uint8_t c = start_digits[skipped];
    if ((c != '0') && (c != '.')) { break; }
    ++skipped;
  }
  // We over-decrement by one when there is a '.'
  return digit_count - skipped;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::significant_digits(unsigned char const*, unsigned long)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::significant_digits(unsigned char const*, unsigned long)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::significant_digits(unsigned char const*, unsigned long)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::significant_digits(unsigned char const*, unsigned long)
462
463
template<typename W>
464
0
simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) {
465
0
  // If we frequently had to deal with long strings of digits,
466
0
  // we could extend our code by using a 128-bit integer instead
467
0
  // of a 64-bit integer. However, this is uncommon in practice.
468
0
  //
469
0
  // 9999999999999999999 < 2**64 so we can accommodate 19 digits.
470
0
  // If we have a decimal separator, then digit_count - 1 is the number of digits, but we
471
0
  // may not have a decimal separator!
472
0
  if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) {
473
0
    // Ok, chances are good that we had an overflow!
474
0
    // this is almost never going to get called!!!
475
0
    // we start anew, going slowly!!!
476
0
    // This will happen in the following examples:
477
0
    // 10000000000000000000000000000000000000000000e+308
478
0
    // 3.1415926535897932384626433832795028841971693993751
479
0
    //
480
0
    // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens
481
0
    // because slow_float_parsing is a non-inlined function. If we passed our writer reference to
482
0
    // it, it would force it to be stored in memory, preventing the compiler from picking it apart
483
0
    // and putting into registers. i.e. if we pass it as reference, it gets slow.
484
0
    // This is what forces the skip_double, as well.
485
0
    error_code error = slow_float_parsing(src, writer);
486
0
    writer.skip_double();
487
0
    return error;
488
0
  }
489
0
  // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
490
0
  // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
491
0
  // To future reader: we'd love if someone found a better way, or at least could explain this result!
492
0
  if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) {
493
0
    //
494
0
    // Important: smallest_power is such that it leads to a zero value.
495
0
    // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
496
0
    // so something x 10^-343 goes to zero, but not so with  something x 10^-342.
497
0
    static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
498
0
    //
499
0
    if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
500
0
      // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero
501
0
      WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer);
502
0
      return SUCCESS;
503
0
    } else { // (exponent > largest_power) and (i != 0)
504
0
      // We have, for sure, an infinite value and simdjson refuses to parse infinite values.
505
0
      return INVALID_NUMBER(src);
506
0
    }
507
0
  }
508
0
  double d;
509
0
  if (!compute_float_64(exponent, i, negative, d)) {
510
0
    // we are almost never going to get here.
511
0
    if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); }
512
0
  }
513
0
  WRITE_DOUBLE(d, src, writer);
514
0
  return SUCCESS;
515
0
}
516
517
// For performance analysis, it is sometimes useful to skip number parsing entirely.
518
#ifdef SIMDJSON_SKIPNUMBERPARSING
519
520
template<typename W>
521
simdjson_inline error_code parse_number(const uint8_t *const, W &writer) {
522
  writer.append_s64(0);        // always write zero
523
  return SUCCESS;              // always succeeds
524
}
525
526
// Stand-ins used when SIMDJSON_SKIPNUMBERPARSING is defined. None of them looks
// at its input; each one returns a fixed value (0, false, or signed_integer).
// The parameters are deliberately left unnamed, like the parse_number stand-in,
// so that these definitions do not raise unused-parameter warnings.
// The bounded (src, src_end) overloads mirror the ones offered by the real parsers.
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const, const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const, const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const) noexcept { return 0; }
simdjson_unused simdjson_inline bool is_negative(const uint8_t *) noexcept  { return false; }
simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t *) noexcept  { return false; }
simdjson_unused simdjson_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t *) noexcept { return ondemand::number_type::signed_integer; }
535
#else
536
537
// parse the number at src
538
// define JSON_TEST_NUMBERS for unit testing
539
//
540
// It is assumed that the number is followed by a structural ({,},],[) character
541
// or a white space character. If that is not the case (e.g., when the JSON
542
// document is made of a single number), then it is necessary to copy the
543
// content and append a space before calling this function.
544
//
545
// Our objective is accurate parsing (ULP of 0) at high speed.
546
template<typename W>
547
0
simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) {
548
0
549
0
  //
550
0
  // Check for minus sign
551
0
  //
552
0
  bool negative = (*src == '-');
553
0
  const uint8_t *p = src + uint8_t(negative);
554
0
555
0
  //
556
0
  // Parse the integer part.
557
0
  //
558
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
559
0
  const uint8_t *const start_digits = p;
560
0
  uint64_t i = 0;
561
0
  while (parse_digit(*p, i)) { p++; }
562
0
563
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
564
0
  // Optimization note: size_t is expected to be unsigned.
565
0
  size_t digit_count = size_t(p - start_digits);
566
0
  if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); }
567
0
568
0
  //
569
0
  // Handle floats if there is a . or e (or both)
570
0
  //
571
0
  int64_t exponent = 0;
572
0
  bool is_float = false;
573
0
  if ('.' == *p) {
574
0
    is_float = true;
575
0
    ++p;
576
0
    SIMDJSON_TRY( parse_decimal(src, p, i, exponent) );
577
0
    digit_count = int(p - start_digits); // used later to guard against overflows
578
0
  }
579
0
  if (('e' == *p) || ('E' == *p)) {
580
0
    is_float = true;
581
0
    ++p;
582
0
    SIMDJSON_TRY( parse_exponent(src, p, exponent) );
583
0
  }
584
0
  if (is_float) {
585
0
    const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p);
586
0
    SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) );
587
0
    if (dirty_end) { return INVALID_NUMBER(src); }
588
0
    return SUCCESS;
589
0
  }
590
0
591
0
  // The longest negative 64-bit number is 19 digits.
592
0
  // The longest positive 64-bit number is 20 digits.
593
0
  // We do it this way so we don't trigger this branch unless we must.
594
0
  size_t longest_digit_count = negative ? 19 : 20;
595
0
  if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); }
596
0
  if (digit_count == longest_digit_count) {
597
0
    if (negative) {
598
0
      // Anything negative above INT64_MAX+1 is invalid
599
0
      if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src);  }
600
0
      WRITE_INTEGER(~i+1, src, writer);
601
0
      if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
602
0
      return SUCCESS;
603
0
    // Positive overflow check:
604
0
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
605
0
    //   biggest uint64_t.
606
0
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
607
0
    //   If we got here, it's a 20 digit number starting with the digit "1".
608
0
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
609
0
    //   than 1,553,255,926,290,448,384.
610
0
    // - That is smaller than the smallest possible 20-digit number the user could write:
611
0
    //   10,000,000,000,000,000,000.
612
0
    // - Therefore, if the number is positive and lower than that, it's overflow.
613
0
    // - The value we are looking at is less than or equal to INT64_MAX.
614
0
    //
615
0
    }  else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); }
616
0
  }
617
0
618
0
  // Write unsigned if it doesn't fit in a signed integer.
619
0
  if (i > uint64_t(INT64_MAX)) {
620
0
    WRITE_UNSIGNED(i, src, writer);
621
0
  } else {
622
0
    WRITE_INTEGER(negative ? (~i+1) : i, src, writer);
623
0
  }
624
0
  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
625
0
  return SUCCESS;
626
0
}
627
628
// Inlineable functions
629
namespace {
630
631
// This table can be used to characterize the final character of an integer
632
// string. For JSON structural character and allowable white space characters,
633
// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise
634
// we return NUMBER_ERROR.
635
// Optimization note: we could easily reduce the size of the table by half (to 128)
636
// at the cost of an extra branch.
637
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits):
638
static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast");
639
static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast");
640
static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast");
641
642
const uint8_t integer_string_finisher[256] = {
643
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
644
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, SUCCESS,
645
    SUCCESS,      NUMBER_ERROR,   NUMBER_ERROR, SUCCESS,      NUMBER_ERROR,
646
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
647
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
648
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
649
    NUMBER_ERROR, NUMBER_ERROR,   SUCCESS,      NUMBER_ERROR, NUMBER_ERROR,
650
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
651
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, SUCCESS,
652
    NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
653
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
654
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, SUCCESS,      NUMBER_ERROR,
655
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
656
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE,
657
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
658
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
659
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
660
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
661
    NUMBER_ERROR, SUCCESS,        NUMBER_ERROR, SUCCESS,      NUMBER_ERROR,
662
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
663
    NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
664
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
665
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
666
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
667
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, SUCCESS,      NUMBER_ERROR,
668
    SUCCESS,      NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
669
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
670
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
671
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
672
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
673
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
674
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
675
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
676
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
677
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
678
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
679
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
680
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
681
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
682
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
683
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
684
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
685
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
686
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
687
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
688
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
689
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
690
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
691
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
692
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
693
    NUMBER_ERROR, NUMBER_ERROR,   NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
694
    NUMBER_ERROR};
695
696
// Parse any number from 0 to 18,446,744,073,709,551,615
697
0
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
698
0
  const uint8_t *p = src;
699
0
  //
700
0
  // Parse the integer part.
701
0
  //
702
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
703
0
  const uint8_t *const start_digits = p;
704
0
  uint64_t i = 0;
705
0
  while (parse_digit(*p, i)) { p++; }
706
0
707
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
708
0
  // Optimization note: size_t is expected to be unsigned.
709
0
  size_t digit_count = size_t(p - start_digits);
710
0
  // The longest positive 64-bit number is 20 digits.
711
0
  // We do it this way so we don't trigger this branch unless we must.
712
0
  // Optimization note: the compiler can probably merge
713
0
  // ((digit_count == 0) || (digit_count > 20))
714
0
  // into a single  branch since digit_count is unsigned.
715
0
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
716
0
  // Here digit_count > 0.
717
0
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
718
0
  // We can do the following...
719
0
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
720
0
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
721
0
  // }
722
0
  // as a single table lookup:
723
0
  if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
724
0
725
0
  if (digit_count == 20) {
726
0
    // Positive overflow check:
727
0
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
728
0
    //   biggest uint64_t.
729
0
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
730
0
    //   If we got here, it's a 20 digit number starting with the digit "1".
731
0
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
732
0
    //   than 1,553,255,926,290,448,384.
733
0
    // - That is smaller than the smallest possible 20-digit number the user could write:
734
0
    //   10,000,000,000,000,000,000.
735
0
    // - Therefore, if the number is positive and lower than that, it's overflow.
736
0
    // - The value we are looking at is less than or equal to INT64_MAX.
737
0
    //
738
0
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
739
0
  }
740
0
741
0
  return i;
742
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*)
743
744
745
// Parse any number from 0 to 18,446,744,073,709,551,615
746
// Never read at src_end or beyond
747
0
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
748
0
  const uint8_t *p = src;
749
0
  //
750
0
  // Parse the integer part.
751
0
  //
752
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
753
0
  const uint8_t *const start_digits = p;
754
0
  uint64_t i = 0;
755
0
  while ((p != src_end) && parse_digit(*p, i)) { p++; }
756
0
757
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
758
0
  // Optimization note: size_t is expected to be unsigned.
759
0
  size_t digit_count = size_t(p - start_digits);
760
0
  // The longest positive 64-bit number is 20 digits.
761
0
  // We do it this way so we don't trigger this branch unless we must.
762
0
  // Optimization note: the compiler can probably merge
763
0
  // ((digit_count == 0) || (digit_count > 20))
764
0
  // into a single  branch since digit_count is unsigned.
765
0
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
766
0
  // Here digit_count > 0.
767
0
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
768
0
  // We can do the following...
769
0
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
770
0
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
771
0
  // }
772
0
  // as a single table lookup:
773
0
  if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
774
0
775
0
  if (digit_count == 20) {
776
0
    // Positive overflow check:
777
0
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
778
0
    //   biggest uint64_t.
779
0
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
780
0
    //   If we got here, it's a 20 digit number starting with the digit "1".
781
0
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
782
0
    //   than 1,553,255,926,290,448,384.
783
0
    // - That is smaller than the smallest possible 20-digit number the user could write:
784
0
    //   10,000,000,000,000,000,000.
785
0
    // - Therefore, if the number is positive and lower than that, it's overflow.
786
0
    // - The value we are looking at is less than or equal to INT64_MAX.
787
0
    //
788
0
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
789
0
  }
790
0
791
0
  return i;
792
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned(unsigned char const*, unsigned char const*)
793
794
// Parse any number from 0 to 18,446,744,073,709,551,615
795
0
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept {
796
0
  const uint8_t *p = src + 1;
797
0
  //
798
0
  // Parse the integer part.
799
0
  //
800
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
801
0
  const uint8_t *const start_digits = p;
802
0
  uint64_t i = 0;
803
0
  while (parse_digit(*p, i)) { p++; }
804
0
805
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
806
0
  // Optimization note: size_t is expected to be unsigned.
807
0
  size_t digit_count = size_t(p - start_digits);
808
0
  // The longest positive 64-bit number is 20 digits.
809
0
  // We do it this way so we don't trigger this branch unless we must.
810
0
  // Optimization note: the compiler can probably merge
811
0
  // ((digit_count == 0) || (digit_count > 20))
812
0
  // into a single  branch since digit_count is unsigned.
813
0
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
814
0
  // Here digit_count > 0.
815
0
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
816
0
  // We can do the following...
817
0
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
818
0
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
819
0
  // }
820
0
  // as a single table lookup:
821
0
  if (*p != '"') { return NUMBER_ERROR; }
822
0
823
0
  if (digit_count == 20) {
824
0
    // Positive overflow check:
825
0
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
826
0
    //   biggest uint64_t.
827
0
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
828
0
    //   If we got here, it's a 20 digit number starting with the digit "1".
829
0
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
830
0
    //   than 1,553,255,926,290,448,384.
831
0
    // - That is smaller than the smallest possible 20-digit number the user could write:
832
0
    //   10,000,000,000,000,000,000.
833
0
    // - Therefore, if the number is positive and lower than that, it's overflow.
834
0
    // - The value we are looking at is less than or equal to INT64_MAX.
835
0
    //
836
0
    // Note: we use src[1] and not src[0] because src[0] is the quote character in this
837
0
    // instance.
838
0
    if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
839
0
  }
840
0
841
0
  return i;
842
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_unsigned_in_string(unsigned char const*)
843
844
// Parse any number from  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
845
0
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
846
0
  //
847
0
  // Check for minus sign
848
0
  //
849
0
  bool negative = (*src == '-');
850
0
  const uint8_t *p = src + uint8_t(negative);
851
0
852
0
  //
853
0
  // Parse the integer part.
854
0
  //
855
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
856
0
  const uint8_t *const start_digits = p;
857
0
  uint64_t i = 0;
858
0
  while (parse_digit(*p, i)) { p++; }
859
0
860
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
861
0
  // Optimization note: size_t is expected to be unsigned.
862
0
  size_t digit_count = size_t(p - start_digits);
863
0
  // We go from
864
0
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
865
0
  // so we can never represent numbers that have more than 19 digits.
866
0
  size_t longest_digit_count = 19;
867
0
  // Optimization note: the compiler can probably merge
868
0
  // ((digit_count == 0) || (digit_count > longest_digit_count))
869
0
  // into a single  branch since digit_count is unsigned.
870
0
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
871
0
  // Here digit_count > 0.
872
0
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
873
0
  // We can do the following...
874
0
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
875
0
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
876
0
  // }
877
0
  // as a single table lookup:
878
0
  if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
879
0
  // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
880
0
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
881
0
  // so cheap that we might as well always make it.
882
0
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
883
0
  return negative ? (~i+1) : i;
884
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*)
885
886
// Parse any number from  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
887
// Never read at src_end or beyond
888
0
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
889
0
  //
890
0
  // Check for minus sign
891
0
  //
892
0
  if(src == src_end) { return NUMBER_ERROR; }
893
0
  bool negative = (*src == '-');
894
0
  const uint8_t *p = src + uint8_t(negative);
895
0
896
0
  //
897
0
  // Parse the integer part.
898
0
  //
899
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
900
0
  const uint8_t *const start_digits = p;
901
0
  uint64_t i = 0;
902
0
  while ((p != src_end) && parse_digit(*p, i)) { p++; }
903
0
904
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
905
0
  // Optimization note: size_t is expected to be unsigned.
906
0
  size_t digit_count = size_t(p - start_digits);
907
0
  // We go from
908
0
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
909
0
  // so we can never represent numbers that have more than 19 digits.
910
0
  size_t longest_digit_count = 19;
911
0
  // Optimization note: the compiler can probably merge
912
0
  // ((digit_count == 0) || (digit_count > longest_digit_count))
913
0
  // into a single  branch since digit_count is unsigned.
914
0
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
915
0
  // Here digit_count > 0.
916
0
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
917
0
  // We can do the following...
918
0
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
919
0
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
920
0
  // }
921
0
  // as a single table lookup:
922
0
  if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
923
0
  // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
924
0
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
925
0
  // so cheap that we might as well always make it.
926
0
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
927
0
  return negative ? (~i+1) : i;
928
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer(unsigned char const*, unsigned char const*)
929
930
// Parse any number from  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
931
0
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t *src) noexcept {
932
0
  //
933
0
  // Check for minus sign
934
0
  //
935
0
  bool negative = (*(src + 1) == '-');
936
0
  src += uint8_t(negative) + 1;
937
0
938
0
  //
939
0
  // Parse the integer part.
940
0
  //
941
0
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
942
0
  const uint8_t *const start_digits = src;
943
0
  uint64_t i = 0;
944
0
  while (parse_digit(*src, i)) { src++; }
945
0
946
0
  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
947
0
  // Optimization note: size_t is expected to be unsigned.
948
0
  size_t digit_count = size_t(src - start_digits);
949
0
  // We go from
950
0
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
951
0
  // so we can never represent numbers that have more than 19 digits.
952
0
  size_t longest_digit_count = 19;
953
0
  // Optimization note: the compiler can probably merge
954
0
  // ((digit_count == 0) || (digit_count > longest_digit_count))
955
0
  // into a single  branch since digit_count is unsigned.
956
0
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
957
0
  // Here digit_count > 0.
958
0
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
959
0
  // We can do the following...
960
0
  // if (!jsoncharutils::is_structural_or_whitespace(*src)) {
961
0
  //  return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
962
0
  // }
963
0
  // as a single table lookup:
964
0
  if(*src != '"') { return NUMBER_ERROR; }
965
0
  // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
966
0
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
967
0
  // so cheap that we might as well always make it.
968
0
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
969
0
  return negative ? (~i+1) : i;
970
0
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_integer_in_string(unsigned char const*)
971
972
0
// Parse the JSON number starting at src as a double.
//
// The digits of the integer and fractional parts are accumulated into a single
// 64-bit integer while the decimal exponent is tracked separately. When that
// accumulator may have overflowed (more than 19 significant digits), when the
// exponent is outside [smallest_power, largest_power], or when compute_float_64
// reports failure, the text is handed to parse_float_fallback instead.
//
// Returns the parsed value, or
//  - INCORRECT_TYPE when there is no digit after the optional minus sign;
//  - NUMBER_ERROR when a leading zero is followed by more digits, '.' is not followed
//    by a digit, the exponent has no digits or more than 19 digits, the number is not
//    followed by a structural or whitespace character, or parse_float_fallback fails.
simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  src += uint8_t(negative);

  //
  // Parse the integer part.
  //
  uint64_t i = 0;
  const uint8_t *p = src;
  p += parse_digit(*p, i);
  // After at most one digit, i == 0 means that digit was '0' (or that there was no
  // digit at all, which is rejected just below).
  bool leading_zero = (i == 0);
  while (parse_digit(*p, i)) { p++; }
  // no integer digits, or 0123 (zero must be solo)
  if ( p == src ) { return INCORRECT_TYPE; }
  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }

  //
  // Parse the decimal part.
  //
  int64_t exponent = 0;
  bool overflow;
  if (simdjson_likely(*p == '.')) {
    p++;
    const uint8_t *start_decimal_digits = p;
    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
    p++;
    while (parse_digit(*p, i)) { p++; }
    // Every fractional digit shifts the decimal exponent down by one.
    exponent = -(p - start_decimal_digits);

    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
    overflow = p-src-1 > 19;
    if (simdjson_unlikely(overflow && leading_zero)) {
      // The integer part is the single digit '0', so the fraction starts at src + 2.
      // Skip the zeros of a 0.00000 prefix (they contribute nothing to i) and count
      // only the digits that remain, from the first non-zero digit up to p.
      const uint8_t *start_digits = src + 2;
      while (*start_digits == '0') { start_digits++; }
      overflow = p-start_digits > 19;
    }
  } else {
    overflow = p-src > 19;
  }

  //
  // Parse the exponent
  //
  if (*p == 'e' || *p == 'E') {
    p++;
    bool exp_neg = *p == '-';
    p += exp_neg || *p == '+';

    uint64_t exp = 0;
    const uint8_t *start_exp_digits = p;
    while (parse_digit(*p, exp)) { p++; }
    // no exp digits, or 20+ exp digits
    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }

    exponent += exp_neg ? 0-exp : exp;
  }

  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }

  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;

  //
  // Assemble (or slow-parse) the float
  //
  double d;
  if (simdjson_likely(!overflow)) {
    if (compute_float_64(exponent, i, negative, d)) { return d; }
  }
  // The fallback re-reads the text from the beginning, including the minus sign
  // that src was advanced past.
  if (!parse_float_fallback(src - uint8_t(negative), &d)) {
    return NUMBER_ERROR;
  }
  return d;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*)
1049
1050
0
/**
 * Tells whether the number text starting at src begins with a minus sign.
 * Only the first byte is inspected; nothing else about the number is validated.
 */
simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept {
  const uint8_t first_byte = src[0];
  return first_byte == '-';
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::is_negative(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::is_negative(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::is_negative(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::is_negative(unsigned char const*)
1053
1054
0
/**
 * Tells whether the number text at src is written as an integer, i.e. an
 * optional '-' followed by one or more decimal digits and then a character
 * accepted by jsoncharutils::is_structural_or_whitespace.
 *
 * @return NUMBER_ERROR when there is no digit after the optional sign,
 *         true when the digit run is followed by such a character,
 *         false otherwise (for instance when '.', 'e' or 'E' follows).
 */
simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept {
  // Step over the optional sign.
  if (*src == '-') { src++; }

  // Advance over the run of ASCII digits. The unsigned subtraction makes any
  // byte below '0' wrap to a large value, so a single comparison suffices.
  const uint8_t *cursor = src;
  while (static_cast<uint8_t>(*cursor - '0') <= 9) { ++cursor; }

  if (cursor == src) { return NUMBER_ERROR; }
  const bool ends_cleanly = jsoncharutils::is_structural_or_whitespace(*cursor) ? true : false;
  return ends_cleanly;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::is_integer(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::is_integer(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::is_integer(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::is_integer(unsigned char const*)
1063
1064
0
/**
 * Classifies the number text at src as a floating-point number, a signed
 * integer or an unsigned integer, without computing its value.
 *
 * @return NUMBER_ERROR when no digit follows the optional '-'.
 *         floating_point_number when the leading digit run is not followed by
 *         a character accepted by jsoncharutils::is_structural_or_whitespace.
 *         signed_integer for any negative integer and for non-negative
 *         integers whose text is below 9223372036854775808.
 *         unsigned_integer for non-negative integers at or above that value.
 */
simdjson_unused simdjson_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t * src) noexcept {
  const bool has_minus = (src[0] == '-');
  if (has_minus) { src++; }

  // Find the end of the leading run of ASCII digits.
  const uint8_t *cursor = src;
  while (static_cast<uint8_t>(*cursor - '0') <= 9) { ++cursor; }
  if (cursor == src) { return NUMBER_ERROR; }

  if (!jsoncharutils::is_structural_or_whitespace(*cursor)) {
    // Something else follows the digits: hopefully 'e', 'E' or '.'.
    return ondemand::number_type::floating_point_number;
  }

  // From here on the text is an integer.
  // A negative integer is always reported as signed.
  if (has_minus) { return ondemand::number_type::signed_integer; }

  // Values >= 9223372036854775808 (1<<63) are unsigned, the rest are signed.
  const int num_digits = int(cursor - src);
  if (num_digits < 19) { return ondemand::number_type::signed_integer; }
  if (num_digits >= 20) { return ondemand::number_type::unsigned_integer; }

  // Exactly 19 digits: both texts have the same length, so a bytewise
  // comparison of the digits orders them numerically.
  const uint8_t * first_unsigned = reinterpret_cast<const uint8_t *>("9223372036854775808");
  if (memcmp(src, first_unsigned, 19) >= 0) {
    return ondemand::number_type::unsigned_integer;
  }
  return ondemand::number_type::signed_integer;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::get_number_type(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::get_number_type(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::get_number_type(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::get_number_type(unsigned char const*)
1088
1089
// Never read at src_end or beyond
1090
0
/**
 * Parses the JSON number stored in [src, src_end) as a double.
 * Never reads at src_end or beyond.
 *
 * The digits are accumulated into a 64-bit integer together with a decimal
 * exponent and handed to compute_float_64. When the text carries more than 19
 * significant digits, when the exponent is outside
 * [smallest_power, largest_power], or when compute_float_64 declines, the
 * text is re-parsed by parse_float_fallback.
 *
 * @param src      first byte of the number (an optional '-' is accepted)
 * @param src_end  one past the last readable byte
 * @return the parsed value;
 *         INCORRECT_TYPE when no digit follows the optional sign;
 *         NUMBER_ERROR for empty input, a leading zero followed by more
 *         digits, a '.' without digits, an exponent with no digits or with
 *         more than 19 digits, a trailing byte that is neither structural nor
 *         whitespace, or a fallback failure.
 */
simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept {
  if(src == src_end) { return NUMBER_ERROR; }
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  src += uint8_t(negative);

  //
  // Parse the integer part.
  //
  uint64_t i = 0;
  const uint8_t *p = src;
  if(p == src_end) { return NUMBER_ERROR; }
  p += parse_digit(*p, i);
  bool leading_zero = (i == 0);
  while ((p != src_end) && parse_digit(*p, i)) { p++; }
  // no integer digits, or 0123 (zero must be solo)
  if ( p == src ) { return INCORRECT_TYPE; }
  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }

  //
  // Parse the decimal part.
  //
  int64_t exponent = 0;
  bool overflow;
  if (simdjson_likely((p != src_end) && (*p == '.'))) {
    p++;
    const uint8_t *start_decimal_digits = p;
    if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
    p++;
    while ((p != src_end) && parse_digit(*p, i)) { p++; }
    exponent = -(p - start_decimal_digits);

    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
    overflow = p-src-1 > 19;
    if (simdjson_unlikely(overflow && leading_zero)) {
      // The text starts with "0.": zeros right after the point do not add to
      // the accumulated integer, so skip them and count only the digits that
      // remain. The scan is bounded by p so that an all-zero fraction ending
      // exactly at src_end never causes a read at src_end.
      const uint8_t *start_digits = src + 2;
      while ((start_digits != p) && (*start_digits == '0')) { start_digits++; }
      overflow = p-start_digits > 19;
    }
  } else {
    overflow = p-src > 19;
  }

  //
  // Parse the exponent
  //
  if ((p != src_end) && (*p == 'e' || *p == 'E')) {
    p++;
    if(p == src_end) { return NUMBER_ERROR; }
    bool exp_neg = *p == '-';
    p += exp_neg || *p == '+';

    uint64_t exp = 0;
    const uint8_t *start_exp_digits = p;
    while ((p != src_end) && parse_digit(*p, exp)) { p++; }
    // no exp digits, or 20+ exp digits
    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }

    exponent += exp_neg ? 0-exp : exp;
  }

  // Reaching src_end is an acceptable end of number; otherwise the next byte
  // must be structural or whitespace.
  if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }

  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;

  //
  // Assemble (or slow-parse) the float
  //
  double d;
  if (simdjson_likely(!overflow)) {
    if (compute_float_64(exponent, i, negative, d)) { return d; }
  }
  // The fallback re-reads the text from the sign, hence the rewind.
  if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) {
    return NUMBER_ERROR;
  }
  return d;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*, unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double(unsigned char const*, unsigned char const*)
1170
1171
0
/**
 * Parses a number written inside a JSON string as a double.
 *
 * The first byte at src is skipped without being inspected (the caller is
 * expected to point at the opening quote), and the number must be followed
 * immediately by a closing '"'.
 *
 * The digits are accumulated into a 64-bit integer together with a decimal
 * exponent and handed to compute_float_64. When the text carries more than 19
 * significant digits, when the exponent is outside
 * [smallest_power, largest_power], or when compute_float_64 declines, the
 * text is re-parsed by parse_float_fallback.
 *
 * @param src  pointer to the byte preceding the number text
 * @return the parsed value;
 *         INCORRECT_TYPE when no digit follows the optional sign;
 *         NUMBER_ERROR for a leading zero followed by more digits, a '.'
 *         without digits, an exponent with no digits or with more than 19
 *         digits, a number not terminated by '"', or a fallback failure.
 */
simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*(src + 1) == '-');
  src += uint8_t(negative) + 1;

  //
  // Parse the integer part.
  //
  uint64_t i = 0;
  const uint8_t *p = src;
  p += parse_digit(*p, i);
  bool leading_zero = (i == 0);
  while (parse_digit(*p, i)) { p++; }
  // no integer digits, or 0123 (zero must be solo)
  if ( p == src ) { return INCORRECT_TYPE; }
  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }

  //
  // Parse the decimal part.
  //
  int64_t exponent = 0;
  bool overflow;
  if (simdjson_likely(*p == '.')) {
    p++;
    const uint8_t *start_decimal_digits = p;
    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
    p++;
    while (parse_digit(*p, i)) { p++; }
    exponent = -(p - start_decimal_digits);

    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
    overflow = p-src-1 > 19;
    if (simdjson_unlikely(overflow && leading_zero)) {
      // The text starts with "0.": zeros right after the point do not add to
      // the accumulated integer, so skip them and count only the digits that
      // remain. The scan stops at p at the latest because *p is not a digit.
      const uint8_t *start_digits = src + 2;
      while (*start_digits == '0') { start_digits++; }
      overflow = p-start_digits > 19;
    }
  } else {
    overflow = p-src > 19;
  }

  //
  // Parse the exponent
  //
  if (*p == 'e' || *p == 'E') {
    p++;
    bool exp_neg = *p == '-';
    p += exp_neg || *p == '+';

    uint64_t exp = 0;
    const uint8_t *start_exp_digits = p;
    while (parse_digit(*p, exp)) { p++; }
    // no exp digits, or 20+ exp digits
    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }

    exponent += exp_neg ? 0-exp : exp;
  }

  // The number must run right up to the closing quote.
  if (*p != '"') { return NUMBER_ERROR; }

  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;

  //
  // Assemble (or slow-parse) the float
  //
  double d;
  if (simdjson_likely(!overflow)) {
    if (compute_float_64(exponent, i, negative, d)) { return d; }
  }
  // The fallback re-reads the text from the sign, hence the rewind.
  if (!parse_float_fallback(src - uint8_t(negative), &d)) {
    return NUMBER_ERROR;
  }
  return d;
}
Unexecuted instantiation: fuzz_padded.cpp:simdjson::fallback::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::icelake::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::haswell::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double_in_string(unsigned char const*)
Unexecuted instantiation: fuzz_padded.cpp:simdjson::westmere::(anonymous namespace)::numberparsing::(anonymous namespace)::parse_double_in_string(unsigned char const*)
1248
} //namespace {}
1249
#endif // SIMDJSON_SKIPNUMBERPARSING
1250
1251
} // namespace numberparsing
1252
} // unnamed namespace
1253
} // namespace SIMDJSON_IMPLEMENTATION
1254
} // namespace simdjson