Coverage Report

Created: 2025-06-22 06:56

/src/json-c/json_tokener.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3
 *
4
 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5
 * Michael Clark <michael@metaparadigm.com>
6
 *
7
 * This library is free software; you can redistribute it and/or modify
8
 * it under the terms of the MIT license. See COPYING for details.
9
 *
10
 *
11
 * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
12
 * The copyrights to the contents of this file are licensed under the MIT License
13
 * (https://www.opensource.org/licenses/mit-license.php)
14
 */
15
16
#include "config.h"
17
18
#include "math_compat.h"
19
#include <assert.h>
20
#include <errno.h>
21
#include <limits.h>
22
#include <math.h>
23
#include <stddef.h>
24
#include <stdio.h>
25
#include <stdlib.h>
26
#include <string.h>
27
28
#include "debug.h"
29
#include "json_inttypes.h"
30
#include "json_object.h"
31
#include "json_object_private.h"
32
#include "json_tokener.h"
33
#include "json_util.h"
34
#include "printbuf.h"
35
#include "strdup_compat.h"
36
37
#ifdef HAVE_LOCALE_H
38
#include <locale.h>
39
#endif /* HAVE_LOCALE_H */
40
#ifdef HAVE_XLOCALE_H
41
#include <xlocale.h>
42
#endif
43
#ifdef HAVE_STRINGS_H
44
#include <strings.h>
45
#endif /* HAVE_STRINGS_H */
46
47
27.1k
/* Convert one hexadecimal digit character to its numeric value (0-15).
 * Assumes ASCII and that (x) is already known to be a hex digit (see
 * is_hex_char()): for 'A'-'F' and 'a'-'f' the low three bits are 1-6, so
 * adding 9 yields 10-15.  (x) is evaluated twice; pass an expression
 * without side effects.
 */
#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9)
48
49
#if !HAVE_STRNCASECMP && defined(_MSC_VER)
50
/* MSC has the version as _strnicmp */
51
#define strncasecmp _strnicmp
52
#elif !HAVE_STRNCASECMP
53
#error You do not have strncasecmp on your system.
54
#endif /* HAVE_STRNCASECMP */
55
56
#if defined(_MSC_VER) && (_MSC_VER <= 1800)
57
/* VS2013 doesn't know about "inline" */
58
#define inline __inline
59
#elif defined(AIX_CC)
60
#define inline
61
#endif
62
63
/* The following helper functions are used to speed up parsing. They
64
 * are faster than their ctype counterparts because they assume that
65
 * the input is in ASCII and that the locale is set to "C". The
66
 * compiler will also inline these functions, providing an additional
67
 * speedup by saving on function calls.
68
 */
69
/* Return 1 if c is one of the four JSON whitespace characters
 * (space, tab, line feed, carriage return), 0 otherwise.
 */
static inline int is_ws_char(char c)
{
  switch (c)
  {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
76
77
/* Return 1 if c is an ASCII hexadecimal digit (0-9, A-F, a-f), 0 otherwise. */
static inline int is_hex_char(char c)
{
  if (c >= '0' && c <= '9')
    return 1;
  if (c >= 'A' && c <= 'F')
    return 1;
  return c >= 'a' && c <= 'f';
}
83
84
/* Use C99 NAN by default; if not available, nan("") should work too. */
85
#ifndef NAN
86
#define NAN nan("")
87
#endif /* !NAN */
88
89
/* Literal tokens recognised by the parser.  Every *_len value is the
 * string length without the terminating NUL.
 */
static const char json_null_str[] = "null";
static const int json_null_str_len = (int)(sizeof(json_null_str) - 1);

static const char json_inf_str[] = "Infinity";
/* Swapped case "Infinity" to avoid need to call tolower() on input chars: */
static const char json_inf_str_invert[] = "iNFINITY";
static const unsigned int json_inf_str_len = (unsigned int)(sizeof(json_inf_str) - 1);

static const char json_nan_str[] = "NaN";
static const int json_nan_str_len = (int)(sizeof(json_nan_str) - 1);

static const char json_true_str[] = "true";
static const int json_true_str_len = (int)(sizeof(json_true_str) - 1);

static const char json_false_str[] = "false";
static const int json_false_str_len = (int)(sizeof(json_false_str) - 1);
101
102
/* clang-format off */
103
/* Human-readable descriptions, looked up by json_tokener_error_desc() using
 * the numeric value of enum json_tokener_error as the index.
 * Both the strings and the pointer table itself are const.
 */
static const char *const json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
  "invalid utf-8 string",
  "buffer size overflow",
  "out of memory"
};
122
/* clang-format on */
123
124
/**
125
 * Validate the utf-8 string in strict mode.
126
 * if not utf-8 format, return err.
127
 */
128
static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
129
130
static int json_tokener_parse_double(const char *buf, int len, double *retval);
131
132
const char *json_tokener_error_desc(enum json_tokener_error jerr)
133
2.35k
{
134
2.35k
  int jerr_int = (int)jerr;
135
2.35k
  if (jerr_int < 0 ||
136
2.35k
      jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
137
0
    return "Unknown error, "
138
0
           "invalid json_tokener_error value passed to json_tokener_error_desc()";
139
2.35k
  return json_tokener_errors[jerr];
140
2.35k
}
141
142
enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
143
4.71k
{
144
4.71k
  return tok->err;
145
4.71k
}
146
147
/* Stuff for decoding unicode sequences */
148
4.88k
/* Surrogate classification for \uNNNN escapes.
 *
 * The masks cover all 32 bits on purpose: these macros are also applied to
 * tok->ucs_char *after* a surrogate pair has been combined, when the value
 * lies in 0x10000..0x10FFFF.  A 16-bit mask (0xFC00) would misclassify code
 * points such as U+1D800 or U+1DC00 as lone surrogates.
 */
#define IS_HIGH_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xDC00)
/* Combine a high/low surrogate pair into one code point (>= 0x10000). */
#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
/* UTF-8 encoding of U+FFFD, emitted in place of unpaired surrogates. */
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
152
153
struct json_tokener *json_tokener_new_ex(int depth)
154
10.3k
{
155
10.3k
  struct json_tokener *tok;
156
157
10.3k
  if (depth < 1)
158
0
    return NULL;
159
160
10.3k
  tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));
161
10.3k
  if (!tok)
162
0
    return NULL;
163
10.3k
  tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
164
10.3k
  if (!tok->stack)
165
0
  {
166
0
    free(tok);
167
0
    return NULL;
168
0
  }
169
10.3k
  tok->pb = printbuf_new();
170
10.3k
  if (!tok->pb)
171
0
  {
172
0
    free(tok->stack);
173
0
    free(tok);
174
0
    return NULL;
175
0
  }
176
10.3k
  tok->max_depth = depth;
177
10.3k
  json_tokener_reset(tok);
178
10.3k
  return tok;
179
10.3k
}
180
181
struct json_tokener *json_tokener_new(void)
182
10.3k
{
183
10.3k
  return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
184
10.3k
}
185
186
void json_tokener_free(struct json_tokener *tok)
187
10.3k
{
188
10.3k
  if (!tok)
189
0
    return;
190
10.3k
  json_tokener_reset(tok);
191
10.3k
  if (tok->pb)
192
10.3k
    printbuf_free(tok->pb);
193
10.3k
  free(tok->stack);
194
10.3k
  free(tok);
195
10.3k
}
196
197
static void json_tokener_reset_level(struct json_tokener *tok, int depth)
198
746k
{
199
746k
  tok->stack[depth].state = json_tokener_state_eatws;
200
746k
  tok->stack[depth].saved_state = json_tokener_state_start;
201
746k
  json_object_put(tok->stack[depth].current);
202
746k
  tok->stack[depth].current = NULL;
203
746k
  free(tok->stack[depth].obj_field_name);
204
746k
  tok->stack[depth].obj_field_name = NULL;
205
746k
}
206
207
void json_tokener_reset(struct json_tokener *tok)
208
20.7k
{
209
20.7k
  int i;
210
20.7k
  if (!tok)
211
0
    return;
212
213
45.1k
  for (i = tok->depth; i >= 0; i--)
214
24.4k
    json_tokener_reset_level(tok, i);
215
20.7k
  tok->depth = 0;
216
20.7k
  tok->err = json_tokener_success;
217
20.7k
}
218
219
struct json_object *json_tokener_parse(const char *str)
220
0
{
221
0
  enum json_tokener_error jerr_ignored;
222
0
  struct json_object *obj;
223
0
  obj = json_tokener_parse_verbose(str, &jerr_ignored);
224
0
  return obj;
225
0
}
226
227
struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
228
0
{
229
0
  struct json_tokener *tok;
230
0
  struct json_object *obj;
231
232
0
  tok = json_tokener_new();
233
0
  if (!tok)
234
0
  {
235
0
    *error = json_tokener_error_memory;
236
0
    return NULL;
237
0
  }
238
0
  obj = json_tokener_parse_ex(tok, str, -1);
239
0
  *error = tok->err;
240
0
  if (tok->err != json_tokener_success
241
#if 0
242
    /* This would be a more sensible default, and cause parsing
243
     * things like "null123" to fail when the caller can't know
244
     * where the parsing left off, but starting to fail would
245
     * be a notable behaviour change.  Save for a 1.0 release.
246
     */
247
      || json_tokener_get_parse_end(tok) != strlen(str)
248
#endif
249
0
  )
250
251
0
  {
252
0
    if (obj != NULL)
253
0
      json_object_put(obj);
254
0
    obj = NULL;
255
0
  }
256
257
0
  json_tokener_free(tok);
258
0
  return obj;
259
0
}
260
261
8.61M
/* Shorthand for fields of the current stack level, tok->stack[tok->depth].
 * This macro and the saved_state, current and obj_field_name macros that
 * follow expand to an expression on a variable literally named `tok`, so
 * they only work where such a variable is in scope.
 */
#define state tok->stack[tok->depth].state
262
3.25M
#define saved_state tok->stack[tok->depth].saved_state
263
1.45M
#define current tok->stack[tok->depth].current
264
674k
#define obj_field_name tok->stack[tok->depth].obj_field_name
265
266
/* Optimization:
267
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
268
 * iterating character-by character.  A large performance boost is
269
 * achieved by using tighter loops to locally handle units such as
270
 * comments and strings.  Loops that handle an entire token within
271
 * their scope also gather entire strings and pass them to
272
 * printbuf_memappend() in a single call, rather than calling
273
 * printbuf_memappend() one char at a time.
274
 *
275
 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
276
 * common to both the main loop and the tighter loops.
277
 */
278
279
/* PEEK_CHAR(dest, tok) macro:
 *   Peeks at the current char and stores it in dest.
 *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
 *   When out of chars, tok->err is json_tokener_success if the tokener is
 *   at depth 0 in the eatws state with the finish state saved, and
 *   json_tokener_continue otherwise.
 *   If JSON_TOKENER_VALIDATE_UTF8 is set and the current byte fails
 *   json_tokener_validate_utf8(), sets tok->err to
 *   json_tokener_error_parse_utf8_string and returns 0.
 *   Implicit inputs:  str, len, nBytesp vars
 *   Note: state and saved_state expand to a variable literally named `tok`,
 *   so the tok argument must be that variable.
 */
#define PEEK_CHAR(dest, tok)                                                 \
  (((tok)->char_offset == len)                                               \
       ? (((tok)->depth == 0 && state == json_tokener_state_eatws &&         \
           saved_state == json_tokener_state_finish)                         \
              ? (((tok)->err = json_tokener_success), 0)                     \
              : (((tok)->err = json_tokener_continue), 0))                   \
       : ((((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) &&                    \
           (!json_tokener_validate_utf8(*str, nBytesp)))                     \
              ? (((tok)->err = json_tokener_error_parse_utf8_string), 0)     \
              : (((dest) = *str), 1)))
294
295
/* ADVANCE_CHAR() macro:
296
 *   Increments str & tok->char_offset.
297
 *   For convenience of existing conditionals, returns the old value of c (0 on eof).
298
 *   Implicit inputs:  c var
299
 */
300
12.6M
#define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c)
301
302
/* printbuf_memappend_checked(p, s, l) macro:
303
 *   Add string s of length l to printbuffer p.
304
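 *   If the operation fails, abort the parse with a memory error:
 *   tok->err is set to json_tokener_error_memory and control jumps to
 *   the `out` label.
 *   Implicit inputs:  tok var, out label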
305
 */
306
#define printbuf_memappend_checked(p, s, l)                   \
307
655k
  do {                                                  \
308
655k
    if (printbuf_memappend((p), (s), (l)) < 0)    \
309
655k
    {                                             \
310
0
      tok->err = json_tokener_error_memory; \
311
0
      goto out;                             \
312
0
    }                                             \
313
655k
  } while (0)
314
315
/* End optimization macro defs */
316
317
struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
318
10.3k
{
319
10.3k
  struct json_object *obj = NULL;
320
10.3k
  char c = '\1';
321
10.3k
  unsigned int nBytes = 0;
322
10.3k
  unsigned int *nBytesp = &nBytes;
323
324
10.3k
#ifdef HAVE_USELOCALE
325
10.3k
  locale_t oldlocale = uselocale(NULL);
326
10.3k
  locale_t newloc;
327
#elif defined(HAVE_SETLOCALE)
328
  char *oldlocale = NULL;
329
#endif
330
331
10.3k
  tok->char_offset = 0;
332
10.3k
  tok->err = json_tokener_success;
333
334
  /* this interface is presently not 64-bit clean due to the int len argument
335
   * and the internal printbuf interface that takes 32-bit int len arguments
336
   * so the function limits the maximum string size to INT32_MAX (2GB).
337
   * If the function is called with len == -1 then strlen is called to check
338
   * the string length is less than INT32_MAX (2GB)
339
   */
340
10.3k
  if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
341
0
  {
342
0
    tok->err = json_tokener_error_size;
343
0
    return NULL;
344
0
  }
345
346
10.3k
#ifdef HAVE_USELOCALE
347
10.3k
  {
348
10.3k
#ifdef HAVE_DUPLOCALE
349
10.3k
    locale_t duploc = duplocale(oldlocale);
350
10.3k
    if (duploc == NULL && errno == ENOMEM)
351
0
    {
352
0
      tok->err = json_tokener_error_memory;
353
0
      return NULL;
354
0
    }
355
10.3k
    newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
356
#else
357
    newloc = newlocale(LC_NUMERIC_MASK, "C", oldlocale);
358
#endif
359
10.3k
    if (newloc == NULL)
360
0
    {
361
0
      tok->err = json_tokener_error_memory;
362
0
#ifdef HAVE_DUPLOCALE
363
0
      freelocale(duploc);
364
0
#endif
365
0
      return NULL;
366
0
    }
367
#ifdef NEWLOCALE_NEEDS_FREELOCALE
368
#ifdef HAVE_DUPLOCALE
369
    // Older versions of FreeBSD (<12.4) don't free the locale
370
    // passed to newlocale(), so do it here
371
    freelocale(duploc);
372
#endif
373
#endif
374
10.3k
    uselocale(newloc);
375
10.3k
  }
376
#elif defined(HAVE_SETLOCALE)
377
  {
378
    char *tmplocale;
379
    tmplocale = setlocale(LC_NUMERIC, NULL);
380
    if (tmplocale)
381
    {
382
      oldlocale = strdup(tmplocale);
383
      if (oldlocale == NULL)
384
      {
385
        tok->err = json_tokener_error_memory;
386
        return NULL;
387
      }
388
    }
389
    setlocale(LC_NUMERIC, "C");
390
  }
391
#endif
392
393
1.27M
  while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
394
1.27M
  {
395
396
4.50M
  redo_char:
397
4.50M
    switch (state)
398
4.50M
    {
399
400
1.74M
    case json_tokener_state_eatws:
401
      /* Advance until we change state */
402
1.92M
      while (is_ws_char(c))
403
182k
      {
404
182k
        if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
405
0
          goto out;
406
182k
      }
407
1.74M
      if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
408
3.02k
      {
409
3.02k
        printbuf_reset(tok->pb);
410
3.02k
        printbuf_memappend_checked(tok->pb, &c, 1);
411
3.02k
        state = json_tokener_state_comment_start;
412
3.02k
      }
413
1.74M
      else
414
1.74M
      {
415
1.74M
        state = saved_state;
416
1.74M
        goto redo_char;
417
1.74M
      }
418
3.02k
      break;
419
420
369k
    case json_tokener_state_start:
421
369k
      switch (c)
422
369k
      {
423
58.3k
      case '{':
424
58.3k
        state = json_tokener_state_eatws;
425
58.3k
        saved_state = json_tokener_state_object_field_start;
426
58.3k
        current = json_object_new_object();
427
58.3k
        if (current == NULL)
428
0
        {
429
0
          tok->err = json_tokener_error_memory;
430
0
          goto out;
431
0
        }
432
58.3k
        break;
433
58.3k
      case '[':
434
15.4k
        state = json_tokener_state_eatws;
435
15.4k
        saved_state = json_tokener_state_array;
436
15.4k
        current = json_object_new_array();
437
15.4k
        if (current == NULL)
438
0
        {
439
0
          tok->err = json_tokener_error_memory;
440
0
          goto out;
441
0
        }
442
15.4k
        break;
443
15.4k
      case 'I':
444
615
      case 'i':
445
615
        state = json_tokener_state_inf;
446
615
        printbuf_reset(tok->pb);
447
615
        tok->st_pos = 0;
448
615
        goto redo_char;
449
1.17k
      case 'N':
450
14.3k
      case 'n':
451
14.3k
        state = json_tokener_state_null; // or NaN
452
14.3k
        printbuf_reset(tok->pb);
453
14.3k
        tok->st_pos = 0;
454
14.3k
        goto redo_char;
455
514
      case '\'':
456
514
        if (tok->flags & JSON_TOKENER_STRICT)
457
0
        {
458
          /* in STRICT mode only double-quote are allowed */
459
0
          tok->err = json_tokener_error_parse_unexpected;
460
0
          goto out;
461
0
        }
462
        /* FALLTHRU */
463
86.0k
      case '"':
464
86.0k
        state = json_tokener_state_string;
465
86.0k
        printbuf_reset(tok->pb);
466
86.0k
        tok->quote_char = c;
467
86.0k
        break;
468
610
      case 'T':
469
1.12k
      case 't':
470
1.26k
      case 'F':
471
1.69k
      case 'f':
472
1.69k
        state = json_tokener_state_boolean;
473
1.69k
        printbuf_reset(tok->pb);
474
1.69k
        tok->st_pos = 0;
475
1.69k
        goto redo_char;
476
33.0k
      case '0':
477
38.2k
      case '1':
478
42.4k
      case '2':
479
44.6k
      case '3':
480
47.9k
      case '4':
481
185k
      case '5':
482
186k
      case '6':
483
187k
      case '7':
484
187k
      case '8':
485
188k
      case '9':
486
192k
      case '-':
487
192k
        state = json_tokener_state_number;
488
192k
        printbuf_reset(tok->pb);
489
192k
        tok->is_double = 0;
490
192k
        goto redo_char;
491
128
      default: tok->err = json_tokener_error_parse_unexpected; goto out;
492
369k
      }
493
159k
      break;
494
495
363k
    case json_tokener_state_finish:
496
363k
      if (tok->depth == 0)
497
7.93k
        goto out;
498
355k
      obj = json_object_get(current);
499
355k
      json_tokener_reset_level(tok, tok->depth);
500
355k
      tok->depth--;
501
355k
      goto redo_char;
502
503
1.11k
    case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
504
1.11k
    {
505
      /* If we were guaranteed to have len set, then we could (usually) handle
506
       * the entire "Infinity" check in a single strncmp (strncasecmp), but
507
       * since len might be -1 (i.e. "read until \0"), we need to check it
508
       * a character at a time.
509
       * Trying to handle it both ways would make this code considerably more
510
       * complicated with likely little performance benefit.
511
       */
512
1.11k
      int is_negative = 0;
513
514
      /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
515
9.38k
      while (tok->st_pos < (int)json_inf_str_len)
516
8.37k
      {
517
8.37k
        char inf_char = *str;
518
8.37k
        if (inf_char != json_inf_str[tok->st_pos] &&
519
8.37k
            ((tok->flags & JSON_TOKENER_STRICT) ||
520
3.65k
              inf_char != json_inf_str_invert[tok->st_pos])
521
8.37k
           )
522
104
        {
523
104
          tok->err = json_tokener_error_parse_unexpected;
524
104
          goto out;
525
104
        }
526
8.27k
        tok->st_pos++;
527
8.27k
        (void)ADVANCE_CHAR(str, tok);
528
8.27k
        if (!PEEK_CHAR(c, tok))
529
0
        {
530
          /* out of input chars, for now at least */
531
0
          goto out;
532
0
        }
533
8.27k
      }
534
      /* We checked the full length of "Infinity", so create the object.
535
       * When handling -Infinity, the number parsing code will have dropped
536
       * the "-" into tok->pb for us, so check it now.
537
       */
538
1.00k
      if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
539
474
      {
540
474
        is_negative = 1;
541
474
      }
542
1.00k
      current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
543
1.00k
      if (current == NULL)
544
0
      {
545
0
        tok->err = json_tokener_error_memory;
546
0
        goto out;
547
0
      }
548
1.00k
      saved_state = json_tokener_state_finish;
549
1.00k
      state = json_tokener_state_eatws;
550
1.00k
      goto redo_char;
551
1.00k
    }
552
0
    break;
553
71.1k
    case json_tokener_state_null: /* aka starts with 'n' */
554
71.1k
    {
555
71.1k
      int size;
556
71.1k
      int size_nan;
557
71.1k
      printbuf_memappend_checked(tok->pb, &c, 1);
558
71.1k
      size = json_min(tok->st_pos + 1, json_null_str_len);
559
71.1k
      size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
560
71.1k
      if ((!(tok->flags & JSON_TOKENER_STRICT) &&
561
71.1k
           strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
562
71.1k
          (strncmp(json_null_str, tok->pb->buf, size) == 0))
563
69.4k
      {
564
69.4k
        if (tok->st_pos == json_null_str_len)
565
13.7k
        {
566
13.7k
          current = NULL;
567
13.7k
          saved_state = json_tokener_state_finish;
568
13.7k
          state = json_tokener_state_eatws;
569
13.7k
          goto redo_char;
570
13.7k
        }
571
69.4k
      }
572
1.63k
      else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
573
1.63k
                strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
574
1.63k
               (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
575
1.54k
      {
576
1.54k
        if (tok->st_pos == json_nan_str_len)
577
507
        {
578
507
          current = json_object_new_double(NAN);
579
507
          if (current == NULL)
580
0
          {
581
0
            tok->err = json_tokener_error_memory;
582
0
            goto out;
583
0
          }
584
507
          saved_state = json_tokener_state_finish;
585
507
          state = json_tokener_state_eatws;
586
507
          goto redo_char;
587
507
        }
588
1.54k
      }
589
96
      else
590
96
      {
591
96
        tok->err = json_tokener_error_parse_null;
592
96
        goto out;
593
96
      }
594
56.7k
      tok->st_pos++;
595
56.7k
    }
596
0
    break;
597
598
3.02k
    case json_tokener_state_comment_start:
599
3.02k
      if (c == '*')
600
885
      {
601
885
        state = json_tokener_state_comment;
602
885
      }
603
2.13k
      else if (c == '/')
604
2.06k
      {
605
2.06k
        state = json_tokener_state_comment_eol;
606
2.06k
      }
607
74
      else
608
74
      {
609
74
        tok->err = json_tokener_error_parse_comment;
610
74
        goto out;
611
74
      }
612
2.94k
      printbuf_memappend_checked(tok->pb, &c, 1);
613
2.94k
      break;
614
615
4.44k
    case json_tokener_state_comment:
616
4.44k
    {
617
      /* Advance until we change state */
618
4.44k
      const char *case_start = str;
619
540k
      while (c != '*')
620
536k
      {
621
536k
        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
622
190
        {
623
190
          printbuf_memappend_checked(tok->pb, case_start,
624
190
                                     str - case_start);
625
190
          goto out;
626
190
        }
627
536k
      }
628
4.25k
      printbuf_memappend_checked(tok->pb, case_start, 1 + str - case_start);
629
4.25k
      state = json_tokener_state_comment_end;
630
4.25k
    }
631
0
    break;
632
633
2.06k
    case json_tokener_state_comment_eol:
634
2.06k
    {
635
      /* Advance until we change state */
636
2.06k
      const char *case_start = str;
637
185k
      while (c != '\n')
638
183k
      {
639
183k
        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
640
120
        {
641
120
          printbuf_memappend_checked(tok->pb, case_start,
642
120
                                     str - case_start);
643
120
          goto out;
644
120
        }
645
183k
      }
646
1.94k
      printbuf_memappend_checked(tok->pb, case_start, str - case_start);
647
1.94k
      MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
648
1.94k
      state = json_tokener_state_eatws;
649
1.94k
    }
650
0
    break;
651
652
4.25k
    case json_tokener_state_comment_end:
653
4.25k
      printbuf_memappend_checked(tok->pb, &c, 1);
654
4.25k
      if (c == '/')
655
686
      {
656
686
        MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
657
686
        state = json_tokener_state_eatws;
658
686
      }
659
3.57k
      else
660
3.57k
      {
661
3.57k
        state = json_tokener_state_comment;
662
3.57k
      }
663
4.25k
      break;
664
665
146k
    case json_tokener_state_string:
666
146k
    {
667
      /* Advance until we change state */
668
146k
      const char *case_start = str;
669
2.57M
      while (1)
670
2.57M
      {
671
2.57M
        if (c == tok->quote_char)
672
85.8k
        {
673
85.8k
          printbuf_memappend_checked(tok->pb, case_start,
674
85.8k
                                     str - case_start);
675
85.8k
          current =
676
85.8k
              json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
677
85.8k
          if (current == NULL)
678
0
          {
679
0
            tok->err = json_tokener_error_memory;
680
0
            goto out;
681
0
          }
682
85.8k
          saved_state = json_tokener_state_finish;
683
85.8k
          state = json_tokener_state_eatws;
684
85.8k
          break;
685
85.8k
        }
686
2.49M
        else if (c == '\\')
687
60.3k
        {
688
60.3k
          printbuf_memappend_checked(tok->pb, case_start,
689
60.3k
                                     str - case_start);
690
60.3k
          saved_state = json_tokener_state_string;
691
60.3k
          state = json_tokener_state_string_escape;
692
60.3k
          break;
693
60.3k
        }
694
2.43M
        else if ((tok->flags & JSON_TOKENER_STRICT) && (unsigned char)c <= 0x1f)
695
0
        {
696
          // Disallow control characters in strict mode
697
0
          tok->err = json_tokener_error_parse_string;
698
0
          goto out;
699
0
        }
700
2.43M
        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
701
226
        {
702
226
          printbuf_memappend_checked(tok->pb, case_start,
703
226
                                     str - case_start);
704
226
          goto out;
705
226
        }
706
2.43M
      }
707
146k
    }
708
146k
    break;
709
710
146k
    case json_tokener_state_string_escape:
711
71.9k
      switch (c)
712
71.9k
      {
713
12.6k
      case '"':
714
41.9k
      case '\\':
715
42.3k
      case '/':
716
42.3k
        printbuf_memappend_checked(tok->pb, &c, 1);
717
42.3k
        state = saved_state;
718
42.3k
        break;
719
729
      case 'b':
720
22.0k
      case 'n':
721
22.6k
      case 'r':
722
23.8k
      case 't':
723
24.3k
      case 'f':
724
24.3k
        if (c == 'b')
725
729
          printbuf_memappend_checked(tok->pb, "\b", 1);
726
23.6k
        else if (c == 'n')
727
21.3k
          printbuf_memappend_checked(tok->pb, "\n", 1);
728
2.26k
        else if (c == 'r')
729
599
          printbuf_memappend_checked(tok->pb, "\r", 1);
730
1.66k
        else if (c == 't')
731
1.13k
          printbuf_memappend_checked(tok->pb, "\t", 1);
732
530
        else if (c == 'f')
733
530
          printbuf_memappend_checked(tok->pb, "\f", 1);
734
24.3k
        state = saved_state;
735
24.3k
        break;
736
5.19k
      case 'u':
737
5.19k
        tok->ucs_char = 0;
738
5.19k
        tok->st_pos = 0;
739
5.19k
        state = json_tokener_state_escape_unicode;
740
5.19k
        break;
741
34
      default: tok->err = json_tokener_error_parse_string; goto out;
742
71.9k
      }
743
71.8k
      break;
744
745
      // ===================================================
746
747
71.8k
    case json_tokener_state_escape_unicode:
748
6.86k
    {
749
      /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
750
27.2k
      while (1)
751
27.2k
      {
752
27.2k
        if (!c || !is_hex_char(c))
753
92
        {
754
92
          tok->err = json_tokener_error_parse_string;
755
92
          goto out;
756
92
        }
757
27.1k
        tok->ucs_char |=
758
27.1k
            ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
759
27.1k
        tok->st_pos++;
760
27.1k
        if (tok->st_pos >= 4)
761
6.77k
          break;
762
763
20.4k
        (void)ADVANCE_CHAR(str, tok);
764
20.4k
        if (!PEEK_CHAR(c, tok))
765
0
        {
766
          /*
767
           * We're out of characters in the current call to
768
           * json_tokener_parse(), but a subsequent call might
769
           * provide us with more, so leave our current state
770
           * as-is (including tok->high_surrogate) and return.
771
           */
772
0
          goto out;
773
0
        }
774
20.4k
      }
775
6.77k
      tok->st_pos = 0;
776
777
      /* Now, we have a full \uNNNN sequence in tok->ucs_char */
778
779
      /* If the *previous* sequence was a high surrogate ... */
780
6.77k
      if (tok->high_surrogate)
781
1.66k
      {
782
1.66k
        if (IS_LOW_SURROGATE(tok->ucs_char))
783
736
        {
784
          /* Recalculate the ucs_char, then fall thru to process normally */
785
736
          tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
786
736
                                                tok->ucs_char);
787
736
        }
788
925
        else
789
925
        {
790
          /* High surrogate was not followed by a low surrogate
791
           * Replace the high and process the rest normally
792
           */
793
925
          printbuf_memappend_checked(tok->pb,
794
925
                                     (char *)utf8_replacement_char, 3);
795
925
        }
796
1.66k
        tok->high_surrogate = 0;
797
1.66k
      }
798
799
6.77k
      if (tok->ucs_char < 0x80)
800
1.46k
      {
801
1.46k
        unsigned char unescaped_utf[1];
802
1.46k
        unescaped_utf[0] = tok->ucs_char;
803
1.46k
        printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 1);
804
1.46k
      }
805
5.30k
      else if (tok->ucs_char < 0x800)
806
429
      {
807
429
        unsigned char unescaped_utf[2];
808
429
        unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
809
429
        unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
810
429
        printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 2);
811
429
      }
812
4.88k
      else if (IS_HIGH_SURROGATE(tok->ucs_char))
813
2.93k
      {
814
        /*
815
         * The next two characters should be \u, HOWEVER,
816
         * we can't simply peek ahead here, because the
817
         * characters we need might not be passed to us
818
         * until a subsequent call to json_tokener_parse.
819
         * Instead, transition through a couple of states.
820
         * (now):
821
         *   _escape_unicode => _unicode_need_escape
822
         * (see a '\\' char):
823
         *   _unicode_need_escape => _unicode_need_u
824
         * (see a 'u' char):
825
         *   _unicode_need_u => _escape_unicode
826
         *      ...and we'll end up back around here.
827
         */
828
2.93k
        tok->high_surrogate = tok->ucs_char;
829
2.93k
        tok->ucs_char = 0;
830
2.93k
        state = json_tokener_state_escape_unicode_need_escape;
831
2.93k
        break;
832
2.93k
      }
833
1.94k
      else if (IS_LOW_SURROGATE(tok->ucs_char))
834
616
      {
835
        /* Got a low surrogate not preceded by a high */
836
616
        printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3);
837
616
      }
838
1.33k
      else if (tok->ucs_char < 0x10000)
839
681
      {
840
681
        unsigned char unescaped_utf[3];
841
681
        unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
842
681
        unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
843
681
        unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
844
681
        printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 3);
845
681
      }
846
650
      else if (tok->ucs_char < 0x110000)
847
650
      {
848
650
        unsigned char unescaped_utf[4];
849
650
        unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
850
650
        unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
851
650
        unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
852
650
        unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
853
650
        printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 4);
854
650
      }
855
0
      else
856
0
      {
857
        /* Don't know what we got--insert the replacement char */
858
0
        printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3);
859
0
      }
860
3.83k
      state = saved_state; // i.e. _state_string or _state_object_field
861
3.83k
    }
862
0
    break;
863
864
2.93k
    case json_tokener_state_escape_unicode_need_escape:
865
      // We get here after processing a high_surrogate
866
      // require a '\\' char
867
2.93k
      if (!c || c != '\\')
868
639
      {
869
        /* Got a high surrogate without another sequence following
870
         * it.  Put a replacement char in for the high surrogate
871
         * and pop back up to _state_string or _state_object_field.
872
         */
873
639
        printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3);
874
639
        tok->high_surrogate = 0;
875
639
        tok->ucs_char = 0;
876
639
        tok->st_pos = 0;
877
639
        state = saved_state;
878
639
        goto redo_char;
879
639
      }
880
2.29k
      state = json_tokener_state_escape_unicode_need_u;
881
2.29k
      break;
882
883
2.29k
    case json_tokener_state_escape_unicode_need_u:
884
      /* We already had a \ char, check that it's \u */
885
2.29k
      if (!c || c != 'u')
886
625
      {
887
        /* Got a high surrogate with some non-unicode escape
888
         * sequence following it.
889
         * Put a replacement char in for the high surrogate
890
         * and handle the escape sequence normally.
891
         */
892
625
        printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3);
893
625
        tok->high_surrogate = 0;
894
625
        tok->ucs_char = 0;
895
625
        tok->st_pos = 0;
896
625
        state = json_tokener_state_string_escape;
897
625
        goto redo_char;
898
625
      }
899
1.66k
      state = json_tokener_state_escape_unicode;
900
1.66k
      break;
901
902
      // ===================================================
903
904
8.66k
    case json_tokener_state_boolean:
905
8.66k
    {
906
8.66k
      int size1, size2;
907
8.66k
      printbuf_memappend_checked(tok->pb, &c, 1);
908
8.66k
      size1 = json_min(tok->st_pos + 1, json_true_str_len);
909
8.66k
      size2 = json_min(tok->st_pos + 1, json_false_str_len);
910
8.66k
      if ((!(tok->flags & JSON_TOKENER_STRICT) &&
911
8.66k
           strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
912
8.66k
          (strncmp(json_true_str, tok->pb->buf, size1) == 0))
913
5.30k
      {
914
5.30k
        if (tok->st_pos == json_true_str_len)
915
1.03k
        {
916
1.03k
          current = json_object_new_boolean(1);
917
1.03k
          if (current == NULL)
918
0
          {
919
0
            tok->err = json_tokener_error_memory;
920
0
            goto out;
921
0
          }
922
1.03k
          saved_state = json_tokener_state_finish;
923
1.03k
          state = json_tokener_state_eatws;
924
1.03k
          goto redo_char;
925
1.03k
        }
926
5.30k
      }
927
3.36k
      else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
928
3.36k
                strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
929
3.36k
               (strncmp(json_false_str, tok->pb->buf, size2) == 0))
930
3.21k
      {
931
3.21k
        if (tok->st_pos == json_false_str_len)
932
511
        {
933
511
          current = json_object_new_boolean(0);
934
511
          if (current == NULL)
935
0
          {
936
0
            tok->err = json_tokener_error_memory;
937
0
            goto out;
938
0
          }
939
511
          saved_state = json_tokener_state_finish;
940
511
          state = json_tokener_state_eatws;
941
511
          goto redo_char;
942
511
        }
943
3.21k
      }
944
152
      else
945
152
      {
946
152
        tok->err = json_tokener_error_parse_boolean;
947
152
        goto out;
948
152
      }
949
6.97k
      tok->st_pos++;
950
6.97k
    }
951
0
    break;
952
953
192k
    case json_tokener_state_number:
954
192k
    {
955
      /* Advance until we change state */
956
192k
      const char *case_start = str;
957
192k
      int case_len = 0;
958
192k
      int is_exponent = 0;
959
192k
      int neg_sign_ok = 1;
960
192k
      int pos_sign_ok = 0;
961
192k
      if (printbuf_length(tok->pb) > 0)
962
0
      {
963
        /* We don't save all state from the previous incremental parse
964
           so we need to re-generate it based on the saved string so far.
965
         */
966
0
        char *e_loc = strchr(tok->pb->buf, 'e');
967
0
        if (!e_loc)
968
0
          e_loc = strchr(tok->pb->buf, 'E');
969
0
        if (e_loc)
970
0
        {
971
0
          char *last_saved_char =
972
0
              &tok->pb->buf[printbuf_length(tok->pb) - 1];
973
0
          is_exponent = 1;
974
0
          pos_sign_ok = neg_sign_ok = 1;
975
          /* If the "e" isn't at the end, we can't start with a '-' */
976
0
          if (e_loc != last_saved_char)
977
0
          {
978
0
            neg_sign_ok = 0;
979
0
            pos_sign_ok = 0;
980
0
          }
981
          // else leave it set to 1, i.e. start of the new input
982
0
        }
983
0
      }
984
985
530k
      while (c && ((c >= '0' && c <= '9') ||
986
530k
                   (!is_exponent && (c == 'e' || c == 'E')) ||
987
530k
                   (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
988
530k
                   (!tok->is_double && c == '.')))
989
338k
      {
990
338k
        pos_sign_ok = neg_sign_ok = 0;
991
338k
        ++case_len;
992
993
        /* non-digit characters checks */
994
        /* note: since the main loop condition to get here was
995
         * an input starting with 0-9 or '-', we are
996
         * protected from input starting with '.' or
997
         * e/E.
998
         */
999
338k
        switch (c)
1000
338k
        {
1001
820
        case '.':
1002
820
          tok->is_double = 1;
1003
820
          pos_sign_ok = 1;
1004
820
          neg_sign_ok = 1;
1005
820
          break;
1006
1.68k
        case 'e': /* FALLTHRU */
1007
3.23k
        case 'E':
1008
3.23k
          is_exponent = 1;
1009
3.23k
          tok->is_double = 1;
1010
          /* the exponent part can begin with a negative sign */
1011
3.23k
          pos_sign_ok = neg_sign_ok = 1;
1012
3.23k
          break;
1013
334k
        default: break;
1014
338k
        }
1015
1016
338k
        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
1017
0
        {
1018
0
          printbuf_memappend_checked(tok->pb, case_start, case_len);
1019
0
          goto out;
1020
0
        }
1021
338k
      }
1022
      /*
1023
        Now we know c isn't a valid number char, but check whether
1024
        it might have been intended to be, and return a potentially
1025
        more understandable error right away.
1026
        However, if we're at the top-level, use the number as-is
1027
        because c can be part of a new object to parse on the
1028
        next call to json_tokener_parse().
1029
       */
1030
192k
      if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
1031
192k
          c != 'I' && c != 'i' && !is_ws_char(c))
1032
281
      {
1033
281
        tok->err = json_tokener_error_parse_number;
1034
281
        goto out;
1035
281
      }
1036
192k
      if (case_len > 0)
1037
192k
        printbuf_memappend_checked(tok->pb, case_start, case_len);
1038
1039
      // Check for -Infinity
1040
192k
      if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
1041
497
      {
1042
497
        state = json_tokener_state_inf;
1043
497
        tok->st_pos = 0;
1044
497
        goto redo_char;
1045
497
      }
1046
191k
      if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
1047
3.61k
      {
1048
        /* Trim some chars off the end, to allow things
1049
           like "123e+" to parse ok. */
1050
7.81k
        while (printbuf_length(tok->pb) > 1)
1051
5.98k
        {
1052
5.98k
          char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
1053
5.98k
          if (last_char != 'e' && last_char != 'E' &&
1054
5.98k
              last_char != '-' && last_char != '+')
1055
1.78k
          {
1056
1.78k
            break;
1057
1.78k
          }
1058
4.20k
          tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
1059
4.20k
          printbuf_length(tok->pb)--;
1060
4.20k
        }
1061
3.61k
      }
1062
191k
    }
1063
0
      {
1064
191k
        int64_t num64;
1065
191k
        uint64_t numuint64;
1066
191k
        double numd;
1067
191k
        if (!tok->is_double && tok->pb->buf[0] == '-' &&
1068
191k
            json_parse_int64(tok->pb->buf, &num64) == 0)
1069
2.55k
        {
1070
2.55k
          if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT))
1071
0
          {
1072
0
            tok->err = json_tokener_error_parse_number;
1073
0
            goto out;
1074
0
          }
1075
2.55k
          current = json_object_new_int64(num64);
1076
2.55k
          if (current == NULL)
1077
0
          {
1078
0
            tok->err = json_tokener_error_memory;
1079
0
            goto out;
1080
0
          }
1081
2.55k
        }
1082
189k
        else if (!tok->is_double && tok->pb->buf[0] != '-' &&
1083
189k
                 json_parse_uint64(tok->pb->buf, &numuint64) == 0)
1084
185k
        {
1085
185k
          if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT))
1086
0
          {
1087
0
            tok->err = json_tokener_error_parse_number;
1088
0
            goto out;
1089
0
          }
1090
185k
          if (numuint64 && tok->pb->buf[0] == '0' &&
1091
185k
              (tok->flags & JSON_TOKENER_STRICT))
1092
0
          {
1093
0
            tok->err = json_tokener_error_parse_number;
1094
0
            goto out;
1095
0
          }
1096
185k
          if (numuint64 <= INT64_MAX)
1097
184k
          {
1098
184k
            num64 = (uint64_t)numuint64;
1099
184k
            current = json_object_new_int64(num64);
1100
184k
            if (current == NULL)
1101
0
            {
1102
0
              tok->err = json_tokener_error_memory;
1103
0
              goto out;
1104
0
            }
1105
184k
          }
1106
806
          else
1107
806
          {
1108
806
            current = json_object_new_uint64(numuint64);
1109
806
            if (current == NULL)
1110
0
            {
1111
0
              tok->err = json_tokener_error_memory;
1112
0
              goto out;
1113
0
            }
1114
806
          }
1115
185k
        }
1116
3.64k
        else if (tok->is_double &&
1117
3.64k
                 json_tokener_parse_double(
1118
3.61k
                     tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
1119
3.58k
        {
1120
3.58k
          current = json_object_new_double_s(numd, tok->pb->buf);
1121
3.58k
          if (current == NULL)
1122
0
          {
1123
0
            tok->err = json_tokener_error_memory;
1124
0
            goto out;
1125
0
          }
1126
3.58k
        }
1127
61
        else
1128
61
        {
1129
61
          tok->err = json_tokener_error_parse_number;
1130
61
          goto out;
1131
61
        }
1132
191k
        saved_state = json_tokener_state_finish;
1133
191k
        state = json_tokener_state_eatws;
1134
191k
        goto redo_char;
1135
191k
      }
1136
0
      break;
1137
1138
209k
    case json_tokener_state_array_after_sep:
1139
224k
    case json_tokener_state_array:
1140
224k
      if (c == ']')
1141
1.76k
      {
1142
        // Minimize memory usage; assume parsed objs are unlikely to be changed
1143
1.76k
        json_object_array_shrink(current, 0);
1144
1145
1.76k
        if (state == json_tokener_state_array_after_sep &&
1146
1.76k
            (tok->flags & JSON_TOKENER_STRICT))
1147
0
        {
1148
0
          tok->err = json_tokener_error_parse_unexpected;
1149
0
          goto out;
1150
0
        }
1151
1.76k
        saved_state = json_tokener_state_finish;
1152
1.76k
        state = json_tokener_state_eatws;
1153
1.76k
      }
1154
222k
      else
1155
222k
      {
1156
222k
        if (tok->depth >= tok->max_depth - 1)
1157
4
        {
1158
4
          tok->err = json_tokener_error_depth;
1159
4
          goto out;
1160
4
        }
1161
222k
        state = json_tokener_state_array_add;
1162
222k
        tok->depth++;
1163
222k
        json_tokener_reset_level(tok, tok->depth);
1164
222k
        goto redo_char;
1165
222k
      }
1166
1.76k
      break;
1167
1168
221k
    case json_tokener_state_array_add:
1169
221k
      if (json_object_array_add(current, obj) != 0)
1170
0
      {
1171
0
        tok->err = json_tokener_error_memory;
1172
0
        goto out;
1173
0
      }
1174
221k
      saved_state = json_tokener_state_array_sep;
1175
221k
      state = json_tokener_state_eatws;
1176
221k
      goto redo_char;
1177
1178
221k
    case json_tokener_state_array_sep:
1179
221k
      if (c == ']')
1180
11.7k
      {
1181
        // Minimize memory usage; assume parsed objs are unlikely to be changed
1182
11.7k
        json_object_array_shrink(current, 0);
1183
1184
11.7k
        saved_state = json_tokener_state_finish;
1185
11.7k
        state = json_tokener_state_eatws;
1186
11.7k
      }
1187
209k
      else if (c == ',')
1188
209k
      {
1189
209k
        saved_state = json_tokener_state_array_after_sep;
1190
209k
        state = json_tokener_state_eatws;
1191
209k
      }
1192
175
      else
1193
175
      {
1194
175
        tok->err = json_tokener_error_parse_array;
1195
175
        goto out;
1196
175
      }
1197
220k
      break;
1198
1199
220k
    case json_tokener_state_object_field_start:
1200
157k
    case json_tokener_state_object_field_start_after_sep:
1201
157k
      if (c == '}')
1202
21.1k
      {
1203
21.1k
        if (state == json_tokener_state_object_field_start_after_sep &&
1204
21.1k
            (tok->flags & JSON_TOKENER_STRICT))
1205
0
        {
1206
0
          tok->err = json_tokener_error_parse_unexpected;
1207
0
          goto out;
1208
0
        }
1209
21.1k
        saved_state = json_tokener_state_finish;
1210
21.1k
        state = json_tokener_state_eatws;
1211
21.1k
      }
1212
136k
      else if (c == '"' || c == '\'')
1213
136k
      {
1214
136k
        tok->quote_char = c;
1215
136k
        printbuf_reset(tok->pb);
1216
136k
        state = json_tokener_state_object_field;
1217
136k
      }
1218
59
      else
1219
59
      {
1220
59
        tok->err = json_tokener_error_parse_object_key_name;
1221
59
        goto out;
1222
59
      }
1223
157k
      break;
1224
1225
157k
    case json_tokener_state_object_field:
1226
147k
    {
1227
      /* Advance until we change state */
1228
147k
      const char *case_start = str;
1229
2.24M
      while (1)
1230
2.24M
      {
1231
2.24M
        if (c == tok->quote_char)
1232
136k
        {
1233
136k
          printbuf_memappend_checked(tok->pb, case_start,
1234
136k
                                     str - case_start);
1235
136k
          obj_field_name = strdup(tok->pb->buf);
1236
136k
          if (obj_field_name == NULL)
1237
0
          {
1238
0
            tok->err = json_tokener_error_memory;
1239
0
            goto out;
1240
0
          }
1241
136k
          saved_state = json_tokener_state_object_field_end;
1242
136k
          state = json_tokener_state_eatws;
1243
136k
          break;
1244
136k
        }
1245
2.11M
        else if (c == '\\')
1246
10.8k
        {
1247
10.8k
          printbuf_memappend_checked(tok->pb, case_start,
1248
10.8k
                                     str - case_start);
1249
10.8k
          saved_state = json_tokener_state_object_field;
1250
10.8k
          state = json_tokener_state_string_escape;
1251
10.8k
          break;
1252
10.8k
        }
1253
2.10M
        else if ((tok->flags & JSON_TOKENER_STRICT) && (unsigned char)c <= 0x1f)
1254
0
        {
1255
          // Disallow control characters in strict mode
1256
0
          tok->err = json_tokener_error_parse_string;
1257
0
          goto out;
1258
0
        }
1259
2.10M
        if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
1260
204
        {
1261
204
          printbuf_memappend_checked(tok->pb, case_start,
1262
204
                                     str - case_start);
1263
204
          goto out;
1264
204
        }
1265
2.10M
      }
1266
147k
    }
1267
147k
    break;
1268
1269
147k
    case json_tokener_state_object_field_end:
1270
136k
      if (c == ':')
1271
135k
      {
1272
135k
        saved_state = json_tokener_state_object_value;
1273
135k
        state = json_tokener_state_eatws;
1274
135k
      }
1275
142
      else
1276
142
      {
1277
142
        tok->err = json_tokener_error_parse_object_key_sep;
1278
142
        goto out;
1279
142
      }
1280
135k
      break;
1281
1282
135k
    case json_tokener_state_object_value:
1283
135k
      if (tok->depth >= tok->max_depth - 1)
1284
3
      {
1285
3
        tok->err = json_tokener_error_depth;
1286
3
        goto out;
1287
3
      }
1288
135k
      state = json_tokener_state_object_value_add;
1289
135k
      tok->depth++;
1290
135k
      json_tokener_reset_level(tok, tok->depth);
1291
135k
      goto redo_char;
1292
1293
133k
    case json_tokener_state_object_value_add:
1294
133k
      if (json_object_object_add(current, obj_field_name, obj) != 0)
1295
0
      {
1296
0
        tok->err = json_tokener_error_memory;
1297
0
        goto out;
1298
0
      }
1299
133k
      free(obj_field_name);
1300
133k
      obj_field_name = NULL;
1301
133k
      saved_state = json_tokener_state_object_sep;
1302
133k
      state = json_tokener_state_eatws;
1303
133k
      goto redo_char;
1304
1305
133k
    case json_tokener_state_object_sep:
1306
      /* { */
1307
133k
      if (c == '}')
1308
34.3k
      {
1309
34.3k
        saved_state = json_tokener_state_finish;
1310
34.3k
        state = json_tokener_state_eatws;
1311
34.3k
      }
1312
99.6k
      else if (c == ',')
1313
99.3k
      {
1314
99.3k
        saved_state = json_tokener_state_object_field_start_after_sep;
1315
99.3k
        state = json_tokener_state_eatws;
1316
99.3k
      }
1317
291
      else
1318
291
      {
1319
291
        tok->err = json_tokener_error_parse_object_value_sep;
1320
291
        goto out;
1321
291
      }
1322
133k
      break;
1323
4.50M
    }
1324
1.26M
    (void)ADVANCE_CHAR(str, tok);
1325
1.26M
    if (!c) // This is the char *before* advancing
1326
9
      break;
1327
1.26M
  } /* while(PEEK_CHAR) */
1328
1329
10.3k
out:
1330
10.3k
  if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
1331
0
  {
1332
0
    tok->err = json_tokener_error_parse_utf8_string;
1333
0
  }
1334
10.3k
  if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
1335
10.3k
      (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
1336
94
          JSON_TOKENER_STRICT)
1337
0
  {
1338
    /* unexpected char after JSON data */
1339
0
    tok->err = json_tokener_error_parse_unexpected;
1340
0
  }
1341
10.3k
  if (!c)
1342
8.71k
  {
1343
    /* We hit an eof char (0) */
1344
8.71k
    if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
1345
785
      tok->err = json_tokener_error_parse_eof;
1346
8.71k
  }
1347
1348
10.3k
#ifdef HAVE_USELOCALE
1349
10.3k
  uselocale(oldlocale);
1350
10.3k
  freelocale(newloc);
1351
#elif defined(HAVE_SETLOCALE)
1352
  setlocale(LC_NUMERIC, oldlocale);
1353
  free(oldlocale);
1354
#endif
1355
1356
10.3k
  if (tok->err == json_tokener_success)
1357
8.02k
  {
1358
8.02k
    json_object *ret = json_object_get(current);
1359
8.02k
    int ii;
1360
1361
    /* Partially reset, so we parse additional objects on subsequent calls. */
1362
16.3k
    for (ii = tok->depth; ii >= 0; ii--)
1363
8.36k
      json_tokener_reset_level(tok, ii);
1364
8.02k
    return ret;
1365
8.02k
  }
1366
1367
2.35k
  MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
1368
2.35k
           tok->char_offset);
1369
2.35k
  return NULL;
1370
10.3k
}
1371
1372
/**
 * Incrementally validate one byte of a UTF-8 encoded stream.
 *
 * @param c      the next input byte.
 * @param nBytes in/out counter of continuation bytes still expected for the
 *               multi-byte sequence in progress.  0 means c must start a new
 *               character.  The caller keeps this value between calls.
 * @return 1 if c is acceptable at this position, 0 otherwise.
 *
 * This checks lead/continuation byte structure and rejects the lead bytes
 * that can never occur in UTF-8 (0xC0, 0xC1, 0xF5..0xFF).  It does not track
 * the decoded code point, so overlong 3- and 4-byte forms, encoded
 * surrogates and 0xF4-led values above U+10FFFF are not detected here.
 */
static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
{
  unsigned char chr = c;

  if (*nBytes != 0)
  {
    /* Inside a multi-byte sequence: only 10xxxxxx may follow */
    if ((chr & 0xC0) != 0x80)
      return 0;
    (*nBytes)--;
    return 1;
  }

  /* Start of a new character; plain ASCII needs no further bytes */
  if (chr < 0x80)
    return 1;

  /* 0xC0/0xC1 could only begin an overlong encoding of an ASCII char and
   * 0xF5 and above could only begin a code point beyond U+10FFFF, so none
   * of them is ever a valid lead byte.
   */
  if (chr == 0xC0 || chr == 0xC1 || chr > 0xF4)
    return 0;

  if ((chr & 0xe0) == 0xc0)
    *nBytes = 1;
  else if ((chr & 0xf0) == 0xe0)
    *nBytes = 2;
  else if ((chr & 0xf8) == 0xf0)
    *nBytes = 3;
  else
    return 0; /* continuation byte (0x80..0xBF) with no lead byte */

  return 1;
}
1397
1398
void json_tokener_set_flags(struct json_tokener *tok, int flags)
1399
0
{
1400
0
  tok->flags = flags;
1401
0
}
1402
1403
size_t json_tokener_get_parse_end(struct json_tokener *tok)
1404
0
{
1405
0
  assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */
1406
0
  return (size_t)tok->char_offset;
1407
0
}
1408
1409
/**
 * Convert the text in buf to a double, requiring that all of it be consumed.
 *
 * @param buf    NUL-terminated text to convert.
 * @param len    number of characters of buf that make up the number.
 * @param retval receives whatever strtod() returned, on success and failure.
 * @return 0 if strtod() converted at least one character and stopped exactly
 *         at buf + len, 1 otherwise.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
  char *end;

  *retval = strtod(buf, &end);

  /* strtod() sets end == buf when no conversion was performed.  Without
   * this check an empty input (len == 0) would satisfy buf + len == end and
   * be reported as a successfully parsed 0.0.
   */
  if (end == buf)
    return 1;

  if (buf + len == end)
    return 0; // It worked
  return 1;
}