Coverage Report

Created: 2024-02-11 06:18

/src/libsass/src/json.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
  Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3
  All rights reserved.
4
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
7
  in the Software without restriction, including without limitation the rights
8
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
  copies of the Software, and to permit persons to whom the Software is
10
  furnished to do so, subject to the following conditions:
11
12
  The above copyright notice and this permission notice shall be included in
13
  all copies or substantial portions of the Software.
14
15
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
  THE SOFTWARE.
22
*/
23
24
#ifdef _MSC_VER
25
#define _CRT_SECURE_NO_WARNINGS
26
#define _CRT_NONSTDC_NO_DEPRECATE
27
#endif
28
29
#include "json.hpp"
30
31
// include utf8 library used by libsass
32
// ToDo: replace internal json utf8 code
33
#include "utf8.h"
34
35
#include <assert.h>
36
#include <stdint.h>
37
#include <stdio.h>
38
#include <stdlib.h>
39
#include <string.h>
40
41
#if defined(_MSC_VER) && _MSC_VER < 1900
42
#include <stdarg.h>
43
#ifdef snprintf
44
#undef snprintf
45
#endif
46
extern "C" int snprintf(char *, size_t, const char *, ...);
47
#endif
48
49
0
#define out_of_memory() do {                    \
50
0
    fprintf(stderr, "Out of memory.\n");    \
51
0
    exit(EXIT_FAILURE);                     \
52
0
  } while (0)
53
54
/* Sadly, strdup is not portable. */
55
static char *json_strdup(const char *str)
56
159
{
57
159
  char *ret = (char*) malloc(strlen(str) + 1);
58
159
  if (ret == NULL)
59
0
    out_of_memory();
60
159
  strcpy(ret, str);
61
159
  return ret;
62
159
}
63
64
/* String buffer */
65
66
typedef struct
67
{
68
  char *cur;
69
  char *end;
70
  char *start;
71
} SB;
72
73
static void sb_init(SB *sb)
74
19
{
75
19
  sb->start = (char*) malloc(17);
76
19
  if (sb->start == NULL)
77
0
    out_of_memory();
78
19
  sb->cur = sb->start;
79
19
  sb->end = sb->start + 16;
80
19
}
81
82
/* sb and need may be evaluated multiple times. */
83
5.12M
#define sb_need(sb, need) do {                  \
84
5.12M
    if ((sb)->end - (sb)->cur < (need))     \
85
5.12M
      sb_grow(sb, need);                  \
86
5.12M
  } while (0)
87
88
static void sb_grow(SB *sb, int need)
89
166
{
90
166
  size_t length = sb->cur - sb->start;
91
166
  size_t alloc = sb->end - sb->start;
92
93
166
  do {
94
166
    alloc *= 2;
95
166
  } while (alloc < length + need);
96
97
166
  sb->start = (char*) realloc(sb->start, alloc + 1);
98
166
  if (sb->start == NULL)
99
0
    out_of_memory();
100
166
  sb->cur = sb->start + length;
101
166
  sb->end = sb->start + alloc;
102
166
}
103
104
static void sb_put(SB *sb, const char *bytes, int count)
105
385
{
106
385
  sb_need(sb, count);
107
385
  memcpy(sb->cur, bytes, count);
108
385
  sb->cur += count;
109
385
}
110
111
19
#define sb_putc(sb, c) do {         \
112
19
    if ((sb)->cur >= (sb)->end) \
113
19
      sb_grow(sb, 1);         \
114
19
    *(sb)->cur++ = (c);         \
115
19
  } while (0)
116
117
static void sb_puts(SB *sb, const char *str)
118
385
{
119
385
  sb_put(sb, str, (int)strlen(str));
120
385
}
121
122
static char *sb_finish(SB *sb)
123
19
{
124
19
  *sb->cur = 0;
125
19
  assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
126
0
  return sb->start;
127
19
}
128
129
static void sb_free(SB *sb)
130
0
{
131
0
  free(sb->start);
132
0
}
133
134
/*
135
 * Unicode helper functions
136
 *
137
 * These are taken from the ccan/charset module and customized a bit.
138
 * Putting them here means the compiler can (choose to) inline them,
139
 * and it keeps ccan/json from having a dependency.
140
 *
141
 * We use uint32_t Type for Unicode codepoints.
142
 * We need our own because wchar_t might be 16 bits.
143
 */
144
145
/*
146
 * Validate a single UTF-8 character starting at @s.
147
 * The string must be null-terminated.
148
 *
149
 * If it's valid, return its length (1 thru 4).
150
 * If it's invalid or clipped, return 0.
151
 *
152
 * This function implements the syntax given in RFC3629, which is
153
 * the same as that given in The Unicode Standard, Version 6.0.
154
 *
155
 * It has the following properties:
156
 *
157
 *  * All codepoints U+0000..U+10FFFF may be encoded,
158
 *    except for U+D800..U+DFFF, which are reserved
159
 *    for UTF-16 surrogate pair encoding.
160
 *  * UTF-8 byte sequences longer than 4 bytes are not permitted,
161
 *    as they exceed the range of Unicode.
162
 *  * The sixty-six Unicode "non-characters" are permitted
163
 *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
164
 */
165
static int utf8_validate_cz(const char *s)
166
15.1M
{
167
15.1M
  unsigned char c = *s++;
168
169
15.1M
  if (c <= 0x7F) {        /* 00..7F */
170
15.1M
    return 1;
171
15.1M
  } else if (c <= 0xC1) { /* 80..C1 */
172
    /* Disallow overlong 2-byte sequence. */
173
0
    return 0;
174
24
  } else if (c <= 0xDF) { /* C2..DF */
175
    /* Make sure subsequent byte is in the range 0x80..0xBF. */
176
0
    if (((unsigned char)*s++ & 0xC0) != 0x80)
177
0
      return 0;
178
179
0
    return 2;
180
24
  } else if (c <= 0xEF) { /* E0..EF */
181
    /* Disallow overlong 3-byte sequence. */
182
24
    if (c == 0xE0 && (unsigned char)*s < 0xA0)
183
0
      return 0;
184
185
    /* Disallow U+D800..U+DFFF. */
186
24
    if (c == 0xED && (unsigned char)*s > 0x9F)
187
0
      return 0;
188
189
    /* Make sure subsequent bytes are in the range 0x80..0xBF. */
190
24
    if (((unsigned char)*s++ & 0xC0) != 0x80)
191
0
      return 0;
192
24
    if (((unsigned char)*s++ & 0xC0) != 0x80)
193
0
      return 0;
194
195
24
    return 3;
196
24
  } else if (c <= 0xF4) { /* F0..F4 */
197
    /* Disallow overlong 4-byte sequence. */
198
0
    if (c == 0xF0 && (unsigned char)*s < 0x90)
199
0
      return 0;
200
201
    /* Disallow codepoints beyond U+10FFFF. */
202
0
    if (c == 0xF4 && (unsigned char)*s > 0x8F)
203
0
      return 0;
204
205
    /* Make sure subsequent bytes are in the range 0x80..0xBF. */
206
0
    if (((unsigned char)*s++ & 0xC0) != 0x80)
207
0
      return 0;
208
0
    if (((unsigned char)*s++ & 0xC0) != 0x80)
209
0
      return 0;
210
0
    if (((unsigned char)*s++ & 0xC0) != 0x80)
211
0
      return 0;
212
213
0
    return 4;
214
0
  } else {                /* F5..FF */
215
0
    return 0;
216
0
  }
217
15.1M
}
218
219
/* Validate a null-terminated UTF-8 string. */
220
static bool utf8_validate(const char *s)
221
318
{
222
318
  int len;
223
224
10.2M
  for (; *s != 0; s += len) {
225
10.2M
    len = utf8_validate_cz(s);
226
10.2M
    if (len == 0)
227
0
      return false;
228
10.2M
  }
229
230
318
  return true;
231
318
}
232
233
/*
234
 * Read a single UTF-8 character starting at @s,
235
 * returning the length, in bytes, of the character read.
236
 *
237
 * This function assumes input is valid UTF-8,
238
 * and that there are enough characters in front of @s.
239
 */
240
static int utf8_read_char(const char *s, uint32_t *out)
241
1
{
242
1
  const unsigned char *c = (const unsigned char*) s;
243
244
1
  assert(utf8_validate_cz(s));
245
246
1
  if (c[0] <= 0x7F) {
247
    /* 00..7F */
248
1
    *out = c[0];
249
1
    return 1;
250
1
  } else if (c[0] <= 0xDF) {
251
    /* C2..DF (unless input is invalid) */
252
0
    *out = ((uint32_t)c[0] & 0x1F) << 6 |
253
0
           ((uint32_t)c[1] & 0x3F);
254
0
    return 2;
255
0
  } else if (c[0] <= 0xEF) {
256
    /* E0..EF */
257
0
    *out = ((uint32_t)c[0] &  0xF) << 12 |
258
0
           ((uint32_t)c[1] & 0x3F) << 6  |
259
0
           ((uint32_t)c[2] & 0x3F);
260
0
    return 3;
261
0
  } else {
262
    /* F0..F4 (unless input is invalid) */
263
0
    *out = ((uint32_t)c[0] &  0x7) << 18 |
264
0
           ((uint32_t)c[1] & 0x3F) << 12 |
265
0
           ((uint32_t)c[2] & 0x3F) << 6  |
266
0
           ((uint32_t)c[3] & 0x3F);
267
0
    return 4;
268
0
  }
269
1
}
270
271
/*
272
 * Write a single UTF-8 character to @s,
273
 * returning the length, in bytes, of the character written.
274
 *
275
 * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
276
 *
277
 * This function will write up to 4 bytes to @out.
278
 */
279
static int utf8_write_char(uint32_t unicode, char *out)
280
0
{
281
0
  unsigned char *o = (unsigned char*) out;
282
283
0
  assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
284
285
0
  if (unicode <= 0x7F) {
286
    /* U+0000..U+007F */
287
0
    *o++ = unicode;
288
0
    return 1;
289
0
  } else if (unicode <= 0x7FF) {
290
    /* U+0080..U+07FF */
291
0
    *o++ = 0xC0 | unicode >> 6;
292
0
    *o++ = 0x80 | (unicode & 0x3F);
293
0
    return 2;
294
0
  } else if (unicode <= 0xFFFF) {
295
    /* U+0800..U+FFFF */
296
0
    *o++ = 0xE0 | unicode >> 12;
297
0
    *o++ = 0x80 | (unicode >> 6 & 0x3F);
298
0
    *o++ = 0x80 | (unicode & 0x3F);
299
0
    return 3;
300
0
  } else {
301
    /* U+10000..U+10FFFF */
302
0
    *o++ = 0xF0 | unicode >> 18;
303
0
    *o++ = 0x80 | (unicode >> 12 & 0x3F);
304
0
    *o++ = 0x80 | (unicode >> 6 & 0x3F);
305
0
    *o++ = 0x80 | (unicode & 0x3F);
306
0
    return 4;
307
0
  }
308
0
}
309
310
/*
311
 * Compute the Unicode codepoint of a UTF-16 surrogate pair.
312
 *
313
 * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
314
 * If they aren't, this function returns false.
315
 */
316
static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uint32_t *unicode)
317
0
{
318
0
  if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) {
319
0
    *unicode = 0x10000 + ((((uint32_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
320
0
    return true;
321
0
  } else {
322
0
    return false;
323
0
  }
324
0
}
325
326
/*
327
 * Construct a UTF-16 surrogate pair given a Unicode codepoint.
328
 *
329
 * @unicode must be U+10000..U+10FFFF.
330
 */
331
static void to_surrogate_pair(uint32_t unicode, uint16_t *uc, uint16_t *lc)
332
0
{
333
0
  uint32_t n;
334
335
0
  assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
336
337
0
  n = unicode - 0x10000;
338
0
  *uc = ((n >> 10) & 0x3FF) | 0xD800;
339
0
  *lc = (n & 0x3FF) | 0xDC00;
340
0
}
341
342
static bool is_space        (const char *c);
343
static bool is_digit        (const char *c);
344
static bool parse_value     (const char **sp, JsonNode        **out);
345
static bool parse_string    (const char **sp, char            **out);
346
static bool parse_number    (const char **sp, double           *out);
347
static bool parse_array     (const char **sp, JsonNode        **out);
348
static bool parse_object    (const char **sp, JsonNode        **out);
349
static bool parse_hex16     (const char **sp, uint16_t         *out);
350
351
static bool expect_literal  (const char **sp, const char *str);
352
static void skip_space      (const char **sp);
353
354
static void emit_value              (SB *out, const JsonNode *node);
355
static void emit_value_indented     (SB *out, const JsonNode *node, const char *space, int indent_level);
356
static void emit_string             (SB *out, const char *str);
357
static void emit_number             (SB *out, double num);
358
static void emit_array              (SB *out, const JsonNode *array);
359
static void emit_array_indented     (SB *out, const JsonNode *array, const char *space, int indent_level);
360
static void emit_object             (SB *out, const JsonNode *object);
361
static void emit_object_indented    (SB *out, const JsonNode *object, const char *space, int indent_level);
362
363
static int write_hex16(char *out, uint16_t val);
364
365
static JsonNode *mknode(JsonTag tag);
366
static void append_node(JsonNode *parent, JsonNode *child);
367
static void prepend_node(JsonNode *parent, JsonNode *child);
368
static void append_member(JsonNode *object, char *key, JsonNode *value);
369
370
/* Assertion-friendly validity checks */
371
static bool tag_is_valid(unsigned int tag);
372
static bool number_is_valid(const char *num);
373
374
JsonNode *json_decode(const char *json)
375
0
{
376
0
  const char *s = json;
377
0
  JsonNode *ret;
378
379
0
  skip_space(&s);
380
0
  if (!parse_value(&s, &ret))
381
0
    return NULL;
382
383
0
  skip_space(&s);
384
0
  if (*s != 0) {
385
0
    json_delete(ret);
386
0
    return NULL;
387
0
  }
388
389
0
  return ret;
390
0
}
391
392
char *json_encode(const JsonNode *node)
393
0
{
394
0
  return json_stringify(node, NULL);
395
0
}
396
397
char *json_encode_string(const char *str)
398
0
{
399
0
  SB sb;
400
0
  sb_init(&sb);
401
402
0
  try {
403
0
    emit_string(&sb, str);
404
0
  }
405
0
  catch (std::exception&) {
406
0
    sb_free(&sb);
407
0
    throw;
408
0
  }
409
410
0
  return sb_finish(&sb);
411
0
}
412
413
char *json_stringify(const JsonNode *node, const char *space)
414
19
{
415
19
  SB sb;
416
19
  sb_init(&sb);
417
418
19
  try {
419
19
    if (space != NULL)
420
19
      emit_value_indented(&sb, node, space, 0);
421
0
    else
422
0
      emit_value(&sb, node);
423
19
  }
424
19
  catch (std::exception&) {
425
0
    sb_free(&sb);
426
0
    throw;
427
0
  }
428
429
19
  return sb_finish(&sb);
430
19
}
431
432
void json_delete(JsonNode *node)
433
124
{
434
124
  if (node != NULL) {
435
124
    json_remove_from_parent(node);
436
437
124
    switch (node->tag) {
438
54
      case JSON_STRING:
439
54
        free(node->string_);
440
54
        break;
441
0
      case JSON_ARRAY:
442
19
      case JSON_OBJECT:
443
19
      {
444
19
        JsonNode *child, *next;
445
124
        for (child = node->children.head; child != NULL; child = next) {
446
105
          next = child->next;
447
105
          json_delete(child);
448
105
        }
449
19
        break;
450
0
      }
451
51
      default:;
452
124
    }
453
454
124
    free(node);
455
124
  }
456
124
}
457
458
bool json_validate(const char *json)
459
0
{
460
0
  const char *s = json;
461
462
0
  skip_space(&s);
463
0
  if (!parse_value(&s, NULL))
464
0
    return false;
465
466
0
  skip_space(&s);
467
0
  if (*s != 0)
468
0
    return false;
469
470
0
  return true;
471
0
}
472
473
JsonNode *json_find_element(JsonNode *array, int index)
474
0
{
475
0
  JsonNode *element;
476
0
  int i = 0;
477
478
0
  if (array == NULL || array->tag != JSON_ARRAY)
479
0
    return NULL;
480
481
0
  json_foreach(element, array) {
482
0
    if (i == index)
483
0
      return element;
484
0
    i++;
485
0
  }
486
487
0
  return NULL;
488
0
}
489
490
JsonNode *json_find_member(JsonNode *object, const char *name)
491
0
{
492
0
  JsonNode *member;
493
494
0
  if (object == NULL || object->tag != JSON_OBJECT)
495
0
    return NULL;
496
497
0
  json_foreach(member, object)
498
0
    if (strcmp(member->key, name) == 0)
499
0
      return member;
500
501
0
  return NULL;
502
0
}
503
504
JsonNode *json_first_child(const JsonNode *node)
505
0
{
506
0
  if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
507
0
    return node->children.head;
508
0
  return NULL;
509
0
}
510
511
static JsonNode *mknode(JsonTag tag)
512
124
{
513
124
  JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
514
124
  if (ret == NULL)
515
0
    out_of_memory();
516
124
  ret->tag = tag;
517
124
  return ret;
518
124
}
519
520
JsonNode *json_mknull(void)
521
0
{
522
0
  return mknode(JSON_NULL);
523
0
}
524
525
JsonNode *json_mkbool(bool b)
526
0
{
527
0
  JsonNode *ret = mknode(JSON_BOOL);
528
0
  ret->bool_ = b;
529
0
  return ret;
530
0
}
531
532
static JsonNode *mkstring(char *s)
533
54
{
534
54
  JsonNode *ret = mknode(JSON_STRING);
535
54
  ret->string_ = s;
536
54
  return ret;
537
54
}
538
539
JsonNode *json_mkstring(const char *s)
540
54
{
541
54
  return mkstring(json_strdup(s));
542
54
}
543
544
JsonNode *json_mknumber(double n)
545
51
{
546
51
  JsonNode *node = mknode(JSON_NUMBER);
547
51
  node->number_ = n;
548
51
  return node;
549
51
}
550
551
JsonNode *json_mkarray(void)
552
0
{
553
0
  return mknode(JSON_ARRAY);
554
0
}
555
556
JsonNode *json_mkobject(void)
557
19
{
558
19
  return mknode(JSON_OBJECT);
559
19
}
560
561
static void append_node(JsonNode *parent, JsonNode *child)
562
105
{
563
105
  if (child != NULL && parent != NULL) {
564
105
      child->parent = parent;
565
105
      child->prev = parent->children.tail;
566
105
      child->next = NULL;
567
568
105
      if (parent->children.tail != NULL)
569
86
          parent->children.tail->next = child;
570
19
      else
571
19
          parent->children.head = child;
572
105
      parent->children.tail = child;
573
105
  }
574
105
}
575
576
static void prepend_node(JsonNode *parent, JsonNode *child)
577
0
{
578
0
  if (child != NULL && parent != NULL) {
579
0
      child->parent = parent;
580
0
      child->prev = NULL;
581
0
      child->next = parent->children.head;
582
583
0
      if (parent->children.head != NULL)
584
0
          parent->children.head->prev = child;
585
0
      else
586
0
          parent->children.tail = child;
587
0
      parent->children.head = child;
588
0
  }
589
0
}
590
591
static void append_member(JsonNode *object, char *key, JsonNode *value)
592
105
{
593
105
  if (value != NULL && object != NULL) {
594
105
      value->key = key;
595
105
      append_node(object, value);
596
105
  }
597
105
}
598
599
void json_append_element(JsonNode *array, JsonNode *element)
600
0
{
601
0
  if (array != NULL && element !=NULL) {
602
0
      assert(array->tag == JSON_ARRAY);
603
0
      assert(element->parent == NULL);
604
605
0
      append_node(array, element);
606
0
  }
607
0
}
608
609
void json_prepend_element(JsonNode *array, JsonNode *element)
610
0
{
611
0
  assert(array->tag == JSON_ARRAY);
612
0
  assert(element->parent == NULL);
613
614
0
  prepend_node(array, element);
615
0
}
616
617
void json_append_member(JsonNode *object, const char *key, JsonNode *value)
618
105
{
619
105
  if (object != NULL && key != NULL && value != NULL) {
620
105
      assert(object->tag == JSON_OBJECT);
621
0
      assert(value->parent == NULL);
622
623
0
      append_member(object, json_strdup(key), value);
624
105
  }
625
105
}
626
627
void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
628
0
{
629
0
  if (object != NULL && key != NULL && value != NULL) {
630
0
      assert(object->tag == JSON_OBJECT);
631
0
      assert(value->parent == NULL);
632
633
0
      value->key = json_strdup(key);
634
0
      prepend_node(object, value);
635
0
  }
636
0
}
637
638
void json_remove_from_parent(JsonNode *node)
639
124
{
640
124
  if (node != NULL) {
641
124
      JsonNode *parent = node->parent;
642
643
124
      if (parent != NULL) {
644
105
          if (node->prev != NULL)
645
0
              node->prev->next = node->next;
646
105
          else
647
105
              parent->children.head = node->next;
648
649
105
          if (node->next != NULL)
650
86
              node->next->prev = node->prev;
651
19
          else
652
19
              parent->children.tail = node->prev;
653
654
105
          free(node->key);
655
656
105
          node->parent = NULL;
657
105
          node->prev = node->next = NULL;
658
105
          node->key = NULL;
659
105
      }
660
124
  }
661
124
}
662
663
static bool parse_value(const char **sp, JsonNode **out)
664
0
{
665
0
  const char *s = *sp;
666
667
0
  switch (*s) {
668
0
    case 'n':
669
0
      if (expect_literal(&s, "null")) {
670
0
        if (out)
671
0
          *out = json_mknull();
672
0
        *sp = s;
673
0
        return true;
674
0
      }
675
0
      return false;
676
677
0
    case 'f':
678
0
      if (expect_literal(&s, "false")) {
679
0
        if (out)
680
0
          *out = json_mkbool(false);
681
0
        *sp = s;
682
0
        return true;
683
0
      }
684
0
      return false;
685
686
0
    case 't':
687
0
      if (expect_literal(&s, "true")) {
688
0
        if (out)
689
0
          *out = json_mkbool(true);
690
0
        *sp = s;
691
0
        return true;
692
0
      }
693
0
      return false;
694
695
0
    case '"': {
696
0
      char *str = NULL;
697
0
      if (parse_string(&s, out ? &str : NULL)) {
698
0
        if (out)
699
0
          *out = mkstring(str);
700
0
        *sp = s;
701
0
        return true;
702
0
      }
703
0
      return false;
704
0
    }
705
706
0
    case '[':
707
0
      if (parse_array(&s, out)) {
708
0
        *sp = s;
709
0
        return true;
710
0
      }
711
0
      return false;
712
713
0
    case '{':
714
0
      if (parse_object(&s, out)) {
715
0
        *sp = s;
716
0
        return true;
717
0
      }
718
0
      return false;
719
720
0
    default: {
721
0
      double num;
722
0
      if (parse_number(&s, out ? &num : NULL)) {
723
0
        if (out)
724
0
          *out = json_mknumber(num);
725
0
        *sp = s;
726
0
        return true;
727
0
      }
728
0
      return false;
729
0
    }
730
0
  }
731
0
}
732
733
static bool parse_array(const char **sp, JsonNode **out)
734
0
{
735
0
  const char *s = *sp;
736
0
  JsonNode *ret = out ? json_mkarray() : NULL;
737
0
  JsonNode *element = NULL;
738
739
0
  if (*s++ != '[')
740
0
    goto failure;
741
0
  skip_space(&s);
742
743
0
  if (*s == ']') {
744
0
    s++;
745
0
    goto success;
746
0
  }
747
748
0
  for (;;) {
749
0
    if (!parse_value(&s, out ? &element : NULL))
750
0
      goto failure;
751
0
    skip_space(&s);
752
753
0
    if (out)
754
0
      json_append_element(ret, element);
755
756
0
    if (*s == ']') {
757
0
      s++;
758
0
      goto success;
759
0
    }
760
761
0
    if (*s++ != ',')
762
0
      goto failure;
763
0
    skip_space(&s);
764
0
  }
765
766
0
success:
767
0
  *sp = s;
768
0
  if (out)
769
0
    *out = ret;
770
0
  return true;
771
772
0
failure:
773
0
  json_delete(ret);
774
0
  return false;
775
0
}
776
777
static bool parse_object(const char **sp, JsonNode **out)
778
0
{
779
0
  const char *s = *sp;
780
0
  JsonNode *ret = out ? json_mkobject() : NULL;
781
0
  char *key = NULL;
782
0
  JsonNode *value = NULL;
783
784
0
  if (*s++ != '{')
785
0
    goto failure;
786
0
  skip_space(&s);
787
788
0
  if (*s == '}') {
789
0
    s++;
790
0
    goto success;
791
0
  }
792
793
0
  for (;;) {
794
0
    if (!parse_string(&s, out ? &key : NULL))
795
0
      goto failure;
796
0
    skip_space(&s);
797
798
0
    if (*s++ != ':')
799
0
      goto failure_free_key;
800
0
    skip_space(&s);
801
802
0
    if (!parse_value(&s, out ? &value : NULL))
803
0
      goto failure_free_key;
804
0
    skip_space(&s);
805
806
0
    if (out)
807
0
      append_member(ret, key, value);
808
809
0
    if (*s == '}') {
810
0
      s++;
811
0
      goto success;
812
0
    }
813
814
0
    if (*s++ != ',')
815
0
      goto failure;
816
0
    skip_space(&s);
817
0
  }
818
819
0
success:
820
0
  *sp = s;
821
0
  if (out)
822
0
    *out = ret;
823
0
  return true;
824
825
0
failure_free_key:
826
0
  if (out)
827
0
    free(key);
828
0
failure:
829
0
  json_delete(ret);
830
0
  return false;
831
0
}
832
833
bool parse_string(const char **sp, char **out)
834
0
{
835
0
  const char *s = *sp;
836
0
  SB sb = { 0, 0, 0 };
837
0
  char throwaway_buffer[4];
838
    /* enough space for a UTF-8 character */
839
0
  char *b;
840
841
0
  if (*s++ != '"')
842
0
    return false;
843
844
0
  if (out) {
845
0
    sb_init(&sb);
846
0
    sb_need(&sb, 4);
847
0
    b = sb.cur;
848
0
  } else {
849
0
    b = throwaway_buffer;
850
0
  }
851
852
0
  while (*s != '"') {
853
0
    unsigned char c = *s++;
854
855
    /* Parse next character, and write it to b. */
856
0
    if (c == '\\') {
857
0
      c = *s++;
858
0
      switch (c) {
859
0
        case '"':
860
0
        case '\\':
861
0
        case '/':
862
0
          *b++ = c;
863
0
          break;
864
0
        case 'b':
865
0
          *b++ = '\b';
866
0
          break;
867
0
        case 'f':
868
0
          *b++ = '\f';
869
0
          break;
870
0
        case 'n':
871
0
          *b++ = '\n';
872
0
          break;
873
0
        case 'r':
874
0
          *b++ = '\r';
875
0
          break;
876
0
        case 't':
877
0
          *b++ = '\t';
878
0
          break;
879
0
        case 'u':
880
0
        {
881
0
          uint16_t uc, lc;
882
0
          uint32_t unicode;
883
884
0
          if (!parse_hex16(&s, &uc))
885
0
            goto failed;
886
887
0
          if (uc >= 0xD800 && uc <= 0xDFFF) {
888
            /* Handle UTF-16 surrogate pair. */
889
0
            if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
890
0
              goto failed; /* Incomplete surrogate pair. */
891
0
            if (!from_surrogate_pair(uc, lc, &unicode))
892
0
              goto failed; /* Invalid surrogate pair. */
893
0
          } else if (uc == 0) {
894
            /* Disallow "\u0000". */
895
0
            goto failed;
896
0
          } else {
897
0
            unicode = uc;
898
0
          }
899
900
0
          b += utf8_write_char(unicode, b);
901
0
          break;
902
0
        }
903
0
        default:
904
          /* Invalid escape */
905
0
          goto failed;
906
0
      }
907
0
    } else if (c <= 0x1F) {
908
      /* Control characters are not allowed in string literals. */
909
0
      goto failed;
910
0
    } else {
911
      /* Validate and echo a UTF-8 character. */
912
0
      int len;
913
914
0
      s--;
915
0
      len = utf8_validate_cz(s);
916
0
      if (len == 0)
917
0
        goto failed; /* Invalid UTF-8 character. */
918
919
0
      while (len--)
920
0
        *b++ = *s++;
921
0
    }
922
923
    /*
924
     * Update sb to know about the new bytes,
925
     * and set up b to write another character.
926
     */
927
0
    if (out) {
928
0
      sb.cur = b;
929
0
      sb_need(&sb, 4);
930
0
      b = sb.cur;
931
0
    } else {
932
0
      b = throwaway_buffer;
933
0
    }
934
0
  }
935
0
  s++;
936
937
0
  if (out)
938
0
    *out = sb_finish(&sb);
939
0
  *sp = s;
940
0
  return true;
941
942
0
failed:
943
0
  if (out)
944
0
    sb_free(&sb);
945
0
  return false;
946
0
}
947
948
0
bool is_space(const char *c) {
949
0
  return ((*c) == '\t' || (*c) == '\n' || (*c) == '\r' || (*c) == ' ');
950
0
}
951
952
177
bool is_digit(const char *c){
953
177
  return ((*c) >= '0' && (*c) <= '9');
954
177
}
955
956
/*
957
 * The JSON spec says that a number shall follow this precise pattern
958
 * (spaces and quotes added for readability):
959
 *   '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
960
 *
961
 * However, some JSON parsers are more liberal.  For instance, PHP accepts
962
 * '.5' and '1.'.  JSON.parse accepts '+3'.
963
 *
964
 * This function takes the strict approach.
965
 */
966
bool parse_number(const char **sp, double *out)
967
51
{
968
51
  const char *s = *sp;
969
970
  /* '-'? */
971
51
  if (*s == '-')
972
0
    s++;
973
974
  /* (0 | [1-9][0-9]*) */
975
51
  if (*s == '0') {
976
0
    s++;
977
51
  } else {
978
51
    if (!is_digit(s))
979
0
      return false;
980
126
    do {
981
126
      s++;
982
126
    } while (is_digit(s));
983
51
  }
984
985
  /* ('.' [0-9]+)? */
986
51
  if (*s == '.') {
987
0
    s++;
988
0
    if (!is_digit(s))
989
0
      return false;
990
0
    do {
991
0
      s++;
992
0
    } while (is_digit(s));
993
0
  }
994
995
  /* ([Ee] [+-]? [0-9]+)? */
996
51
  if (*s == 'E' || *s == 'e') {
997
0
    s++;
998
0
    if (*s == '+' || *s == '-')
999
0
      s++;
1000
0
    if (!is_digit(s))
1001
0
      return false;
1002
0
    do {
1003
0
      s++;
1004
0
    } while (is_digit(s));
1005
0
  }
1006
1007
51
  if (out)
1008
0
    *out = strtod(*sp, NULL);
1009
1010
51
  *sp = s;
1011
51
  return true;
1012
51
}
1013
1014
static void skip_space(const char **sp)
1015
0
{
1016
0
  const char *s = *sp;
1017
0
  while (is_space(s))
1018
0
    s++;
1019
0
  *sp = s;
1020
0
}
1021
1022
static void emit_value(SB *out, const JsonNode *node)
1023
0
{
1024
0
  assert(tag_is_valid(node->tag));
1025
0
  switch (node->tag) {
1026
0
    case JSON_NULL:
1027
0
      sb_puts(out, "null");
1028
0
      break;
1029
0
    case JSON_BOOL:
1030
0
      sb_puts(out, node->bool_ ? "true" : "false");
1031
0
      break;
1032
0
    case JSON_STRING:
1033
0
      emit_string(out, node->string_);
1034
0
      break;
1035
0
    case JSON_NUMBER:
1036
0
      emit_number(out, node->number_);
1037
0
      break;
1038
0
    case JSON_ARRAY:
1039
0
      emit_array(out, node);
1040
0
      break;
1041
0
    case JSON_OBJECT:
1042
0
      emit_object(out, node);
1043
0
      break;
1044
0
    default:
1045
0
      assert(false);
1046
0
  }
1047
0
}
1048
1049
void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1050
124
{
1051
124
  assert(tag_is_valid(node->tag));
1052
0
  switch (node->tag) {
1053
0
    case JSON_NULL:
1054
0
      sb_puts(out, "null");
1055
0
      break;
1056
0
    case JSON_BOOL:
1057
0
      sb_puts(out, node->bool_ ? "true" : "false");
1058
0
      break;
1059
54
    case JSON_STRING:
1060
54
      emit_string(out, node->string_);
1061
54
      break;
1062
51
    case JSON_NUMBER:
1063
51
      emit_number(out, node->number_);
1064
51
      break;
1065
0
    case JSON_ARRAY:
1066
0
      emit_array_indented(out, node, space, indent_level);
1067
0
      break;
1068
19
    case JSON_OBJECT:
1069
19
      emit_object_indented(out, node, space, indent_level);
1070
19
      break;
1071
0
    default:
1072
0
      assert(false);
1073
124
  }
1074
124
}
1075
1076
static void emit_array(SB *out, const JsonNode *array)
1077
0
{
1078
0
  const JsonNode *element;
1079
1080
0
  sb_putc(out, '[');
1081
0
  json_foreach(element, array) {
1082
0
    emit_value(out, element);
1083
0
    if (element->next != NULL)
1084
0
      sb_putc(out, ',');
1085
0
  }
1086
0
  sb_putc(out, ']');
1087
0
}
1088
1089
static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1090
0
{
1091
0
  const JsonNode *element = array->children.head;
1092
0
  int i;
1093
1094
0
  if (element == NULL) {
1095
0
    sb_puts(out, "[]");
1096
0
    return;
1097
0
  }
1098
1099
0
  sb_puts(out, "[\n");
1100
0
  while (element != NULL) {
1101
0
    for (i = 0; i < indent_level + 1; i++)
1102
0
      sb_puts(out, space);
1103
0
    emit_value_indented(out, element, space, indent_level + 1);
1104
1105
0
    element = element->next;
1106
0
    sb_puts(out, element != NULL ? ",\n" : "\n");
1107
0
  }
1108
0
  for (i = 0; i < indent_level; i++)
1109
0
    sb_puts(out, space);
1110
0
  sb_putc(out, ']');
1111
0
}
1112
1113
static void emit_object(SB *out, const JsonNode *object)
1114
0
{
1115
0
  const JsonNode *member;
1116
1117
0
  sb_putc(out, '{');
1118
0
  json_foreach(member, object) {
1119
0
    emit_string(out, member->key);
1120
0
    sb_putc(out, ':');
1121
0
    emit_value(out, member);
1122
0
    if (member->next != NULL)
1123
0
      sb_putc(out, ',');
1124
0
  }
1125
0
  sb_putc(out, '}');
1126
0
}
1127
1128
static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1129
19
{
1130
19
  const JsonNode *member = object->children.head;
1131
19
  int i;
1132
1133
19
  if (member == NULL) {
1134
0
    sb_puts(out, "{}");
1135
0
    return;
1136
0
  }
1137
1138
19
  sb_puts(out, "{\n");
1139
124
  while (member != NULL) {
1140
210
    for (i = 0; i < indent_level + 1; i++)
1141
105
      sb_puts(out, space);
1142
105
    emit_string(out, member->key);
1143
105
    sb_puts(out, ": ");
1144
105
    emit_value_indented(out, member, space, indent_level + 1);
1145
1146
105
    member = member->next;
1147
105
    sb_puts(out, member != NULL ? ",\n" : "\n");
1148
105
  }
1149
19
  for (i = 0; i < indent_level; i++)
1150
0
    sb_puts(out, space);
1151
19
  sb_putc(out, '}');
1152
19
}
1153
1154
void emit_string(SB *out, const char *str)
1155
159
{
1156
159
  bool escape_unicode = false;
1157
159
  const char *s = str;
1158
159
  char *b;
1159
1160
// make assertion catchable
1161
159
#ifndef NDEBUG
1162
159
  if (!utf8_validate(str)) {
1163
0
    throw utf8::invalid_utf8(0);
1164
0
  }
1165
159
#endif
1166
1167
159
  assert(utf8_validate(str));
1168
1169
  /*
1170
   * 14 bytes is enough space to write up to two
1171
   * \uXXXX escapes and two quotation marks.
1172
   */
1173
159
  sb_need(out, 14);
1174
159
  b = out->cur;
1175
1176
159
  *b++ = '"';
1177
5.12M
  while (*s != 0) {
1178
5.12M
    unsigned char c = *s++;
1179
1180
    /* Encode the next character, and write it to b. */
1181
5.12M
    switch (c) {
1182
148
      case '"':
1183
148
        *b++ = '\\';
1184
148
        *b++ = '"';
1185
148
        break;
1186
246k
      case '\\':
1187
246k
        *b++ = '\\';
1188
246k
        *b++ = '\\';
1189
246k
        break;
1190
0
      case '\b':
1191
0
        *b++ = '\\';
1192
0
        *b++ = 'b';
1193
0
        break;
1194
9
      case '\f':
1195
9
        *b++ = '\\';
1196
9
        *b++ = 'f';
1197
9
        break;
1198
68
      case '\n':
1199
68
        *b++ = '\\';
1200
68
        *b++ = 'n';
1201
68
        break;
1202
4
      case '\r':
1203
4
        *b++ = '\\';
1204
4
        *b++ = 'r';
1205
4
        break;
1206
3
      case '\t':
1207
3
        *b++ = '\\';
1208
3
        *b++ = 't';
1209
3
        break;
1210
4.87M
      default: {
1211
4.87M
        int len;
1212
1213
4.87M
        s--;
1214
4.87M
        len = utf8_validate_cz(s);
1215
1216
4.87M
        if (len == 0) {
1217
          /*
1218
           * Handle invalid UTF-8 character gracefully in production
1219
           * by writing a replacement character (U+FFFD)
1220
           * and skipping a single byte.
1221
           *
1222
           * This should never happen when assertions are enabled
1223
           * due to the assertion at the beginning of this function.
1224
           */
1225
0
          assert(false);
1226
0
          if (escape_unicode) {
1227
0
            strcpy(b, "\\uFFFD");
1228
0
            b += 6;
1229
0
          } else {
1230
0
            *b++ = 0xEFu;
1231
0
            *b++ = 0xBFu;
1232
0
            *b++ = 0xBDu;
1233
0
          }
1234
0
          s++;
1235
4.87M
        } else if (c < 0x1F || (c >= 0x80 && escape_unicode)) {
1236
          /* Encode using \u.... */
1237
1
          uint32_t unicode;
1238
1239
1
          s += utf8_read_char(s, &unicode);
1240
1241
1
          if (unicode <= 0xFFFF) {
1242
1
            *b++ = '\\';
1243
1
            *b++ = 'u';
1244
1
            b += write_hex16(b, unicode);
1245
1
          } else {
1246
            /* Produce a surrogate pair. */
1247
0
            uint16_t uc, lc;
1248
0
            assert(unicode <= 0x10FFFF);
1249
0
            to_surrogate_pair(unicode, &uc, &lc);
1250
0
            *b++ = '\\';
1251
0
            *b++ = 'u';
1252
0
            b += write_hex16(b, uc);
1253
0
            *b++ = '\\';
1254
0
            *b++ = 'u';
1255
0
            b += write_hex16(b, lc);
1256
0
          }
1257
4.87M
        } else {
1258
          /* Write the character directly. */
1259
9.75M
          while (len--)
1260
4.87M
            *b++ = *s++;
1261
4.87M
        }
1262
1263
0
        break;
1264
0
      }
1265
5.12M
    }
1266
1267
    /*
1268
     * Update *out to know about the new bytes,
1269
     * and set up b to write another encoded character.
1270
     */
1271
5.12M
    out->cur = b;
1272
5.12M
    sb_need(out, 14);
1273
5.12M
    b = out->cur;
1274
5.12M
  }
1275
159
  *b++ = '"';
1276
1277
159
  out->cur = b;
1278
159
}
1279
1280
static void emit_number(SB *out, double num)
1281
51
{
1282
  /*
1283
   * This isn't exactly how JavaScript renders numbers,
1284
   * but it should produce valid JSON for reasonable numbers
1285
   * preserve precision well enough, and avoid some oddities
1286
   * like 0.3 -> 0.299999999999999988898 .
1287
   */
1288
51
  char buf[64];
1289
51
  sprintf(buf, "%.16g", num);
1290
1291
51
  if (number_is_valid(buf))
1292
51
    sb_puts(out, buf);
1293
0
  else
1294
0
    sb_puts(out, "null");
1295
51
}
1296
1297
static bool tag_is_valid(unsigned int tag)
1298
124
{
1299
124
  return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1300
124
}
1301
1302
static bool number_is_valid(const char *num)
1303
51
{
1304
51
  return (parse_number(&num, NULL) && *num == '\0');
1305
51
}
1306
1307
static bool expect_literal(const char **sp, const char *str)
1308
0
{
1309
0
  const char *s = *sp;
1310
1311
0
  while (*str != '\0')
1312
0
    if (*s++ != *str++)
1313
0
      return false;
1314
1315
0
  *sp = s;
1316
0
  return true;
1317
0
}
1318
1319
/*
1320
 * Parses exactly 4 hex characters (capital or lowercase).
1321
 * Fails if any input chars are not [0-9A-Fa-f].
1322
 */
1323
static bool parse_hex16(const char **sp, uint16_t *out)
1324
0
{
1325
0
  const char *s = *sp;
1326
0
  uint16_t ret = 0;
1327
0
  uint16_t i;
1328
0
  uint16_t tmp;
1329
0
  char c;
1330
1331
0
  for (i = 0; i < 4; i++) {
1332
0
    c = *s++;
1333
0
    if (c >= '0' && c <= '9')
1334
0
      tmp = c - '0';
1335
0
    else if (c >= 'A' && c <= 'F')
1336
0
      tmp = c - 'A' + 10;
1337
0
    else if (c >= 'a' && c <= 'f')
1338
0
      tmp = c - 'a' + 10;
1339
0
    else
1340
0
      return false;
1341
1342
0
    ret <<= 4;
1343
0
    ret += tmp;
1344
0
  }
1345
1346
0
  if (out)
1347
0
    *out = ret;
1348
0
  *sp = s;
1349
0
  return true;
1350
0
}
1351
1352
/*
1353
 * Encodes a 16-bit number into hexadecimal,
1354
 * writing exactly 4 hex chars.
1355
 */
1356
static int write_hex16(char *out, uint16_t val)
1357
1
{
1358
1
  const char *hex = "0123456789ABCDEF";
1359
1360
1
  *out++ = hex[(val >> 12) & 0xF];
1361
1
  *out++ = hex[(val >> 8)  & 0xF];
1362
1
  *out++ = hex[(val >> 4)  & 0xF];
1363
1
  *out++ = hex[ val        & 0xF];
1364
1365
1
  return 4;
1366
1
}
1367
1368
bool json_check(const JsonNode *node, char errmsg[256])
1369
0
{
1370
0
  #define problem(...) do { \
1371
0
      if (errmsg != NULL) \
1372
0
        snprintf(errmsg, 256, __VA_ARGS__); \
1373
0
      return false; \
1374
0
    } while (0)
1375
1376
0
  if (node->key != NULL && !utf8_validate(node->key))
1377
0
    problem("key contains invalid UTF-8");
1378
1379
0
  if (!tag_is_valid(node->tag))
1380
0
    problem("tag is invalid (%u)", node->tag);
1381
1382
0
  if (node->tag == JSON_BOOL) {
1383
0
    if (node->bool_ != false && node->bool_ != true)
1384
0
      problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
1385
0
  } else if (node->tag == JSON_STRING) {
1386
0
    if (node->string_ == NULL)
1387
0
      problem("string_ is NULL");
1388
0
    if (!utf8_validate(node->string_))
1389
0
      problem("string_ contains invalid UTF-8");
1390
0
  } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1391
0
    JsonNode *head = node->children.head;
1392
0
    JsonNode *tail = node->children.tail;
1393
1394
0
    if (head == NULL || tail == NULL) {
1395
0
      if (head != NULL)
1396
0
        problem("tail is NULL, but head is not");
1397
0
      if (tail != NULL)
1398
0
        problem("head is NULL, but tail is not");
1399
0
    } else {
1400
0
      JsonNode *child;
1401
0
      JsonNode *last = NULL;
1402
1403
0
      if (head->prev != NULL)
1404
0
        problem("First child's prev pointer is not NULL");
1405
1406
0
      for (child = head; child != NULL; last = child, child = child->next) {
1407
0
        if (child == node)
1408
0
          problem("node is its own child");
1409
0
        if (child->next == child)
1410
0
          problem("child->next == child (cycle)");
1411
0
        if (child->next == head)
1412
0
          problem("child->next == head (cycle)");
1413
1414
0
        if (child->parent != node)
1415
0
          problem("child does not point back to parent");
1416
0
        if (child->next != NULL && child->next->prev != child)
1417
0
          problem("child->next does not point back to child");
1418
1419
0
        if (node->tag == JSON_ARRAY && child->key != NULL)
1420
0
          problem("Array element's key is not NULL");
1421
0
        if (node->tag == JSON_OBJECT && child->key == NULL)
1422
0
          problem("Object member's key is NULL");
1423
1424
0
        if (!json_check(child, errmsg))
1425
0
          return false;
1426
0
      }
1427
1428
0
      if (last != tail)
1429
0
        problem("tail does not match pointer found by starting at head and following next links");
1430
0
    }
1431
0
  }
1432
1433
0
  return true;
1434
1435
0
  #undef problem
1436
0
}