/src/libsass/src/json.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com) |
3 | | All rights reserved. |
4 | | |
5 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
6 | | of this software and associated documentation files (the "Software"), to deal |
7 | | in the Software without restriction, including without limitation the rights |
8 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
9 | | copies of the Software, and to permit persons to whom the Software is |
10 | | furnished to do so, subject to the following conditions: |
11 | | |
12 | | The above copyright notice and this permission notice shall be included in |
13 | | all copies or substantial portions of the Software. |
14 | | |
15 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
18 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
20 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
21 | | THE SOFTWARE. |
22 | | */ |
23 | | |
24 | | #ifdef _MSC_VER |
25 | | #define _CRT_SECURE_NO_WARNINGS |
26 | | #define _CRT_NONSTDC_NO_DEPRECATE |
27 | | #endif |
28 | | |
29 | | #include "json.hpp" |
30 | | |
31 | | // include utf8 library used by libsass |
32 | | // ToDo: replace internal json utf8 code |
33 | | #include "utf8.h" |
34 | | |
35 | | #include <assert.h> |
36 | | #include <stdint.h> |
37 | | #include <stdio.h> |
38 | | #include <stdlib.h> |
39 | | #include <string.h> |
40 | | |
41 | | #if defined(_MSC_VER) && _MSC_VER < 1900 |
42 | | #include <stdarg.h> |
43 | | #ifdef snprintf |
44 | | #undef snprintf |
45 | | #endif |
46 | | extern "C" int snprintf(char *, size_t, const char *, ...); |
47 | | #endif |
48 | | |
/* Report allocation failure on stderr and terminate the process. */
#define out_of_memory() do {                    \
        fputs("Out of memory.\n", stderr);      \
        exit(EXIT_FAILURE);                     \
    } while (0)
53 | | |
54 | | /* Sadly, strdup is not portable. */ |
55 | | static char *json_strdup(const char *str) |
56 | 159 | { |
57 | 159 | char *ret = (char*) malloc(strlen(str) + 1); |
58 | 159 | if (ret == NULL) |
59 | 0 | out_of_memory(); |
60 | 159 | strcpy(ret, str); |
61 | 159 | return ret; |
62 | 159 | } |
63 | | |
64 | | /* String buffer */ |
65 | | |
/* Growable character buffer; the three pointers all refer to one allocation. */
typedef struct SB
{
    char *cur;      /* next byte to be written */
    char *end;      /* end of the usable area (one spare byte follows it) */
    char *start;    /* beginning of the allocation */
} SB;
72 | | |
73 | | static void sb_init(SB *sb) |
74 | 19 | { |
75 | 19 | sb->start = (char*) malloc(17); |
76 | 19 | if (sb->start == NULL) |
77 | 0 | out_of_memory(); |
78 | 19 | sb->cur = sb->start; |
79 | 19 | sb->end = sb->start + 16; |
80 | 19 | } |
81 | | |
/*
 * Make sure at least @need bytes are free in @sb, growing it when required.
 * Both sb and need may be evaluated multiple times.
 */
#define sb_need(sb, need) do {                      \
        if ((need) > (sb)->end - (sb)->cur)         \
            sb_grow(sb, need);                      \
    } while (0)
87 | | |
88 | | static void sb_grow(SB *sb, int need) |
89 | 166 | { |
90 | 166 | size_t length = sb->cur - sb->start; |
91 | 166 | size_t alloc = sb->end - sb->start; |
92 | | |
93 | 166 | do { |
94 | 166 | alloc *= 2; |
95 | 166 | } while (alloc < length + need); |
96 | | |
97 | 166 | sb->start = (char*) realloc(sb->start, alloc + 1); |
98 | 166 | if (sb->start == NULL) |
99 | 0 | out_of_memory(); |
100 | 166 | sb->cur = sb->start + length; |
101 | 166 | sb->end = sb->start + alloc; |
102 | 166 | } |
103 | | |
104 | | static void sb_put(SB *sb, const char *bytes, int count) |
105 | 385 | { |
106 | 385 | sb_need(sb, count); |
107 | 385 | memcpy(sb->cur, bytes, count); |
108 | 385 | sb->cur += count; |
109 | 385 | } |
110 | | |
/* Append the single character @c to @sb; sb may be evaluated multiple times. */
#define sb_putc(sb, c) do {                     \
        if (!((sb)->cur < (sb)->end))           \
            sb_grow(sb, 1);                     \
        *(sb)->cur++ = (c);                     \
    } while (0)
116 | | |
117 | | static void sb_puts(SB *sb, const char *str) |
118 | 385 | { |
119 | 385 | sb_put(sb, str, (int)strlen(str)); |
120 | 385 | } |
121 | | |
122 | | static char *sb_finish(SB *sb) |
123 | 19 | { |
124 | 19 | *sb->cur = 0; |
125 | 19 | assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start)); |
126 | 0 | return sb->start; |
127 | 19 | } |
128 | | |
129 | | static void sb_free(SB *sb) |
130 | 0 | { |
131 | 0 | free(sb->start); |
132 | 0 | } |
133 | | |
134 | | /* |
135 | | * Unicode helper functions |
136 | | * |
137 | | * These are taken from the ccan/charset module and customized a bit. |
138 | | * Putting them here means the compiler can (choose to) inline them, |
139 | | * and it keeps ccan/json from having a dependency. |
140 | | * |
141 | | * We use uint32_t Type for Unicode codepoints. |
142 | | * We need our own because wchar_t might be 16 bits. |
143 | | */ |
144 | | |
/*
 * Validate a single UTF-8 character starting at @s.
 * The string must be null-terminated.
 *
 * Returns the encoded length (1 thru 4) when the character is valid and
 * 0 when it is invalid or clipped by the terminator.
 *
 * The accepted syntax is that of RFC3629, which matches
 * The Unicode Standard, Version 6.0:
 *
 *  * All codepoints U+0000..U+10FFFF may be encoded, except for
 *    U+D800..U+DFFF, which are reserved for UTF-16 surrogate pairs.
 *  * Sequences longer than 4 bytes are rejected, as they exceed the
 *    range of Unicode.
 *  * The sixty-six Unicode "non-characters" are permitted
 *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
 */
static int utf8_validate_cz(const char *s)
{
    const unsigned char *p = (const unsigned char*) s;
    const unsigned char lead = p[0];

    /* 00..7F: single byte. */
    if (lead <= 0x7F)
        return 1;

    /* 80..C1: stray continuation byte or overlong 2-byte sequence. */
    if (lead <= 0xC1)
        return 0;

    /* C2..DF: one continuation byte in 0x80..0xBF must follow. */
    if (lead <= 0xDF)
        return ((p[1] & 0xC0) == 0x80) ? 2 : 0;

    /* E0..EF: two continuation bytes must follow. */
    if (lead <= 0xEF) {
        /* Disallow overlong 3-byte sequence. */
        if (lead == 0xE0 && p[1] < 0xA0)
            return 0;

        /* Disallow U+D800..U+DFFF. */
        if (lead == 0xED && p[1] > 0x9F)
            return 0;

        /* Short-circuiting keeps us from reading past a terminator. */
        if ((p[1] & 0xC0) != 0x80 || (p[2] & 0xC0) != 0x80)
            return 0;

        return 3;
    }

    /* F0..F4: three continuation bytes must follow. */
    if (lead <= 0xF4) {
        /* Disallow overlong 4-byte sequence. */
        if (lead == 0xF0 && p[1] < 0x90)
            return 0;

        /* Disallow codepoints beyond U+10FFFF. */
        if (lead == 0xF4 && p[1] > 0x8F)
            return 0;

        if ((p[1] & 0xC0) != 0x80 ||
            (p[2] & 0xC0) != 0x80 ||
            (p[3] & 0xC0) != 0x80)
            return 0;

        return 4;
    }

    /* F5..FF never appear in valid UTF-8. */
    return 0;
}
218 | | |
219 | | /* Validate a null-terminated UTF-8 string. */ |
220 | | static bool utf8_validate(const char *s) |
221 | 318 | { |
222 | 318 | int len; |
223 | | |
224 | 10.2M | for (; *s != 0; s += len) { |
225 | 10.2M | len = utf8_validate_cz(s); |
226 | 10.2M | if (len == 0) |
227 | 0 | return false; |
228 | 10.2M | } |
229 | | |
230 | 318 | return true; |
231 | 318 | } |
232 | | |
233 | | /* |
234 | | * Read a single UTF-8 character starting at @s, |
235 | | * returning the length, in bytes, of the character read. |
236 | | * |
237 | | * This function assumes input is valid UTF-8, |
238 | | * and that there are enough characters in front of @s. |
239 | | */ |
240 | | static int utf8_read_char(const char *s, uint32_t *out) |
241 | 1 | { |
242 | 1 | const unsigned char *c = (const unsigned char*) s; |
243 | | |
244 | 1 | assert(utf8_validate_cz(s)); |
245 | | |
246 | 1 | if (c[0] <= 0x7F) { |
247 | | /* 00..7F */ |
248 | 1 | *out = c[0]; |
249 | 1 | return 1; |
250 | 1 | } else if (c[0] <= 0xDF) { |
251 | | /* C2..DF (unless input is invalid) */ |
252 | 0 | *out = ((uint32_t)c[0] & 0x1F) << 6 | |
253 | 0 | ((uint32_t)c[1] & 0x3F); |
254 | 0 | return 2; |
255 | 0 | } else if (c[0] <= 0xEF) { |
256 | | /* E0..EF */ |
257 | 0 | *out = ((uint32_t)c[0] & 0xF) << 12 | |
258 | 0 | ((uint32_t)c[1] & 0x3F) << 6 | |
259 | 0 | ((uint32_t)c[2] & 0x3F); |
260 | 0 | return 3; |
261 | 0 | } else { |
262 | | /* F0..F4 (unless input is invalid) */ |
263 | 0 | *out = ((uint32_t)c[0] & 0x7) << 18 | |
264 | 0 | ((uint32_t)c[1] & 0x3F) << 12 | |
265 | 0 | ((uint32_t)c[2] & 0x3F) << 6 | |
266 | 0 | ((uint32_t)c[3] & 0x3F); |
267 | 0 | return 4; |
268 | 0 | } |
269 | 1 | } |
270 | | |
271 | | /* |
272 | | * Write a single UTF-8 character to @s, |
273 | | * returning the length, in bytes, of the character written. |
274 | | * |
275 | | * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF. |
276 | | * |
277 | | * This function will write up to 4 bytes to @out. |
278 | | */ |
static int utf8_write_char(uint32_t unicode, char *out)
{
    unsigned char *dst = (unsigned char*) out;

    assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));

    /* U+0000..U+007F */
    if (unicode <= 0x7F) {
        dst[0] = (unsigned char) unicode;
        return 1;
    }

    /* U+0080..U+07FF */
    if (unicode <= 0x7FF) {
        dst[0] = (unsigned char)(0xC0 | (unicode >> 6));
        dst[1] = (unsigned char)(0x80 | (unicode & 0x3F));
        return 2;
    }

    /* U+0800..U+FFFF */
    if (unicode <= 0xFFFF) {
        dst[0] = (unsigned char)(0xE0 | (unicode >> 12));
        dst[1] = (unsigned char)(0x80 | ((unicode >> 6) & 0x3F));
        dst[2] = (unsigned char)(0x80 | (unicode & 0x3F));
        return 3;
    }

    /* U+10000..U+10FFFF */
    dst[0] = (unsigned char)(0xF0 | (unicode >> 18));
    dst[1] = (unsigned char)(0x80 | ((unicode >> 12) & 0x3F));
    dst[2] = (unsigned char)(0x80 | ((unicode >> 6) & 0x3F));
    dst[3] = (unsigned char)(0x80 | (unicode & 0x3F));
    return 4;
}
309 | | |
310 | | /* |
311 | | * Compute the Unicode codepoint of a UTF-16 surrogate pair. |
312 | | * |
313 | | * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF. |
314 | | * If they aren't, this function returns false. |
315 | | */ |
static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uint32_t *unicode)
{
    const bool high_ok = (uc >= 0xD800 && uc <= 0xDBFF);
    const bool low_ok = (lc >= 0xDC00 && lc <= 0xDFFF);

    /* Leave *unicode untouched unless both halves are in range. */
    if (!high_ok || !low_ok)
        return false;

    /* Ten payload bits come from each half, offset by 0x10000. */
    *unicode = 0x10000 + ((((uint32_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
    return true;
}
325 | | |
326 | | /* |
327 | | * Construct a UTF-16 surrogate pair given a Unicode codepoint. |
328 | | * |
329 | | * @unicode must be U+10000..U+10FFFF. |
330 | | */ |
static void to_surrogate_pair(uint32_t unicode, uint16_t *uc, uint16_t *lc)
{
    uint32_t offset;

    assert(unicode >= 0x10000 && unicode <= 0x10FFFF);

    /* Split the 20-bit offset into two 10-bit halves. */
    offset = unicode - 0x10000;
    *uc = (uint16_t)(0xD800 | ((offset >> 10) & 0x3FF));
    *lc = (uint16_t)(0xDC00 | (offset & 0x3FF));
}
341 | | |
342 | | static bool is_space (const char *c); |
343 | | static bool is_digit (const char *c); |
344 | | static bool parse_value (const char **sp, JsonNode **out); |
345 | | static bool parse_string (const char **sp, char **out); |
346 | | static bool parse_number (const char **sp, double *out); |
347 | | static bool parse_array (const char **sp, JsonNode **out); |
348 | | static bool parse_object (const char **sp, JsonNode **out); |
349 | | static bool parse_hex16 (const char **sp, uint16_t *out); |
350 | | |
351 | | static bool expect_literal (const char **sp, const char *str); |
352 | | static void skip_space (const char **sp); |
353 | | |
354 | | static void emit_value (SB *out, const JsonNode *node); |
355 | | static void emit_value_indented (SB *out, const JsonNode *node, const char *space, int indent_level); |
356 | | static void emit_string (SB *out, const char *str); |
357 | | static void emit_number (SB *out, double num); |
358 | | static void emit_array (SB *out, const JsonNode *array); |
359 | | static void emit_array_indented (SB *out, const JsonNode *array, const char *space, int indent_level); |
360 | | static void emit_object (SB *out, const JsonNode *object); |
361 | | static void emit_object_indented (SB *out, const JsonNode *object, const char *space, int indent_level); |
362 | | |
363 | | static int write_hex16(char *out, uint16_t val); |
364 | | |
365 | | static JsonNode *mknode(JsonTag tag); |
366 | | static void append_node(JsonNode *parent, JsonNode *child); |
367 | | static void prepend_node(JsonNode *parent, JsonNode *child); |
368 | | static void append_member(JsonNode *object, char *key, JsonNode *value); |
369 | | |
370 | | /* Assertion-friendly validity checks */ |
371 | | static bool tag_is_valid(unsigned int tag); |
372 | | static bool number_is_valid(const char *num); |
373 | | |
374 | | JsonNode *json_decode(const char *json) |
375 | 0 | { |
376 | 0 | const char *s = json; |
377 | 0 | JsonNode *ret; |
378 | |
|
379 | 0 | skip_space(&s); |
380 | 0 | if (!parse_value(&s, &ret)) |
381 | 0 | return NULL; |
382 | | |
383 | 0 | skip_space(&s); |
384 | 0 | if (*s != 0) { |
385 | 0 | json_delete(ret); |
386 | 0 | return NULL; |
387 | 0 | } |
388 | | |
389 | 0 | return ret; |
390 | 0 | } |
391 | | |
392 | | char *json_encode(const JsonNode *node) |
393 | 0 | { |
394 | 0 | return json_stringify(node, NULL); |
395 | 0 | } |
396 | | |
397 | | char *json_encode_string(const char *str) |
398 | 0 | { |
399 | 0 | SB sb; |
400 | 0 | sb_init(&sb); |
401 | |
|
402 | 0 | try { |
403 | 0 | emit_string(&sb, str); |
404 | 0 | } |
405 | 0 | catch (std::exception&) { |
406 | 0 | sb_free(&sb); |
407 | 0 | throw; |
408 | 0 | } |
409 | | |
410 | 0 | return sb_finish(&sb); |
411 | 0 | } |
412 | | |
413 | | char *json_stringify(const JsonNode *node, const char *space) |
414 | 19 | { |
415 | 19 | SB sb; |
416 | 19 | sb_init(&sb); |
417 | | |
418 | 19 | try { |
419 | 19 | if (space != NULL) |
420 | 19 | emit_value_indented(&sb, node, space, 0); |
421 | 0 | else |
422 | 0 | emit_value(&sb, node); |
423 | 19 | } |
424 | 19 | catch (std::exception&) { |
425 | 0 | sb_free(&sb); |
426 | 0 | throw; |
427 | 0 | } |
428 | | |
429 | 19 | return sb_finish(&sb); |
430 | 19 | } |
431 | | |
432 | | void json_delete(JsonNode *node) |
433 | 124 | { |
434 | 124 | if (node != NULL) { |
435 | 124 | json_remove_from_parent(node); |
436 | | |
437 | 124 | switch (node->tag) { |
438 | 54 | case JSON_STRING: |
439 | 54 | free(node->string_); |
440 | 54 | break; |
441 | 0 | case JSON_ARRAY: |
442 | 19 | case JSON_OBJECT: |
443 | 19 | { |
444 | 19 | JsonNode *child, *next; |
445 | 124 | for (child = node->children.head; child != NULL; child = next) { |
446 | 105 | next = child->next; |
447 | 105 | json_delete(child); |
448 | 105 | } |
449 | 19 | break; |
450 | 0 | } |
451 | 51 | default:; |
452 | 124 | } |
453 | | |
454 | 124 | free(node); |
455 | 124 | } |
456 | 124 | } |
457 | | |
458 | | bool json_validate(const char *json) |
459 | 0 | { |
460 | 0 | const char *s = json; |
461 | |
|
462 | 0 | skip_space(&s); |
463 | 0 | if (!parse_value(&s, NULL)) |
464 | 0 | return false; |
465 | | |
466 | 0 | skip_space(&s); |
467 | 0 | if (*s != 0) |
468 | 0 | return false; |
469 | | |
470 | 0 | return true; |
471 | 0 | } |
472 | | |
473 | | JsonNode *json_find_element(JsonNode *array, int index) |
474 | 0 | { |
475 | 0 | JsonNode *element; |
476 | 0 | int i = 0; |
477 | |
|
478 | 0 | if (array == NULL || array->tag != JSON_ARRAY) |
479 | 0 | return NULL; |
480 | | |
481 | 0 | json_foreach(element, array) { |
482 | 0 | if (i == index) |
483 | 0 | return element; |
484 | 0 | i++; |
485 | 0 | } |
486 | | |
487 | 0 | return NULL; |
488 | 0 | } |
489 | | |
490 | | JsonNode *json_find_member(JsonNode *object, const char *name) |
491 | 0 | { |
492 | 0 | JsonNode *member; |
493 | |
|
494 | 0 | if (object == NULL || object->tag != JSON_OBJECT) |
495 | 0 | return NULL; |
496 | | |
497 | 0 | json_foreach(member, object) |
498 | 0 | if (strcmp(member->key, name) == 0) |
499 | 0 | return member; |
500 | | |
501 | 0 | return NULL; |
502 | 0 | } |
503 | | |
504 | | JsonNode *json_first_child(const JsonNode *node) |
505 | 0 | { |
506 | 0 | if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT)) |
507 | 0 | return node->children.head; |
508 | 0 | return NULL; |
509 | 0 | } |
510 | | |
511 | | static JsonNode *mknode(JsonTag tag) |
512 | 124 | { |
513 | 124 | JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode)); |
514 | 124 | if (ret == NULL) |
515 | 0 | out_of_memory(); |
516 | 124 | ret->tag = tag; |
517 | 124 | return ret; |
518 | 124 | } |
519 | | |
520 | | JsonNode *json_mknull(void) |
521 | 0 | { |
522 | 0 | return mknode(JSON_NULL); |
523 | 0 | } |
524 | | |
525 | | JsonNode *json_mkbool(bool b) |
526 | 0 | { |
527 | 0 | JsonNode *ret = mknode(JSON_BOOL); |
528 | 0 | ret->bool_ = b; |
529 | 0 | return ret; |
530 | 0 | } |
531 | | |
532 | | static JsonNode *mkstring(char *s) |
533 | 54 | { |
534 | 54 | JsonNode *ret = mknode(JSON_STRING); |
535 | 54 | ret->string_ = s; |
536 | 54 | return ret; |
537 | 54 | } |
538 | | |
539 | | JsonNode *json_mkstring(const char *s) |
540 | 54 | { |
541 | 54 | return mkstring(json_strdup(s)); |
542 | 54 | } |
543 | | |
544 | | JsonNode *json_mknumber(double n) |
545 | 51 | { |
546 | 51 | JsonNode *node = mknode(JSON_NUMBER); |
547 | 51 | node->number_ = n; |
548 | 51 | return node; |
549 | 51 | } |
550 | | |
551 | | JsonNode *json_mkarray(void) |
552 | 0 | { |
553 | 0 | return mknode(JSON_ARRAY); |
554 | 0 | } |
555 | | |
556 | | JsonNode *json_mkobject(void) |
557 | 19 | { |
558 | 19 | return mknode(JSON_OBJECT); |
559 | 19 | } |
560 | | |
561 | | static void append_node(JsonNode *parent, JsonNode *child) |
562 | 105 | { |
563 | 105 | if (child != NULL && parent != NULL) { |
564 | 105 | child->parent = parent; |
565 | 105 | child->prev = parent->children.tail; |
566 | 105 | child->next = NULL; |
567 | | |
568 | 105 | if (parent->children.tail != NULL) |
569 | 86 | parent->children.tail->next = child; |
570 | 19 | else |
571 | 19 | parent->children.head = child; |
572 | 105 | parent->children.tail = child; |
573 | 105 | } |
574 | 105 | } |
575 | | |
576 | | static void prepend_node(JsonNode *parent, JsonNode *child) |
577 | 0 | { |
578 | 0 | if (child != NULL && parent != NULL) { |
579 | 0 | child->parent = parent; |
580 | 0 | child->prev = NULL; |
581 | 0 | child->next = parent->children.head; |
582 | |
|
583 | 0 | if (parent->children.head != NULL) |
584 | 0 | parent->children.head->prev = child; |
585 | 0 | else |
586 | 0 | parent->children.tail = child; |
587 | 0 | parent->children.head = child; |
588 | 0 | } |
589 | 0 | } |
590 | | |
591 | | static void append_member(JsonNode *object, char *key, JsonNode *value) |
592 | 105 | { |
593 | 105 | if (value != NULL && object != NULL) { |
594 | 105 | value->key = key; |
595 | 105 | append_node(object, value); |
596 | 105 | } |
597 | 105 | } |
598 | | |
599 | | void json_append_element(JsonNode *array, JsonNode *element) |
600 | 0 | { |
601 | 0 | if (array != NULL && element !=NULL) { |
602 | 0 | assert(array->tag == JSON_ARRAY); |
603 | 0 | assert(element->parent == NULL); |
604 | | |
605 | 0 | append_node(array, element); |
606 | 0 | } |
607 | 0 | } |
608 | | |
609 | | void json_prepend_element(JsonNode *array, JsonNode *element) |
610 | 0 | { |
611 | 0 | assert(array->tag == JSON_ARRAY); |
612 | 0 | assert(element->parent == NULL); |
613 | | |
614 | 0 | prepend_node(array, element); |
615 | 0 | } |
616 | | |
617 | | void json_append_member(JsonNode *object, const char *key, JsonNode *value) |
618 | 105 | { |
619 | 105 | if (object != NULL && key != NULL && value != NULL) { |
620 | 105 | assert(object->tag == JSON_OBJECT); |
621 | 0 | assert(value->parent == NULL); |
622 | | |
623 | 0 | append_member(object, json_strdup(key), value); |
624 | 105 | } |
625 | 105 | } |
626 | | |
627 | | void json_prepend_member(JsonNode *object, const char *key, JsonNode *value) |
628 | 0 | { |
629 | 0 | if (object != NULL && key != NULL && value != NULL) { |
630 | 0 | assert(object->tag == JSON_OBJECT); |
631 | 0 | assert(value->parent == NULL); |
632 | | |
633 | 0 | value->key = json_strdup(key); |
634 | 0 | prepend_node(object, value); |
635 | 0 | } |
636 | 0 | } |
637 | | |
638 | | void json_remove_from_parent(JsonNode *node) |
639 | 124 | { |
640 | 124 | if (node != NULL) { |
641 | 124 | JsonNode *parent = node->parent; |
642 | | |
643 | 124 | if (parent != NULL) { |
644 | 105 | if (node->prev != NULL) |
645 | 0 | node->prev->next = node->next; |
646 | 105 | else |
647 | 105 | parent->children.head = node->next; |
648 | | |
649 | 105 | if (node->next != NULL) |
650 | 86 | node->next->prev = node->prev; |
651 | 19 | else |
652 | 19 | parent->children.tail = node->prev; |
653 | | |
654 | 105 | free(node->key); |
655 | | |
656 | 105 | node->parent = NULL; |
657 | 105 | node->prev = node->next = NULL; |
658 | 105 | node->key = NULL; |
659 | 105 | } |
660 | 124 | } |
661 | 124 | } |
662 | | |
663 | | static bool parse_value(const char **sp, JsonNode **out) |
664 | 0 | { |
665 | 0 | const char *s = *sp; |
666 | |
|
667 | 0 | switch (*s) { |
668 | 0 | case 'n': |
669 | 0 | if (expect_literal(&s, "null")) { |
670 | 0 | if (out) |
671 | 0 | *out = json_mknull(); |
672 | 0 | *sp = s; |
673 | 0 | return true; |
674 | 0 | } |
675 | 0 | return false; |
676 | | |
677 | 0 | case 'f': |
678 | 0 | if (expect_literal(&s, "false")) { |
679 | 0 | if (out) |
680 | 0 | *out = json_mkbool(false); |
681 | 0 | *sp = s; |
682 | 0 | return true; |
683 | 0 | } |
684 | 0 | return false; |
685 | | |
686 | 0 | case 't': |
687 | 0 | if (expect_literal(&s, "true")) { |
688 | 0 | if (out) |
689 | 0 | *out = json_mkbool(true); |
690 | 0 | *sp = s; |
691 | 0 | return true; |
692 | 0 | } |
693 | 0 | return false; |
694 | | |
695 | 0 | case '"': { |
696 | 0 | char *str = NULL; |
697 | 0 | if (parse_string(&s, out ? &str : NULL)) { |
698 | 0 | if (out) |
699 | 0 | *out = mkstring(str); |
700 | 0 | *sp = s; |
701 | 0 | return true; |
702 | 0 | } |
703 | 0 | return false; |
704 | 0 | } |
705 | | |
706 | 0 | case '[': |
707 | 0 | if (parse_array(&s, out)) { |
708 | 0 | *sp = s; |
709 | 0 | return true; |
710 | 0 | } |
711 | 0 | return false; |
712 | | |
713 | 0 | case '{': |
714 | 0 | if (parse_object(&s, out)) { |
715 | 0 | *sp = s; |
716 | 0 | return true; |
717 | 0 | } |
718 | 0 | return false; |
719 | | |
720 | 0 | default: { |
721 | 0 | double num; |
722 | 0 | if (parse_number(&s, out ? &num : NULL)) { |
723 | 0 | if (out) |
724 | 0 | *out = json_mknumber(num); |
725 | 0 | *sp = s; |
726 | 0 | return true; |
727 | 0 | } |
728 | 0 | return false; |
729 | 0 | } |
730 | 0 | } |
731 | 0 | } |
732 | | |
733 | | static bool parse_array(const char **sp, JsonNode **out) |
734 | 0 | { |
735 | 0 | const char *s = *sp; |
736 | 0 | JsonNode *ret = out ? json_mkarray() : NULL; |
737 | 0 | JsonNode *element = NULL; |
738 | |
|
739 | 0 | if (*s++ != '[') |
740 | 0 | goto failure; |
741 | 0 | skip_space(&s); |
742 | |
|
743 | 0 | if (*s == ']') { |
744 | 0 | s++; |
745 | 0 | goto success; |
746 | 0 | } |
747 | | |
748 | 0 | for (;;) { |
749 | 0 | if (!parse_value(&s, out ? &element : NULL)) |
750 | 0 | goto failure; |
751 | 0 | skip_space(&s); |
752 | |
|
753 | 0 | if (out) |
754 | 0 | json_append_element(ret, element); |
755 | |
|
756 | 0 | if (*s == ']') { |
757 | 0 | s++; |
758 | 0 | goto success; |
759 | 0 | } |
760 | | |
761 | 0 | if (*s++ != ',') |
762 | 0 | goto failure; |
763 | 0 | skip_space(&s); |
764 | 0 | } |
765 | | |
766 | 0 | success: |
767 | 0 | *sp = s; |
768 | 0 | if (out) |
769 | 0 | *out = ret; |
770 | 0 | return true; |
771 | | |
772 | 0 | failure: |
773 | 0 | json_delete(ret); |
774 | 0 | return false; |
775 | 0 | } |
776 | | |
777 | | static bool parse_object(const char **sp, JsonNode **out) |
778 | 0 | { |
779 | 0 | const char *s = *sp; |
780 | 0 | JsonNode *ret = out ? json_mkobject() : NULL; |
781 | 0 | char *key = NULL; |
782 | 0 | JsonNode *value = NULL; |
783 | |
|
784 | 0 | if (*s++ != '{') |
785 | 0 | goto failure; |
786 | 0 | skip_space(&s); |
787 | |
|
788 | 0 | if (*s == '}') { |
789 | 0 | s++; |
790 | 0 | goto success; |
791 | 0 | } |
792 | | |
793 | 0 | for (;;) { |
794 | 0 | if (!parse_string(&s, out ? &key : NULL)) |
795 | 0 | goto failure; |
796 | 0 | skip_space(&s); |
797 | |
|
798 | 0 | if (*s++ != ':') |
799 | 0 | goto failure_free_key; |
800 | 0 | skip_space(&s); |
801 | |
|
802 | 0 | if (!parse_value(&s, out ? &value : NULL)) |
803 | 0 | goto failure_free_key; |
804 | 0 | skip_space(&s); |
805 | |
|
806 | 0 | if (out) |
807 | 0 | append_member(ret, key, value); |
808 | |
|
809 | 0 | if (*s == '}') { |
810 | 0 | s++; |
811 | 0 | goto success; |
812 | 0 | } |
813 | | |
814 | 0 | if (*s++ != ',') |
815 | 0 | goto failure; |
816 | 0 | skip_space(&s); |
817 | 0 | } |
818 | | |
819 | 0 | success: |
820 | 0 | *sp = s; |
821 | 0 | if (out) |
822 | 0 | *out = ret; |
823 | 0 | return true; |
824 | | |
825 | 0 | failure_free_key: |
826 | 0 | if (out) |
827 | 0 | free(key); |
828 | 0 | failure: |
829 | 0 | json_delete(ret); |
830 | 0 | return false; |
831 | 0 | } |
832 | | |
833 | | bool parse_string(const char **sp, char **out) |
834 | 0 | { |
835 | 0 | const char *s = *sp; |
836 | 0 | SB sb = { 0, 0, 0 }; |
837 | 0 | char throwaway_buffer[4]; |
838 | | /* enough space for a UTF-8 character */ |
839 | 0 | char *b; |
840 | |
|
841 | 0 | if (*s++ != '"') |
842 | 0 | return false; |
843 | | |
844 | 0 | if (out) { |
845 | 0 | sb_init(&sb); |
846 | 0 | sb_need(&sb, 4); |
847 | 0 | b = sb.cur; |
848 | 0 | } else { |
849 | 0 | b = throwaway_buffer; |
850 | 0 | } |
851 | |
|
852 | 0 | while (*s != '"') { |
853 | 0 | unsigned char c = *s++; |
854 | | |
855 | | /* Parse next character, and write it to b. */ |
856 | 0 | if (c == '\\') { |
857 | 0 | c = *s++; |
858 | 0 | switch (c) { |
859 | 0 | case '"': |
860 | 0 | case '\\': |
861 | 0 | case '/': |
862 | 0 | *b++ = c; |
863 | 0 | break; |
864 | 0 | case 'b': |
865 | 0 | *b++ = '\b'; |
866 | 0 | break; |
867 | 0 | case 'f': |
868 | 0 | *b++ = '\f'; |
869 | 0 | break; |
870 | 0 | case 'n': |
871 | 0 | *b++ = '\n'; |
872 | 0 | break; |
873 | 0 | case 'r': |
874 | 0 | *b++ = '\r'; |
875 | 0 | break; |
876 | 0 | case 't': |
877 | 0 | *b++ = '\t'; |
878 | 0 | break; |
879 | 0 | case 'u': |
880 | 0 | { |
881 | 0 | uint16_t uc, lc; |
882 | 0 | uint32_t unicode; |
883 | |
|
884 | 0 | if (!parse_hex16(&s, &uc)) |
885 | 0 | goto failed; |
886 | | |
887 | 0 | if (uc >= 0xD800 && uc <= 0xDFFF) { |
888 | | /* Handle UTF-16 surrogate pair. */ |
889 | 0 | if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc)) |
890 | 0 | goto failed; /* Incomplete surrogate pair. */ |
891 | 0 | if (!from_surrogate_pair(uc, lc, &unicode)) |
892 | 0 | goto failed; /* Invalid surrogate pair. */ |
893 | 0 | } else if (uc == 0) { |
894 | | /* Disallow "\u0000". */ |
895 | 0 | goto failed; |
896 | 0 | } else { |
897 | 0 | unicode = uc; |
898 | 0 | } |
899 | | |
900 | 0 | b += utf8_write_char(unicode, b); |
901 | 0 | break; |
902 | 0 | } |
903 | 0 | default: |
904 | | /* Invalid escape */ |
905 | 0 | goto failed; |
906 | 0 | } |
907 | 0 | } else if (c <= 0x1F) { |
908 | | /* Control characters are not allowed in string literals. */ |
909 | 0 | goto failed; |
910 | 0 | } else { |
911 | | /* Validate and echo a UTF-8 character. */ |
912 | 0 | int len; |
913 | |
|
914 | 0 | s--; |
915 | 0 | len = utf8_validate_cz(s); |
916 | 0 | if (len == 0) |
917 | 0 | goto failed; /* Invalid UTF-8 character. */ |
918 | | |
919 | 0 | while (len--) |
920 | 0 | *b++ = *s++; |
921 | 0 | } |
922 | | |
923 | | /* |
924 | | * Update sb to know about the new bytes, |
925 | | * and set up b to write another character. |
926 | | */ |
927 | 0 | if (out) { |
928 | 0 | sb.cur = b; |
929 | 0 | sb_need(&sb, 4); |
930 | 0 | b = sb.cur; |
931 | 0 | } else { |
932 | 0 | b = throwaway_buffer; |
933 | 0 | } |
934 | 0 | } |
935 | 0 | s++; |
936 | |
|
937 | 0 | if (out) |
938 | 0 | *out = sb_finish(&sb); |
939 | 0 | *sp = s; |
940 | 0 | return true; |
941 | | |
942 | 0 | failed: |
943 | 0 | if (out) |
944 | 0 | sb_free(&sb); |
945 | 0 | return false; |
946 | 0 | } |
947 | | |
/* True when *c is one of the four JSON whitespace characters. */
bool is_space(const char *c) {
    switch (*c) {
        case '\t':
        case '\n':
        case '\r':
        case ' ':
            return true;
        default:
            return false;
    }
}
951 | | |
/* True when *c is an ASCII decimal digit. */
bool is_digit(const char *c){
    const char ch = *c;

    if (ch < '0')
        return false;
    return ch <= '9';
}
955 | | |
956 | | /* |
957 | | * The JSON spec says that a number shall follow this precise pattern |
958 | | * (spaces and quotes added for readability): |
959 | | * '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)? |
960 | | * |
961 | | * However, some JSON parsers are more liberal. For instance, PHP accepts |
962 | | * '.5' and '1.'. JSON.parse accepts '+3'. |
963 | | * |
964 | | * This function takes the strict approach. |
965 | | */ |
966 | | bool parse_number(const char **sp, double *out) |
967 | 51 | { |
968 | 51 | const char *s = *sp; |
969 | | |
970 | | /* '-'? */ |
971 | 51 | if (*s == '-') |
972 | 0 | s++; |
973 | | |
974 | | /* (0 | [1-9][0-9]*) */ |
975 | 51 | if (*s == '0') { |
976 | 0 | s++; |
977 | 51 | } else { |
978 | 51 | if (!is_digit(s)) |
979 | 0 | return false; |
980 | 126 | do { |
981 | 126 | s++; |
982 | 126 | } while (is_digit(s)); |
983 | 51 | } |
984 | | |
985 | | /* ('.' [0-9]+)? */ |
986 | 51 | if (*s == '.') { |
987 | 0 | s++; |
988 | 0 | if (!is_digit(s)) |
989 | 0 | return false; |
990 | 0 | do { |
991 | 0 | s++; |
992 | 0 | } while (is_digit(s)); |
993 | 0 | } |
994 | | |
995 | | /* ([Ee] [+-]? [0-9]+)? */ |
996 | 51 | if (*s == 'E' || *s == 'e') { |
997 | 0 | s++; |
998 | 0 | if (*s == '+' || *s == '-') |
999 | 0 | s++; |
1000 | 0 | if (!is_digit(s)) |
1001 | 0 | return false; |
1002 | 0 | do { |
1003 | 0 | s++; |
1004 | 0 | } while (is_digit(s)); |
1005 | 0 | } |
1006 | | |
1007 | 51 | if (out) |
1008 | 0 | *out = strtod(*sp, NULL); |
1009 | | |
1010 | 51 | *sp = s; |
1011 | 51 | return true; |
1012 | 51 | } |
1013 | | |
1014 | | static void skip_space(const char **sp) |
1015 | 0 | { |
1016 | 0 | const char *s = *sp; |
1017 | 0 | while (is_space(s)) |
1018 | 0 | s++; |
1019 | 0 | *sp = s; |
1020 | 0 | } |
1021 | | |
1022 | | static void emit_value(SB *out, const JsonNode *node) |
1023 | 0 | { |
1024 | 0 | assert(tag_is_valid(node->tag)); |
1025 | 0 | switch (node->tag) { |
1026 | 0 | case JSON_NULL: |
1027 | 0 | sb_puts(out, "null"); |
1028 | 0 | break; |
1029 | 0 | case JSON_BOOL: |
1030 | 0 | sb_puts(out, node->bool_ ? "true" : "false"); |
1031 | 0 | break; |
1032 | 0 | case JSON_STRING: |
1033 | 0 | emit_string(out, node->string_); |
1034 | 0 | break; |
1035 | 0 | case JSON_NUMBER: |
1036 | 0 | emit_number(out, node->number_); |
1037 | 0 | break; |
1038 | 0 | case JSON_ARRAY: |
1039 | 0 | emit_array(out, node); |
1040 | 0 | break; |
1041 | 0 | case JSON_OBJECT: |
1042 | 0 | emit_object(out, node); |
1043 | 0 | break; |
1044 | 0 | default: |
1045 | 0 | assert(false); |
1046 | 0 | } |
1047 | 0 | } |
1048 | | |
1049 | | void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level) |
1050 | 124 | { |
1051 | 124 | assert(tag_is_valid(node->tag)); |
1052 | 0 | switch (node->tag) { |
1053 | 0 | case JSON_NULL: |
1054 | 0 | sb_puts(out, "null"); |
1055 | 0 | break; |
1056 | 0 | case JSON_BOOL: |
1057 | 0 | sb_puts(out, node->bool_ ? "true" : "false"); |
1058 | 0 | break; |
1059 | 54 | case JSON_STRING: |
1060 | 54 | emit_string(out, node->string_); |
1061 | 54 | break; |
1062 | 51 | case JSON_NUMBER: |
1063 | 51 | emit_number(out, node->number_); |
1064 | 51 | break; |
1065 | 0 | case JSON_ARRAY: |
1066 | 0 | emit_array_indented(out, node, space, indent_level); |
1067 | 0 | break; |
1068 | 19 | case JSON_OBJECT: |
1069 | 19 | emit_object_indented(out, node, space, indent_level); |
1070 | 19 | break; |
1071 | 0 | default: |
1072 | 0 | assert(false); |
1073 | 124 | } |
1074 | 124 | } |
1075 | | |
1076 | | static void emit_array(SB *out, const JsonNode *array) |
1077 | 0 | { |
1078 | 0 | const JsonNode *element; |
1079 | |
|
1080 | 0 | sb_putc(out, '['); |
1081 | 0 | json_foreach(element, array) { |
1082 | 0 | emit_value(out, element); |
1083 | 0 | if (element->next != NULL) |
1084 | 0 | sb_putc(out, ','); |
1085 | 0 | } |
1086 | 0 | sb_putc(out, ']'); |
1087 | 0 | } |
1088 | | |
1089 | | static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level) |
1090 | 0 | { |
1091 | 0 | const JsonNode *element = array->children.head; |
1092 | 0 | int i; |
1093 | |
|
1094 | 0 | if (element == NULL) { |
1095 | 0 | sb_puts(out, "[]"); |
1096 | 0 | return; |
1097 | 0 | } |
1098 | | |
1099 | 0 | sb_puts(out, "[\n"); |
1100 | 0 | while (element != NULL) { |
1101 | 0 | for (i = 0; i < indent_level + 1; i++) |
1102 | 0 | sb_puts(out, space); |
1103 | 0 | emit_value_indented(out, element, space, indent_level + 1); |
1104 | |
|
1105 | 0 | element = element->next; |
1106 | 0 | sb_puts(out, element != NULL ? ",\n" : "\n"); |
1107 | 0 | } |
1108 | 0 | for (i = 0; i < indent_level; i++) |
1109 | 0 | sb_puts(out, space); |
1110 | 0 | sb_putc(out, ']'); |
1111 | 0 | } |
1112 | | |
1113 | | static void emit_object(SB *out, const JsonNode *object) |
1114 | 0 | { |
1115 | 0 | const JsonNode *member; |
1116 | |
|
1117 | 0 | sb_putc(out, '{'); |
1118 | 0 | json_foreach(member, object) { |
1119 | 0 | emit_string(out, member->key); |
1120 | 0 | sb_putc(out, ':'); |
1121 | 0 | emit_value(out, member); |
1122 | 0 | if (member->next != NULL) |
1123 | 0 | sb_putc(out, ','); |
1124 | 0 | } |
1125 | 0 | sb_putc(out, '}'); |
1126 | 0 | } |
1127 | | |
1128 | | static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level) |
1129 | 19 | { |
1130 | 19 | const JsonNode *member = object->children.head; |
1131 | 19 | int i; |
1132 | | |
1133 | 19 | if (member == NULL) { |
1134 | 0 | sb_puts(out, "{}"); |
1135 | 0 | return; |
1136 | 0 | } |
1137 | | |
1138 | 19 | sb_puts(out, "{\n"); |
1139 | 124 | while (member != NULL) { |
1140 | 210 | for (i = 0; i < indent_level + 1; i++) |
1141 | 105 | sb_puts(out, space); |
1142 | 105 | emit_string(out, member->key); |
1143 | 105 | sb_puts(out, ": "); |
1144 | 105 | emit_value_indented(out, member, space, indent_level + 1); |
1145 | | |
1146 | 105 | member = member->next; |
1147 | 105 | sb_puts(out, member != NULL ? ",\n" : "\n"); |
1148 | 105 | } |
1149 | 19 | for (i = 0; i < indent_level; i++) |
1150 | 0 | sb_puts(out, space); |
1151 | 19 | sb_putc(out, '}'); |
1152 | 19 | } |
1153 | | |
1154 | | void emit_string(SB *out, const char *str) |
1155 | 159 | { |
1156 | 159 | bool escape_unicode = false; |
1157 | 159 | const char *s = str; |
1158 | 159 | char *b; |
1159 | | |
1160 | | // make assertion catchable |
1161 | 159 | #ifndef NDEBUG |
1162 | 159 | if (!utf8_validate(str)) { |
1163 | 0 | throw utf8::invalid_utf8(0); |
1164 | 0 | } |
1165 | 159 | #endif |
1166 | | |
1167 | 159 | assert(utf8_validate(str)); |
1168 | | |
1169 | | /* |
1170 | | * 14 bytes is enough space to write up to two |
1171 | | * \uXXXX escapes and two quotation marks. |
1172 | | */ |
1173 | 159 | sb_need(out, 14); |
1174 | 159 | b = out->cur; |
1175 | | |
1176 | 159 | *b++ = '"'; |
1177 | 5.12M | while (*s != 0) { |
1178 | 5.12M | unsigned char c = *s++; |
1179 | | |
1180 | | /* Encode the next character, and write it to b. */ |
1181 | 5.12M | switch (c) { |
1182 | 148 | case '"': |
1183 | 148 | *b++ = '\\'; |
1184 | 148 | *b++ = '"'; |
1185 | 148 | break; |
1186 | 246k | case '\\': |
1187 | 246k | *b++ = '\\'; |
1188 | 246k | *b++ = '\\'; |
1189 | 246k | break; |
1190 | 0 | case '\b': |
1191 | 0 | *b++ = '\\'; |
1192 | 0 | *b++ = 'b'; |
1193 | 0 | break; |
1194 | 9 | case '\f': |
1195 | 9 | *b++ = '\\'; |
1196 | 9 | *b++ = 'f'; |
1197 | 9 | break; |
1198 | 68 | case '\n': |
1199 | 68 | *b++ = '\\'; |
1200 | 68 | *b++ = 'n'; |
1201 | 68 | break; |
1202 | 4 | case '\r': |
1203 | 4 | *b++ = '\\'; |
1204 | 4 | *b++ = 'r'; |
1205 | 4 | break; |
1206 | 3 | case '\t': |
1207 | 3 | *b++ = '\\'; |
1208 | 3 | *b++ = 't'; |
1209 | 3 | break; |
1210 | 4.87M | default: { |
1211 | 4.87M | int len; |
1212 | | |
1213 | 4.87M | s--; |
1214 | 4.87M | len = utf8_validate_cz(s); |
1215 | | |
1216 | 4.87M | if (len == 0) { |
1217 | | /* |
1218 | | * Handle invalid UTF-8 character gracefully in production |
1219 | | * by writing a replacement character (U+FFFD) |
1220 | | * and skipping a single byte. |
1221 | | * |
1222 | | * This should never happen when assertions are enabled |
1223 | | * due to the assertion at the beginning of this function. |
1224 | | */ |
1225 | 0 | assert(false); |
1226 | 0 | if (escape_unicode) { |
1227 | 0 | strcpy(b, "\\uFFFD"); |
1228 | 0 | b += 6; |
1229 | 0 | } else { |
1230 | 0 | *b++ = 0xEFu; |
1231 | 0 | *b++ = 0xBFu; |
1232 | 0 | *b++ = 0xBDu; |
1233 | 0 | } |
1234 | 0 | s++; |
1235 | 4.87M | } else if (c < 0x1F || (c >= 0x80 && escape_unicode)) { |
1236 | | /* Encode using \u.... */ |
1237 | 1 | uint32_t unicode; |
1238 | | |
1239 | 1 | s += utf8_read_char(s, &unicode); |
1240 | | |
1241 | 1 | if (unicode <= 0xFFFF) { |
1242 | 1 | *b++ = '\\'; |
1243 | 1 | *b++ = 'u'; |
1244 | 1 | b += write_hex16(b, unicode); |
1245 | 1 | } else { |
1246 | | /* Produce a surrogate pair. */ |
1247 | 0 | uint16_t uc, lc; |
1248 | 0 | assert(unicode <= 0x10FFFF); |
1249 | 0 | to_surrogate_pair(unicode, &uc, &lc); |
1250 | 0 | *b++ = '\\'; |
1251 | 0 | *b++ = 'u'; |
1252 | 0 | b += write_hex16(b, uc); |
1253 | 0 | *b++ = '\\'; |
1254 | 0 | *b++ = 'u'; |
1255 | 0 | b += write_hex16(b, lc); |
1256 | 0 | } |
1257 | 4.87M | } else { |
1258 | | /* Write the character directly. */ |
1259 | 9.75M | while (len--) |
1260 | 4.87M | *b++ = *s++; |
1261 | 4.87M | } |
1262 | | |
1263 | 0 | break; |
1264 | 0 | } |
1265 | 5.12M | } |
1266 | | |
1267 | | /* |
1268 | | * Update *out to know about the new bytes, |
1269 | | * and set up b to write another encoded character. |
1270 | | */ |
1271 | 5.12M | out->cur = b; |
1272 | 5.12M | sb_need(out, 14); |
1273 | 5.12M | b = out->cur; |
1274 | 5.12M | } |
1275 | 159 | *b++ = '"'; |
1276 | | |
1277 | 159 | out->cur = b; |
1278 | 159 | } |
1279 | | |
1280 | | static void emit_number(SB *out, double num) |
1281 | 51 | { |
1282 | | /* |
1283 | | * This isn't exactly how JavaScript renders numbers, |
1284 | | * but it should produce valid JSON for reasonable numbers |
1285 | | * preserve precision well enough, and avoid some oddities |
1286 | | * like 0.3 -> 0.299999999999999988898 . |
1287 | | */ |
1288 | 51 | char buf[64]; |
1289 | 51 | sprintf(buf, "%.16g", num); |
1290 | | |
1291 | 51 | if (number_is_valid(buf)) |
1292 | 51 | sb_puts(out, buf); |
1293 | 0 | else |
1294 | 0 | sb_puts(out, "null"); |
1295 | 51 | } |
1296 | | |
1297 | | static bool tag_is_valid(unsigned int tag) |
1298 | 124 | { |
1299 | 124 | return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT); |
1300 | 124 | } |
1301 | | |
1302 | | static bool number_is_valid(const char *num) |
1303 | 51 | { |
1304 | 51 | return (parse_number(&num, NULL) && *num == '\0'); |
1305 | 51 | } |
1306 | | |
/*
 * Check that the text at *sp begins with str.
 *
 * On a match, *sp is advanced past the matched text and true is returned.
 * On a mismatch, *sp is left untouched and false is returned.
 * An empty str always matches and consumes nothing.
 */
static bool expect_literal(const char **sp, const char *str)
{
  const char *cursor = *sp;
  const char *expected;

  for (expected = str; *expected != '\0'; expected++, cursor++) {
    if (*cursor != *expected)
      return false;
  }

  *sp = cursor;
  return true;
}
1318 | | |
/*
 * Read exactly four hexadecimal digits (either case) starting at *sp.
 *
 * On success the 16-bit value is stored through out (when out is not
 * NULL), *sp is advanced past the four digits, and true is returned.
 * If any of the four characters is not in [0-9A-Fa-f], false is returned
 * and neither *sp nor *out is modified.
 */
static bool parse_hex16(const char **sp, uint16_t *out)
{
  const char *cursor = *sp;
  uint16_t value = 0;
  int remaining;

  for (remaining = 4; remaining > 0; remaining--) {
    const char ch = *cursor++;
    uint16_t digit;

    if (ch >= '0' && ch <= '9') {
      digit = (uint16_t)(ch - '0');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = (uint16_t)(ch - 'A' + 10);
    } else if (ch >= 'a' && ch <= 'f') {
      digit = (uint16_t)(ch - 'a' + 10);
    } else {
      return false;
    }

    /* Shift in the next nibble; the low four bits are free after the shift. */
    value = (uint16_t)((value << 4) | digit);
  }

  if (out != NULL)
    *out = value;
  *sp = cursor;
  return true;
}
1351 | | |
/*
 * Write val to out as exactly four uppercase hexadecimal digits,
 * most significant nibble first. No terminating NUL is added.
 * Always returns 4, the number of characters written.
 */
static int write_hex16(char *out, uint16_t val)
{
  static const char digits[] = "0123456789ABCDEF";
  int shift;

  for (shift = 12; shift >= 0; shift -= 4)
    *out++ = digits[(val >> shift) & 0xF];

  return 4;
}
1367 | | |
1368 | | bool json_check(const JsonNode *node, char errmsg[256]) |
1369 | 0 | { |
1370 | 0 | #define problem(...) do { \ |
1371 | 0 | if (errmsg != NULL) \ |
1372 | 0 | snprintf(errmsg, 256, __VA_ARGS__); \ |
1373 | 0 | return false; \ |
1374 | 0 | } while (0) |
1375 | |
|
1376 | 0 | if (node->key != NULL && !utf8_validate(node->key)) |
1377 | 0 | problem("key contains invalid UTF-8"); |
1378 | | |
1379 | 0 | if (!tag_is_valid(node->tag)) |
1380 | 0 | problem("tag is invalid (%u)", node->tag); |
1381 | | |
1382 | 0 | if (node->tag == JSON_BOOL) { |
1383 | 0 | if (node->bool_ != false && node->bool_ != true) |
1384 | 0 | problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true); |
1385 | 0 | } else if (node->tag == JSON_STRING) { |
1386 | 0 | if (node->string_ == NULL) |
1387 | 0 | problem("string_ is NULL"); |
1388 | 0 | if (!utf8_validate(node->string_)) |
1389 | 0 | problem("string_ contains invalid UTF-8"); |
1390 | 0 | } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) { |
1391 | 0 | JsonNode *head = node->children.head; |
1392 | 0 | JsonNode *tail = node->children.tail; |
1393 | |
|
1394 | 0 | if (head == NULL || tail == NULL) { |
1395 | 0 | if (head != NULL) |
1396 | 0 | problem("tail is NULL, but head is not"); |
1397 | 0 | if (tail != NULL) |
1398 | 0 | problem("head is NULL, but tail is not"); |
1399 | 0 | } else { |
1400 | 0 | JsonNode *child; |
1401 | 0 | JsonNode *last = NULL; |
1402 | |
|
1403 | 0 | if (head->prev != NULL) |
1404 | 0 | problem("First child's prev pointer is not NULL"); |
1405 | | |
1406 | 0 | for (child = head; child != NULL; last = child, child = child->next) { |
1407 | 0 | if (child == node) |
1408 | 0 | problem("node is its own child"); |
1409 | 0 | if (child->next == child) |
1410 | 0 | problem("child->next == child (cycle)"); |
1411 | 0 | if (child->next == head) |
1412 | 0 | problem("child->next == head (cycle)"); |
1413 | | |
1414 | 0 | if (child->parent != node) |
1415 | 0 | problem("child does not point back to parent"); |
1416 | 0 | if (child->next != NULL && child->next->prev != child) |
1417 | 0 | problem("child->next does not point back to child"); |
1418 | | |
1419 | 0 | if (node->tag == JSON_ARRAY && child->key != NULL) |
1420 | 0 | problem("Array element's key is not NULL"); |
1421 | 0 | if (node->tag == JSON_OBJECT && child->key == NULL) |
1422 | 0 | problem("Object member's key is NULL"); |
1423 | | |
1424 | 0 | if (!json_check(child, errmsg)) |
1425 | 0 | return false; |
1426 | 0 | } |
1427 | | |
1428 | 0 | if (last != tail) |
1429 | 0 | problem("tail does not match pointer found by starting at head and following next links"); |
1430 | 0 | } |
1431 | 0 | } |
1432 | | |
1433 | 0 | return true; |
1434 | |
|
1435 | 0 | #undef problem |
1436 | 0 | } |