/src/json-c/json_tokener.c
Line | Count | Source |
1 | | /* |
2 | | * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $ |
3 | | * |
4 | | * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. |
5 | | * Michael Clark <michael@metaparadigm.com> |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the MIT license. See COPYING for details. |
9 | | * |
10 | | * |
11 | | * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved. |
12 | | * The copyrights to the contents of this file are licensed under the MIT License |
13 | | * (https://www.opensource.org/licenses/mit-license.php) |
14 | | */ |
15 | | |
16 | | #include "config.h" |
17 | | |
18 | | #include "math_compat.h" |
19 | | #include <assert.h> |
20 | | #include <errno.h> |
21 | | #include <limits.h> |
22 | | #include <math.h> |
23 | | #include <stddef.h> |
24 | | #include <stdio.h> |
25 | | #include <stdlib.h> |
26 | | #include <string.h> |
27 | | |
28 | | #include "debug.h" |
29 | | #include "json_inttypes.h" |
30 | | #include "json_object.h" |
31 | | #include "json_object_private.h" |
32 | | #include "json_tokener.h" |
33 | | #include "json_util.h" |
34 | | #include "printbuf.h" |
35 | | #include "strdup_compat.h" |
36 | | |
37 | | #ifdef HAVE_LOCALE_H |
38 | | #include <locale.h> |
39 | | #endif /* HAVE_LOCALE_H */ |
40 | | #ifdef HAVE_XLOCALE_H |
41 | | #include <xlocale.h> |
42 | | #endif |
43 | | #ifdef HAVE_STRINGS_H |
44 | | #include <strings.h> |
45 | | #endif /* HAVE_STRINGS_H */ |
46 | | |
/* Numeric value (0..15) of an ASCII hex digit.  Letters are mapped through
 * their low three bits ('A'/'a' -> 1 ... 'F'/'f' -> 6) plus 9.  The argument
 * is evaluated more than once and is not validated here.
 */
#define jt_hexdigit(x) (((x) > '9') ? (((x) & 7) + 9) : ((x) - '0'))
48 | | |
49 | | #if !HAVE_STRNCASECMP && defined(_MSC_VER) |
50 | | /* MSC has the version as _strnicmp */ |
51 | | #define strncasecmp _strnicmp |
52 | | #elif !HAVE_STRNCASECMP |
53 | | #error You do not have strncasecmp on your system. |
54 | | #endif /* HAVE_STRNCASECMP */ |
55 | | |
56 | | #if defined(_MSC_VER) && (_MSC_VER <= 1800) |
57 | | /* VS2013 doesn't know about "inline" */ |
58 | | #define inline __inline |
59 | | #elif defined(AIX_CC) |
60 | | #define inline |
61 | | #endif |
62 | | |
63 | | /* The following helper functions are used to speed up parsing. They |
64 | | * are faster than their ctype counterparts because they assume that |
65 | | * the input is in ASCII and that the locale is set to "C". The |
66 | | * compiler will also inline these functions, providing an additional |
67 | | * speedup by saving on function calls. |
68 | | */ |
/* Returns 1 when c is a space, tab, line feed or carriage return, else 0. */
static inline int is_ws_char(char c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\n':
	case '\r': return 1;
	default: return 0;
	}
}
76 | | |
/* Returns 1 when c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static inline int is_hex_char(char c)
{
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'A' && c <= 'F')
		return 1;
	return (c >= 'a' && c <= 'f');
}
83 | | |
84 | | /* Use C99 NAN by default; if not available, nan("") should work too. */ |
85 | | #ifndef NAN |
86 | | #define NAN nan("") |
87 | | #endif /* !NAN */ |
88 | | |
/* Keyword spellings matched by the tokener. */
static const char json_null_str[] = "null";
static const char json_true_str[] = "true";
static const char json_false_str[] = "false";
static const char json_nan_str[] = "NaN";
static const char json_inf_str[] = "Infinity";
/* "Infinity" with the case of every letter flipped, so a case-insensitive
 * match needs no tolower() call on the input characters.
 */
static const char json_inf_str_invert[] = "iNFINITY";

/* Keyword lengths, excluding the terminating NUL. */
static const int json_null_str_len = (int)(sizeof(json_null_str) - 1);
static const int json_true_str_len = (int)(sizeof(json_true_str) - 1);
static const int json_false_str_len = (int)(sizeof(json_false_str) - 1);
static const int json_nan_str_len = (int)(sizeof(json_nan_str) - 1);
static const unsigned int json_inf_str_len = (unsigned int)(sizeof(json_inf_str) - 1);
101 | | |
/* clang-format off */
/* Human-readable messages returned by json_tokener_error_desc(), indexed by
 * the numeric value of the error code passed to it.  The pointers themselves
 * are const as well, so the table cannot be modified at run time.
 */
static const char *const json_tokener_errors[] = {
	"success",
	"continue",
	"nesting too deep",
	"unexpected end of data",
	"unexpected character",
	"null expected",
	"boolean expected",
	"number expected",
	"array value separator ',' expected",
	"quoted object property name expected",
	"object property name separator ':' expected",
	"object value separator ',' expected",
	"invalid string sequence",
	"expected comment",
	"invalid utf-8 string",
	"buffer size overflow",
	"out of memory"
};
/* clang-format on */
122 | | /* clang-format on */ |
123 | | |
124 | | /** |
125 | | * Validate the UTF-8 string in strict mode. |
126 | | * If it is not valid UTF-8, return an error. |
127 | | */ |
128 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes); |
129 | | |
130 | | static int json_tokener_parse_double(const char *buf, int len, double *retval); |
131 | | |
132 | | const char *json_tokener_error_desc(enum json_tokener_error jerr) |
133 | 0 | { |
134 | 0 | int jerr_int = (int)jerr; |
135 | 0 | if (jerr_int < 0 || |
136 | 0 | jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0]))) |
137 | 0 | return "Unknown error, " |
138 | 0 | "invalid json_tokener_error value passed to json_tokener_error_desc()"; |
139 | 0 | return json_tokener_errors[jerr]; |
140 | 0 | } |
141 | | |
142 | | enum json_tokener_error json_tokener_get_error(struct json_tokener *tok) |
143 | 0 | { |
144 | 0 | return tok->err; |
145 | 0 | } |
146 | | |
/* Helpers for decoding \uNNNN escape sequences.
 *
 * A code unit is a high surrogate when its top six bits (mask 0xFC00) equal
 * 0xD800, and a low surrogate when they equal 0xDC00.  A pair is combined by
 * taking the low ten bits of each half and adding 0x10000.
 */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
#define IS_LOW_SURROGATE(uc) (0xDC00 == ((uc) & 0xFC00))
#define DECODE_SURROGATE_PAIR(hi, lo) (0x10000 + (((hi) & 0x3FF) << 10) + ((lo) & 0x3FF))
/* The bytes EF BF BD, appended in place of unpaired surrogates. */
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
152 | | |
153 | | struct json_tokener *json_tokener_new_ex(int depth) |
154 | 2.24k | { |
155 | 2.24k | struct json_tokener *tok; |
156 | | |
157 | 2.24k | tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener)); |
158 | 2.24k | if (!tok) |
159 | 0 | return NULL; |
160 | 2.24k | tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec)); |
161 | 2.24k | if (!tok->stack) |
162 | 0 | { |
163 | 0 | free(tok); |
164 | 0 | return NULL; |
165 | 0 | } |
166 | 2.24k | tok->pb = printbuf_new(); |
167 | 2.24k | if (!tok->pb) |
168 | 0 | { |
169 | 0 | free(tok->stack); |
170 | 0 | free(tok); |
171 | 0 | return NULL; |
172 | 0 | } |
173 | 2.24k | tok->max_depth = depth; |
174 | 2.24k | json_tokener_reset(tok); |
175 | 2.24k | return tok; |
176 | 2.24k | } |
177 | | |
178 | | struct json_tokener *json_tokener_new(void) |
179 | 2.24k | { |
180 | 2.24k | return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH); |
181 | 2.24k | } |
182 | | |
183 | | void json_tokener_free(struct json_tokener *tok) |
184 | 2.24k | { |
185 | 2.24k | json_tokener_reset(tok); |
186 | 2.24k | if (tok->pb) |
187 | 2.24k | printbuf_free(tok->pb); |
188 | 2.24k | free(tok->stack); |
189 | 2.24k | free(tok); |
190 | 2.24k | } |
191 | | |
192 | | static void json_tokener_reset_level(struct json_tokener *tok, int depth) |
193 | 94.4k | { |
194 | 94.4k | tok->stack[depth].state = json_tokener_state_eatws; |
195 | 94.4k | tok->stack[depth].saved_state = json_tokener_state_start; |
196 | 94.4k | json_object_put(tok->stack[depth].current); |
197 | 94.4k | tok->stack[depth].current = NULL; |
198 | 94.4k | free(tok->stack[depth].obj_field_name); |
199 | 94.4k | tok->stack[depth].obj_field_name = NULL; |
200 | 94.4k | } |
201 | | |
202 | | void json_tokener_reset(struct json_tokener *tok) |
203 | 4.49k | { |
204 | 4.49k | int i; |
205 | 4.49k | if (!tok) |
206 | 0 | return; |
207 | | |
208 | 10.9k | for (i = tok->depth; i >= 0; i--) |
209 | 6.46k | json_tokener_reset_level(tok, i); |
210 | 4.49k | tok->depth = 0; |
211 | 4.49k | tok->err = json_tokener_success; |
212 | 4.49k | } |
213 | | |
214 | | struct json_object *json_tokener_parse(const char *str) |
215 | 2.24k | { |
216 | 2.24k | enum json_tokener_error jerr_ignored; |
217 | 2.24k | struct json_object *obj; |
218 | 2.24k | obj = json_tokener_parse_verbose(str, &jerr_ignored); |
219 | 2.24k | return obj; |
220 | 2.24k | } |
221 | | |
222 | | struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error) |
223 | 2.24k | { |
224 | 2.24k | struct json_tokener *tok; |
225 | 2.24k | struct json_object *obj; |
226 | | |
227 | 2.24k | tok = json_tokener_new(); |
228 | 2.24k | if (!tok) |
229 | 0 | { |
230 | 0 | *error = json_tokener_error_memory; |
231 | 0 | return NULL; |
232 | 0 | } |
233 | 2.24k | obj = json_tokener_parse_ex(tok, str, -1); |
234 | 2.24k | *error = tok->err; |
235 | 2.24k | if (tok->err != json_tokener_success |
236 | | #if 0 |
237 | | /* This would be a more sensible default, and cause parsing |
238 | | * things like "null123" to fail when the caller can't know |
239 | | * where the parsing left off, but starting to fail would |
240 | | * be a notable behaviour change. Save for a 1.0 release. |
241 | | */ |
242 | | || json_tokener_get_parse_end(tok) != strlen(str) |
243 | | #endif |
244 | 2.24k | ) |
245 | | |
246 | 1.86k | { |
247 | 1.86k | if (obj != NULL) |
248 | 0 | json_object_put(obj); |
249 | 1.86k | obj = NULL; |
250 | 1.86k | } |
251 | | |
252 | 2.24k | json_tokener_free(tok); |
253 | 2.24k | return obj; |
254 | 2.24k | } |
255 | | |
/* Shorthand for the fields of the stack record at the current nesting depth.
 * Each expands in terms of a variable literally named `tok`, which must be
 * in scope wherever they are used.
 */
#define state (tok->stack[tok->depth].state)
#define saved_state (tok->stack[tok->depth].saved_state)
#define current (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
260 | | |
261 | | /* Optimization: |
262 | | * json_tokener_parse_ex() consumed a lot of CPU in its main loop, |
263 | | * iterating character by character. A large performance boost is |
264 | | * achieved by using tighter loops to locally handle units such as |
265 | | * comments and strings. Loops that handle an entire token within |
266 | | * their scope also gather entire strings and pass them to |
267 | | * printbuf_memappend() in a single call, rather than calling |
268 | | * printbuf_memappend() one char at a time. |
269 | | * |
270 | | * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is |
271 | | * common to both the main loop and the tighter loops. |
272 | | */ |
273 | | |
/* PEEK_CHAR(dest, tok) macro:
 *   Peeks at the current char (*str) and stores it in dest.
 *   Evaluates to 1 on success, or to 0 after setting (tok)->err when:
 *     - (tok)->char_offset has reached len: err is json_tokener_success if
 *       depth is 0 and the tokener is eating whitespace after a finished
 *       value, otherwise json_tokener_continue;
 *     - JSON_TOKENER_VALIDATE_UTF8 is set and json_tokener_validate_utf8()
 *       rejects the byte: err is json_tokener_error_parse_utf8_string.
 *   Implicit inputs: str, len, nBytesp vars; the state/saved_state macros
 *   additionally require the tokener variable to be named `tok`.
 *   The tok argument is parenthesized at every use.
 */
#define PEEK_CHAR(dest, tok)                                                   \
	(((tok)->char_offset == len)                                           \
	     ? (((tok)->depth == 0 && state == json_tokener_state_eatws &&     \
	         saved_state == json_tokener_state_finish)                     \
	            ? (((tok)->err = json_tokener_success), 0)                 \
	            : (((tok)->err = json_tokener_continue), 0))               \
	     : ((((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) &&                \
	         (!json_tokener_validate_utf8(*str, nBytesp)))                 \
	            ? (((tok)->err = json_tokener_error_parse_utf8_string), 0) \
	            : (((dest) = *str), 1)))
289 | | |
/* ADVANCE_CHAR(str, tok) macro:
 *   Moves str forward by one char and bumps (tok)->char_offset by one.
 *   The expression's value is the variable c, i.e. the char most recently
 *   peeked, so callers can test it directly (it is 0 at end of string).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) ((str)++, ++((tok)->char_offset), c)
296 | | |
/* printbuf_memappend_checked(p, s, l) macro:
 *   Appends l bytes starting at s to printbuf p.
 *   If printbuf_memappend() reports failure (negative return), records
 *   json_tokener_error_memory in tok->err and jumps to the `out` label.
 *   Implicit inputs: tok var, `out` label in the enclosing function.
 */
#define printbuf_memappend_checked(p, s, l)                 \
	do                                                  \
	{                                                   \
		if (printbuf_memappend((p), (s), (l)) < 0)  \
		{                                           \
			tok->err = json_tokener_error_memory; \
			goto out;                           \
		}                                           \
	} while (0)
309 | | |
310 | | /* End optimization macro defs */ |
311 | | |
312 | | struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len) |
313 | 2.24k | { |
314 | 2.24k | struct json_object *obj = NULL; |
315 | 2.24k | char c = '\1'; |
316 | 2.24k | unsigned int nBytes = 0; |
317 | 2.24k | unsigned int *nBytesp = &nBytes; |
318 | | |
319 | 2.24k | #ifdef HAVE_USELOCALE |
320 | 2.24k | locale_t oldlocale = uselocale(NULL); |
321 | 2.24k | locale_t newloc; |
322 | | #elif defined(HAVE_SETLOCALE) |
323 | | char *oldlocale = NULL; |
324 | | #endif |
325 | | |
326 | 2.24k | tok->char_offset = 0; |
327 | 2.24k | tok->err = json_tokener_success; |
328 | | |
329 | | /* this interface is presently not 64-bit clean due to the int len argument |
330 | | * and the internal printbuf interface that takes 32-bit int len arguments |
331 | | * so the function limits the maximum string size to INT32_MAX (2GB). |
332 | | * If the function is called with len == -1 then strlen is called to check |
333 | | * the string length is less than INT32_MAX (2GB) |
334 | | */ |
335 | 2.24k | if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) |
336 | 0 | { |
337 | 0 | tok->err = json_tokener_error_size; |
338 | 0 | return NULL; |
339 | 0 | } |
340 | | |
341 | 2.24k | #ifdef HAVE_USELOCALE |
342 | 2.24k | { |
343 | 2.24k | locale_t duploc = duplocale(oldlocale); |
344 | 2.24k | if (duploc == NULL && errno == ENOMEM) |
345 | 0 | { |
346 | 0 | tok->err = json_tokener_error_memory; |
347 | 0 | return NULL; |
348 | 0 | } |
349 | 2.24k | newloc = newlocale(LC_NUMERIC_MASK, "C", duploc); |
350 | 2.24k | if (newloc == NULL) |
351 | 0 | { |
352 | 0 | tok->err = json_tokener_error_memory; |
353 | 0 | freelocale(duploc); |
354 | 0 | return NULL; |
355 | 0 | } |
356 | | #ifdef NEWLOCALE_NEEDS_FREELOCALE |
357 | | // Older versions of FreeBSD (<12.4) don't free the locale |
358 | | // passed to newlocale(), so do it here |
359 | | freelocale(duploc); |
360 | | #endif |
361 | 2.24k | uselocale(newloc); |
362 | 2.24k | } |
363 | | #elif defined(HAVE_SETLOCALE) |
364 | | { |
365 | | char *tmplocale; |
366 | | tmplocale = setlocale(LC_NUMERIC, NULL); |
367 | | if (tmplocale) |
368 | | { |
369 | | oldlocale = strdup(tmplocale); |
370 | | if (oldlocale == NULL) |
371 | | { |
372 | | tok->err = json_tokener_error_memory; |
373 | | return NULL; |
374 | | } |
375 | | } |
376 | | setlocale(LC_NUMERIC, "C"); |
377 | | } |
378 | | #endif |
379 | | |
380 | 166k | while (PEEK_CHAR(c, tok)) // Note: c might be '\0' ! |
381 | 166k | { |
382 | | |
383 | 570k | redo_char: |
384 | 570k | switch (state) |
385 | 570k | { |
386 | | |
387 | 224k | case json_tokener_state_eatws: |
388 | | /* Advance until we change state */ |
389 | 230k | while (is_ws_char(c)) |
390 | 5.90k | { |
391 | 5.90k | if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) |
392 | 0 | goto out; |
393 | 5.90k | } |
394 | 224k | if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) |
395 | 3.58k | { |
396 | 3.58k | printbuf_reset(tok->pb); |
397 | 3.58k | printbuf_memappend_checked(tok->pb, &c, 1); |
398 | 3.58k | state = json_tokener_state_comment_start; |
399 | 3.58k | } |
400 | 220k | else |
401 | 220k | { |
402 | 220k | state = saved_state; |
403 | 220k | goto redo_char; |
404 | 220k | } |
405 | 3.58k | break; |
406 | | |
407 | 46.7k | case json_tokener_state_start: |
408 | 46.7k | switch (c) |
409 | 46.7k | { |
410 | 4.02k | case '{': |
411 | 4.02k | state = json_tokener_state_eatws; |
412 | 4.02k | saved_state = json_tokener_state_object_field_start; |
413 | 4.02k | current = json_object_new_object(); |
414 | 4.02k | if (current == NULL) |
415 | 0 | { |
416 | 0 | tok->err = json_tokener_error_memory; |
417 | 0 | goto out; |
418 | 0 | } |
419 | 4.02k | break; |
420 | 14.3k | case '[': |
421 | 14.3k | state = json_tokener_state_eatws; |
422 | 14.3k | saved_state = json_tokener_state_array; |
423 | 14.3k | current = json_object_new_array(); |
424 | 14.3k | if (current == NULL) |
425 | 0 | { |
426 | 0 | tok->err = json_tokener_error_memory; |
427 | 0 | goto out; |
428 | 0 | } |
429 | 14.3k | break; |
430 | 14.3k | case 'I': |
431 | 78 | case 'i': |
432 | 78 | state = json_tokener_state_inf; |
433 | 78 | printbuf_reset(tok->pb); |
434 | 78 | tok->st_pos = 0; |
435 | 78 | goto redo_char; |
436 | 751 | case 'N': |
437 | 1.50k | case 'n': |
438 | 1.50k | state = json_tokener_state_null; // or NaN |
439 | 1.50k | printbuf_reset(tok->pb); |
440 | 1.50k | tok->st_pos = 0; |
441 | 1.50k | goto redo_char; |
442 | 1.49k | case '\'': |
443 | 1.49k | if (tok->flags & JSON_TOKENER_STRICT) |
444 | 0 | { |
445 | | /* in STRICT mode only double-quote are allowed */ |
446 | 0 | tok->err = json_tokener_error_parse_unexpected; |
447 | 0 | goto out; |
448 | 0 | } |
449 | | /* FALLTHRU */ |
450 | 2.02k | case '"': |
451 | 2.02k | state = json_tokener_state_string; |
452 | 2.02k | printbuf_reset(tok->pb); |
453 | 2.02k | tok->quote_char = c; |
454 | 2.02k | break; |
455 | 258 | case 'T': |
456 | 751 | case 't': |
457 | 1.16k | case 'F': |
458 | 1.63k | case 'f': |
459 | 1.63k | state = json_tokener_state_boolean; |
460 | 1.63k | printbuf_reset(tok->pb); |
461 | 1.63k | tok->st_pos = 0; |
462 | 1.63k | goto redo_char; |
463 | 3.17k | case '0': |
464 | 4.58k | case '1': |
465 | 6.23k | case '2': |
466 | 7.16k | case '3': |
467 | 9.13k | case '4': |
468 | 13.0k | case '5': |
469 | 15.1k | case '6': |
470 | 15.9k | case '7': |
471 | 17.2k | case '8': |
472 | 21.0k | case '9': |
473 | 22.9k | case '-': |
474 | 22.9k | state = json_tokener_state_number; |
475 | 22.9k | printbuf_reset(tok->pb); |
476 | 22.9k | tok->is_double = 0; |
477 | 22.9k | goto redo_char; |
478 | 226 | default: tok->err = json_tokener_error_parse_unexpected; goto out; |
479 | 46.7k | } |
480 | 20.3k | break; |
481 | | |
482 | 43.1k | case json_tokener_state_finish: |
483 | 43.1k | if (tok->depth == 0) |
484 | 360 | goto out; |
485 | 42.7k | obj = json_object_get(current); |
486 | 42.7k | json_tokener_reset_level(tok, tok->depth); |
487 | 42.7k | tok->depth--; |
488 | 42.7k | goto redo_char; |
489 | | |
490 | 119 | case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */ |
491 | 119 | { |
492 | | /* If we were guaranteed to have len set, then we could (usually) handle |
493 | | * the entire "Infinity" check in a single strncmp (strncasecmp), but |
494 | | * since len might be -1 (i.e. "read until \0"), we need to check it |
495 | | * a character at a time. |
496 | | * Trying to handle it both ways would make this code considerably more |
497 | | * complicated with likely little performance benefit. |
498 | | */ |
499 | 119 | int is_negative = 0; |
500 | | |
501 | | /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */ |
502 | 755 | while (tok->st_pos < (int)json_inf_str_len) |
503 | 687 | { |
504 | 687 | char inf_char = *str; |
505 | 687 | if (inf_char != json_inf_str[tok->st_pos] && |
506 | 372 | ((tok->flags & JSON_TOKENER_STRICT) || |
507 | 372 | inf_char != json_inf_str_invert[tok->st_pos]) |
508 | 687 | ) |
509 | 51 | { |
510 | 51 | tok->err = json_tokener_error_parse_unexpected; |
511 | 51 | goto out; |
512 | 51 | } |
513 | 636 | tok->st_pos++; |
514 | 636 | (void)ADVANCE_CHAR(str, tok); |
515 | 636 | if (!PEEK_CHAR(c, tok)) |
516 | 0 | { |
517 | | /* out of input chars, for now at least */ |
518 | 0 | goto out; |
519 | 0 | } |
520 | 636 | } |
521 | | /* We checked the full length of "Infinity", so create the object. |
522 | | * When handling -Infinity, the number parsing code will have dropped |
523 | | * the "-" into tok->pb for us, so check it now. |
524 | | */ |
525 | 68 | if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-') |
526 | 35 | { |
527 | 35 | is_negative = 1; |
528 | 35 | } |
529 | 68 | current = json_object_new_double(is_negative ? -INFINITY : INFINITY); |
530 | 68 | if (current == NULL) |
531 | 0 | { |
532 | 0 | tok->err = json_tokener_error_memory; |
533 | 0 | goto out; |
534 | 0 | } |
535 | 68 | saved_state = json_tokener_state_finish; |
536 | 68 | state = json_tokener_state_eatws; |
537 | 68 | goto redo_char; |
538 | 68 | } |
539 | 0 | break; |
540 | 6.84k | case json_tokener_state_null: /* aka starts with 'n' */ |
541 | 6.84k | { |
542 | 6.84k | int size; |
543 | 6.84k | int size_nan; |
544 | 6.84k | printbuf_memappend_checked(tok->pb, &c, 1); |
545 | 6.84k | size = json_min(tok->st_pos + 1, json_null_str_len); |
546 | 6.84k | size_nan = json_min(tok->st_pos + 1, json_nan_str_len); |
547 | 6.84k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
548 | 6.84k | strncasecmp(json_null_str, tok->pb->buf, size) == 0) || |
549 | 1.66k | (strncmp(json_null_str, tok->pb->buf, size) == 0)) |
550 | 5.17k | { |
551 | 5.17k | if (tok->st_pos == json_null_str_len) |
552 | 907 | { |
553 | 907 | current = NULL; |
554 | 907 | saved_state = json_tokener_state_finish; |
555 | 907 | state = json_tokener_state_eatws; |
556 | 907 | goto redo_char; |
557 | 907 | } |
558 | 5.17k | } |
559 | 1.66k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
560 | 1.66k | strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || |
561 | 74 | (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)) |
562 | 1.59k | { |
563 | 1.59k | if (tok->st_pos == json_nan_str_len) |
564 | 527 | { |
565 | 527 | current = json_object_new_double(NAN); |
566 | 527 | if (current == NULL) |
567 | 0 | { |
568 | 0 | tok->err = json_tokener_error_memory; |
569 | 0 | goto out; |
570 | 0 | } |
571 | 527 | saved_state = json_tokener_state_finish; |
572 | 527 | state = json_tokener_state_eatws; |
573 | 527 | goto redo_char; |
574 | 527 | } |
575 | 1.59k | } |
576 | 74 | else |
577 | 74 | { |
578 | 74 | tok->err = json_tokener_error_parse_null; |
579 | 74 | goto out; |
580 | 74 | } |
581 | 5.33k | tok->st_pos++; |
582 | 5.33k | } |
583 | 0 | break; |
584 | | |
585 | 3.58k | case json_tokener_state_comment_start: |
586 | 3.58k | if (c == '*') |
587 | 353 | { |
588 | 353 | state = json_tokener_state_comment; |
589 | 353 | } |
590 | 3.23k | else if (c == '/') |
591 | 3.19k | { |
592 | 3.19k | state = json_tokener_state_comment_eol; |
593 | 3.19k | } |
594 | 32 | else |
595 | 32 | { |
596 | 32 | tok->err = json_tokener_error_parse_comment; |
597 | 32 | goto out; |
598 | 32 | } |
599 | 3.55k | printbuf_memappend_checked(tok->pb, &c, 1); |
600 | 3.55k | break; |
601 | | |
602 | 3.55k | case json_tokener_state_comment: |
603 | 1.93k | { |
604 | | /* Advance until we change state */ |
605 | 1.93k | const char *case_start = str; |
606 | 83.4k | while (c != '*') |
607 | 81.6k | { |
608 | 81.6k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
609 | 107 | { |
610 | 107 | printbuf_memappend_checked(tok->pb, case_start, |
611 | 107 | str - case_start); |
612 | 107 | goto out; |
613 | 107 | } |
614 | 81.6k | } |
615 | 1.82k | printbuf_memappend_checked(tok->pb, case_start, 1 + str - case_start); |
616 | 1.82k | state = json_tokener_state_comment_end; |
617 | 1.82k | } |
618 | 0 | break; |
619 | | |
620 | 3.19k | case json_tokener_state_comment_eol: |
621 | 3.19k | { |
622 | | /* Advance until we change state */ |
623 | 3.19k | const char *case_start = str; |
624 | 31.5k | while (c != '\n') |
625 | 28.3k | { |
626 | 28.3k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
627 | 50 | { |
628 | 50 | printbuf_memappend_checked(tok->pb, case_start, |
629 | 50 | str - case_start); |
630 | 50 | goto out; |
631 | 50 | } |
632 | 28.3k | } |
633 | 3.14k | printbuf_memappend_checked(tok->pb, case_start, str - case_start); |
634 | 3.14k | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
635 | 3.14k | state = json_tokener_state_eatws; |
636 | 3.14k | } |
637 | 0 | break; |
638 | | |
639 | 1.82k | case json_tokener_state_comment_end: |
640 | 1.82k | printbuf_memappend_checked(tok->pb, &c, 1); |
641 | 1.82k | if (c == '/') |
642 | 222 | { |
643 | 222 | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
644 | 222 | state = json_tokener_state_eatws; |
645 | 222 | } |
646 | 1.60k | else |
647 | 1.60k | { |
648 | 1.60k | state = json_tokener_state_comment; |
649 | 1.60k | } |
650 | 1.82k | break; |
651 | | |
652 | 4.82k | case json_tokener_state_string: |
653 | 4.82k | { |
654 | | /* Advance until we change state */ |
655 | 4.82k | const char *case_start = str; |
656 | 50.7k | while (1) |
657 | 50.7k | { |
658 | 50.7k | if (c == tok->quote_char) |
659 | 1.66k | { |
660 | 1.66k | printbuf_memappend_checked(tok->pb, case_start, |
661 | 1.66k | str - case_start); |
662 | 1.66k | current = |
663 | 1.66k | json_object_new_string_len(tok->pb->buf, tok->pb->bpos); |
664 | 1.66k | if (current == NULL) |
665 | 0 | { |
666 | 0 | tok->err = json_tokener_error_memory; |
667 | 0 | goto out; |
668 | 0 | } |
669 | 1.66k | saved_state = json_tokener_state_finish; |
670 | 1.66k | state = json_tokener_state_eatws; |
671 | 1.66k | break; |
672 | 1.66k | } |
673 | 49.1k | else if (c == '\\') |
674 | 2.89k | { |
675 | 2.89k | printbuf_memappend_checked(tok->pb, case_start, |
676 | 2.89k | str - case_start); |
677 | 2.89k | saved_state = json_tokener_state_string; |
678 | 2.89k | state = json_tokener_state_string_escape; |
679 | 2.89k | break; |
680 | 2.89k | } |
681 | 46.2k | else if ((tok->flags & JSON_TOKENER_STRICT) && c <= 0x1f) |
682 | 0 | { |
683 | | // Disallow control characters in strict mode |
684 | 0 | tok->err = json_tokener_error_parse_string; |
685 | 0 | goto out; |
686 | 0 | } |
687 | 46.2k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
688 | 263 | { |
689 | 263 | printbuf_memappend_checked(tok->pb, case_start, |
690 | 263 | str - case_start); |
691 | 263 | goto out; |
692 | 263 | } |
693 | 46.2k | } |
694 | 4.82k | } |
695 | 4.56k | break; |
696 | | |
697 | 4.88k | case json_tokener_state_string_escape: |
698 | 4.88k | switch (c) |
699 | 4.88k | { |
700 | 224 | case '"': |
701 | 620 | case '\\': |
702 | 871 | case '/': |
703 | 871 | printbuf_memappend_checked(tok->pb, &c, 1); |
704 | 871 | state = saved_state; |
705 | 871 | break; |
706 | 331 | case 'b': |
707 | 702 | case 'n': |
708 | 935 | case 'r': |
709 | 1.23k | case 't': |
710 | 1.53k | case 'f': |
711 | 1.53k | if (c == 'b') |
712 | 331 | printbuf_memappend_checked(tok->pb, "\b", 1); |
713 | 1.20k | else if (c == 'n') |
714 | 371 | printbuf_memappend_checked(tok->pb, "\n", 1); |
715 | 830 | else if (c == 'r') |
716 | 233 | printbuf_memappend_checked(tok->pb, "\r", 1); |
717 | 597 | else if (c == 't') |
718 | 298 | printbuf_memappend_checked(tok->pb, "\t", 1); |
719 | 299 | else if (c == 'f') |
720 | 299 | printbuf_memappend_checked(tok->pb, "\f", 1); |
721 | 1.53k | state = saved_state; |
722 | 1.53k | break; |
723 | 2.42k | case 'u': |
724 | 2.42k | tok->ucs_char = 0; |
725 | 2.42k | tok->st_pos = 0; |
726 | 2.42k | state = json_tokener_state_escape_unicode; |
727 | 2.42k | break; |
728 | 60 | default: tok->err = json_tokener_error_parse_string; goto out; |
729 | 4.88k | } |
730 | 4.82k | break; |
731 | | |
732 | | // =================================================== |
733 | | |
734 | 4.82k | case json_tokener_state_escape_unicode: |
735 | 3.30k | { |
736 | | /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */ |
737 | 13.0k | while (1) |
738 | 13.0k | { |
739 | 13.0k | if (!c || !is_hex_char(c)) |
740 | 76 | { |
741 | 76 | tok->err = json_tokener_error_parse_string; |
742 | 76 | goto out; |
743 | 76 | } |
744 | 12.9k | tok->ucs_char |= |
745 | 12.9k | ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4)); |
746 | 12.9k | tok->st_pos++; |
747 | 12.9k | if (tok->st_pos >= 4) |
748 | 3.23k | break; |
749 | | |
750 | 9.75k | (void)ADVANCE_CHAR(str, tok); |
751 | 9.75k | if (!PEEK_CHAR(c, tok)) |
752 | 0 | { |
753 | | /* |
754 | | * We're out of characters in the current call to |
755 | | * json_tokener_parse(), but a subsequent call might |
756 | | * provide us with more, so leave our current state |
757 | | * as-is (including tok->high_surrogate) and return. |
758 | | */ |
759 | 0 | goto out; |
760 | 0 | } |
761 | 9.75k | } |
762 | 3.23k | tok->st_pos = 0; |
763 | | |
764 | | /* Now, we have a full \uNNNN sequence in tok->ucs_char */ |
765 | | |
766 | | /* If the *previous* sequence was a high surrogate ... */ |
767 | 3.23k | if (tok->high_surrogate) |
768 | 873 | { |
769 | 873 | if (IS_LOW_SURROGATE(tok->ucs_char)) |
770 | 409 | { |
771 | | /* Recalculate the ucs_char, then fall thru to process normally */ |
772 | 409 | tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate, |
773 | 409 | tok->ucs_char); |
774 | 409 | } |
775 | 464 | else |
776 | 464 | { |
777 | | /* High surrogate was not followed by a low surrogate |
778 | | * Replace the high and process the rest normally |
779 | | */ |
780 | 464 | printbuf_memappend_checked(tok->pb, |
781 | 464 | (char *)utf8_replacement_char, 3); |
782 | 464 | } |
783 | 873 | tok->high_surrogate = 0; |
784 | 873 | } |
785 | | |
786 | 3.23k | if (tok->ucs_char < 0x80) |
787 | 302 | { |
788 | 302 | unsigned char unescaped_utf[1]; |
789 | 302 | unescaped_utf[0] = tok->ucs_char; |
790 | 302 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 1); |
791 | 302 | } |
792 | 2.92k | else if (tok->ucs_char < 0x800) |
793 | 282 | { |
794 | 282 | unsigned char unescaped_utf[2]; |
795 | 282 | unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); |
796 | 282 | unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); |
797 | 282 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 2); |
798 | 282 | } |
799 | 2.64k | else if (IS_HIGH_SURROGATE(tok->ucs_char)) |
800 | 1.53k | { |
801 | | /* |
802 | | * The next two characters should be \u, HOWEVER, |
803 | | * we can't simply peek ahead here, because the |
804 | | * characters we need might not be passed to us |
805 | | * until a subsequent call to json_tokener_parse. |
806 | | * Instead, transition through a couple of states. |
807 | | * (now): |
808 | | * _escape_unicode => _unicode_need_escape |
809 | | * (see a '\\' char): |
810 | | * _unicode_need_escape => _unicode_need_u |
811 | | * (see a 'u' char): |
812 | | * _unicode_need_u => _escape_unicode |
813 | | * ...and we'll end up back around here. |
814 | | */ |
815 | 1.53k | tok->high_surrogate = tok->ucs_char; |
816 | 1.53k | tok->ucs_char = 0; |
817 | 1.53k | state = json_tokener_state_escape_unicode_need_escape; |
818 | 1.53k | break; |
819 | 1.53k | } |
820 | 1.11k | else if (IS_LOW_SURROGATE(tok->ucs_char)) |
821 | 340 | { |
822 | | /* Got a low surrogate not preceded by a high */ |
823 | 340 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
824 | 340 | } |
825 | 771 | else if (tok->ucs_char < 0x10000) |
826 | 368 | { |
827 | 368 | unsigned char unescaped_utf[3]; |
828 | 368 | unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); |
829 | 368 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
830 | 368 | unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); |
831 | 368 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 3); |
832 | 368 | } |
833 | 403 | else if (tok->ucs_char < 0x110000) |
834 | 403 | { |
835 | 403 | unsigned char unescaped_utf[4]; |
836 | 403 | unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); |
837 | 403 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); |
838 | 403 | unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
839 | 403 | unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); |
840 | 403 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 4); |
841 | 403 | } |
842 | 0 | else |
843 | 0 | { |
844 | | /* Don't know what we got--insert the replacement char */ |
845 | 0 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
846 | 0 | } |
847 | 1.69k | state = saved_state; // i.e. _state_string or _state_object_field |
848 | 1.69k | } |
849 | 0 | break; |
850 | | |
851 | 1.53k | case json_tokener_state_escape_unicode_need_escape: |
852 | | // We get here after processing a high_surrogate |
853 | | // require a '\\' char |
854 | 1.53k | if (!c || c != '\\') |
855 | 378 | { |
856 | | /* Got a high surrogate without another sequence following |
857 | | * it. Put a replacement char in for the high surrogate |
858 | | * and pop back up to _state_string or _state_object_field. |
859 | | */ |
860 | 378 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
861 | 378 | tok->high_surrogate = 0; |
862 | 378 | tok->ucs_char = 0; |
863 | 378 | tok->st_pos = 0; |
864 | 378 | state = saved_state; |
865 | 378 | goto redo_char; |
866 | 378 | } |
867 | 1.15k | state = json_tokener_state_escape_unicode_need_u; |
868 | 1.15k | break; |
869 | | |
870 | 1.15k | case json_tokener_state_escape_unicode_need_u: |
871 | | /* We already had a \ char, check that it's \u */ |
872 | 1.15k | if (!c || c != 'u') |
873 | 273 | { |
874 | | /* Got a high surrogate with some non-unicode escape |
875 | | * sequence following it. |
876 | | * Put a replacement char in for the high surrogate |
877 | | * and handle the escape sequence normally. |
878 | | */ |
879 | 273 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
880 | 273 | tok->high_surrogate = 0; |
881 | 273 | tok->ucs_char = 0; |
882 | 273 | tok->st_pos = 0; |
883 | 273 | state = json_tokener_state_string_escape; |
884 | 273 | goto redo_char; |
885 | 273 | } |
886 | 885 | state = json_tokener_state_escape_unicode; |
887 | 885 | break; |
888 | | |
889 | | // =================================================== |
890 | | |
891 | 8.71k | case json_tokener_state_boolean: |
892 | 8.71k | { |
893 | 8.71k | int size1, size2; |
894 | 8.71k | printbuf_memappend_checked(tok->pb, &c, 1); |
895 | 8.71k | size1 = json_min(tok->st_pos + 1, json_true_str_len); |
896 | 8.71k | size2 = json_min(tok->st_pos + 1, json_false_str_len); |
897 | 8.71k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
898 | 8.71k | strncasecmp(json_true_str, tok->pb->buf, size1) == 0) || |
899 | 5.13k | (strncmp(json_true_str, tok->pb->buf, size1) == 0)) |
900 | 3.58k | { |
901 | 3.58k | if (tok->st_pos == json_true_str_len) |
902 | 699 | { |
903 | 699 | current = json_object_new_boolean(1); |
904 | 699 | if (current == NULL) |
905 | 0 | { |
906 | 0 | tok->err = json_tokener_error_memory; |
907 | 0 | goto out; |
908 | 0 | } |
909 | 699 | saved_state = json_tokener_state_finish; |
910 | 699 | state = json_tokener_state_eatws; |
911 | 699 | goto redo_char; |
912 | 699 | } |
913 | 3.58k | } |
914 | 5.13k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
915 | 5.13k | strncasecmp(json_false_str, tok->pb->buf, size2) == 0) || |
916 | 124 | (strncmp(json_false_str, tok->pb->buf, size2) == 0)) |
917 | 5.01k | { |
918 | 5.01k | if (tok->st_pos == json_false_str_len) |
919 | 811 | { |
920 | 811 | current = json_object_new_boolean(0); |
921 | 811 | if (current == NULL) |
922 | 0 | { |
923 | 0 | tok->err = json_tokener_error_memory; |
924 | 0 | goto out; |
925 | 0 | } |
926 | 811 | saved_state = json_tokener_state_finish; |
927 | 811 | state = json_tokener_state_eatws; |
928 | 811 | goto redo_char; |
929 | 811 | } |
930 | 5.01k | } |
931 | 124 | else |
932 | 124 | { |
933 | 124 | tok->err = json_tokener_error_parse_boolean; |
934 | 124 | goto out; |
935 | 124 | } |
936 | 7.08k | tok->st_pos++; |
937 | 7.08k | } |
938 | 0 | break; |
939 | | |
940 | 22.9k | case json_tokener_state_number: |
941 | 22.9k | { |
942 | | /* Advance until we change state */ |
943 | 22.9k | const char *case_start = str; |
944 | 22.9k | int case_len = 0; |
945 | 22.9k | int is_exponent = 0; |
946 | 22.9k | int neg_sign_ok = 1; |
947 | 22.9k | int pos_sign_ok = 0; |
948 | 22.9k | if (printbuf_length(tok->pb) > 0) |
949 | 0 | { |
950 | | /* We don't save all state from the previous incremental parse |
951 | | so we need to re-generate it based on the saved string so far. |
952 | | */ |
953 | 0 | char *e_loc = strchr(tok->pb->buf, 'e'); |
954 | 0 | if (!e_loc) |
955 | 0 | e_loc = strchr(tok->pb->buf, 'E'); |
956 | 0 | if (e_loc) |
957 | 0 | { |
958 | 0 | char *last_saved_char = |
959 | 0 | &tok->pb->buf[printbuf_length(tok->pb) - 1]; |
960 | 0 | is_exponent = 1; |
961 | 0 | pos_sign_ok = neg_sign_ok = 1; |
962 | | /* If the "e" isn't at the end, we can't start with a '-' */ |
963 | 0 | if (e_loc != last_saved_char) |
964 | 0 | { |
965 | 0 | neg_sign_ok = 0; |
966 | 0 | pos_sign_ok = 0; |
967 | 0 | } |
968 | | // else leave it set to 1, i.e. start of the new input |
969 | 0 | } |
970 | 0 | } |
971 | | |
972 | 99.2k | while (c && ((c >= '0' && c <= '9') || |
973 | 30.9k | (!is_exponent && (c == 'e' || c == 'E')) || |
974 | 26.1k | (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') || |
975 | 23.1k | (!tok->is_double && c == '.'))) |
976 | 76.3k | { |
977 | 76.3k | pos_sign_ok = neg_sign_ok = 0; |
978 | 76.3k | ++case_len; |
979 | | |
980 | | /* non-digit characters checks */ |
981 | | /* note: since the main loop condition to get here was |
982 | | * an input starting with 0-9 or '-', we are |
983 | | * protected from input starting with '.' or |
984 | | * e/E. |
985 | | */ |
986 | 76.3k | switch (c) |
987 | 76.3k | { |
988 | 566 | case '.': |
989 | 566 | tok->is_double = 1; |
990 | 566 | pos_sign_ok = 1; |
991 | 566 | neg_sign_ok = 1; |
992 | 566 | break; |
993 | 1.05k | case 'e': /* FALLTHRU */ |
994 | 4.74k | case 'E': |
995 | 4.74k | is_exponent = 1; |
996 | 4.74k | tok->is_double = 1; |
997 | | /* the exponent part can begin with a negative sign */ |
998 | 4.74k | pos_sign_ok = neg_sign_ok = 1; |
999 | 4.74k | break; |
1000 | 71.0k | default: break; |
1001 | 76.3k | } |
1002 | | |
1003 | 76.3k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1004 | 0 | { |
1005 | 0 | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1006 | 0 | goto out; |
1007 | 0 | } |
1008 | 76.3k | } |
1009 | | /* |
1010 | | Now we know c isn't a valid number char, but check whether |
1011 | | it might have been intended to be, and return a potentially |
1012 | | more understandable error right away. |
1013 | | However, if we're at the top-level, use the number as-is |
1014 | | because c can be part of a new object to parse on the |
1015 | | next call to json_tokener_parse(). |
1016 | | */ |
1017 | 22.9k | if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' && |
1018 | 2.36k | c != 'I' && c != 'i' && !is_ws_char(c)) |
1019 | 164 | { |
1020 | 164 | tok->err = json_tokener_error_parse_number; |
1021 | 164 | goto out; |
1022 | 164 | } |
1023 | 22.7k | if (case_len > 0) |
1024 | 22.7k | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1025 | | |
1026 | | // Check for -Infinity |
1027 | 22.7k | if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I')) |
1028 | 41 | { |
1029 | 41 | state = json_tokener_state_inf; |
1030 | 41 | tok->st_pos = 0; |
1031 | 41 | goto redo_char; |
1032 | 41 | } |
1033 | 22.7k | if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT)) |
1034 | 5.27k | { |
1035 | | /* Trim some chars off the end, to allow things |
1036 | | like "123e+" to parse ok. */ |
1037 | 11.0k | while (printbuf_length(tok->pb) > 1) |
1038 | 7.30k | { |
1039 | 7.30k | char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1]; |
1040 | 7.30k | if (last_char != 'e' && last_char != 'E' && |
1041 | 2.74k | last_char != '-' && last_char != '+') |
1042 | 1.57k | { |
1043 | 1.57k | break; |
1044 | 1.57k | } |
1045 | 5.73k | tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0'; |
1046 | 5.73k | printbuf_length(tok->pb)--; |
1047 | 5.73k | } |
1048 | 5.27k | } |
1049 | 22.7k | } |
1050 | 0 | { |
1051 | 22.7k | int64_t num64; |
1052 | 22.7k | uint64_t numuint64; |
1053 | 22.7k | double numd; |
1054 | 22.7k | if (!tok->is_double && tok->pb->buf[0] == '-' && |
1055 | 1.62k | json_parse_int64(tok->pb->buf, &num64) == 0) |
1056 | 1.59k | { |
1057 | 1.59k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1058 | 0 | { |
1059 | 0 | tok->err = json_tokener_error_parse_number; |
1060 | 0 | goto out; |
1061 | 0 | } |
1062 | 1.59k | current = json_object_new_int64(num64); |
1063 | 1.59k | if (current == NULL) |
1064 | 0 | { |
1065 | 0 | tok->err = json_tokener_error_memory; |
1066 | 0 | goto out; |
1067 | 0 | } |
1068 | 1.59k | } |
1069 | 21.1k | else if (!tok->is_double && tok->pb->buf[0] != '-' && |
1070 | 15.8k | json_parse_uint64(tok->pb->buf, &numuint64) == 0) |
1071 | 15.8k | { |
1072 | 15.8k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1073 | 0 | { |
1074 | 0 | tok->err = json_tokener_error_parse_number; |
1075 | 0 | goto out; |
1076 | 0 | } |
1077 | 15.8k | if (numuint64 && tok->pb->buf[0] == '0' && |
1078 | 674 | (tok->flags & JSON_TOKENER_STRICT)) |
1079 | 0 | { |
1080 | 0 | tok->err = json_tokener_error_parse_number; |
1081 | 0 | goto out; |
1082 | 0 | } |
1083 | 15.8k | if (numuint64 <= INT64_MAX) |
1084 | 15.5k | { |
1085 | 15.5k | num64 = (uint64_t)numuint64; |
1086 | 15.5k | current = json_object_new_int64(num64); |
1087 | 15.5k | if (current == NULL) |
1088 | 0 | { |
1089 | 0 | tok->err = json_tokener_error_memory; |
1090 | 0 | goto out; |
1091 | 0 | } |
1092 | 15.5k | } |
1093 | 336 | else |
1094 | 336 | { |
1095 | 336 | current = json_object_new_uint64(numuint64); |
1096 | 336 | if (current == NULL) |
1097 | 0 | { |
1098 | 0 | tok->err = json_tokener_error_memory; |
1099 | 0 | goto out; |
1100 | 0 | } |
1101 | 336 | } |
1102 | 15.8k | } |
1103 | 5.29k | else if (tok->is_double && |
1104 | 5.27k | json_tokener_parse_double( |
1105 | 5.27k | tok->pb->buf, printbuf_length(tok->pb), &numd) == 0) |
1106 | 5.26k | { |
1107 | 5.26k | current = json_object_new_double_s(numd, tok->pb->buf); |
1108 | 5.26k | if (current == NULL) |
1109 | 0 | { |
1110 | 0 | tok->err = json_tokener_error_memory; |
1111 | 0 | goto out; |
1112 | 0 | } |
1113 | 5.26k | } |
1114 | 34 | else |
1115 | 34 | { |
1116 | 34 | tok->err = json_tokener_error_parse_number; |
1117 | 34 | goto out; |
1118 | 34 | } |
1119 | 22.7k | saved_state = json_tokener_state_finish; |
1120 | 22.7k | state = json_tokener_state_eatws; |
1121 | 22.7k | goto redo_char; |
1122 | 22.7k | } |
1123 | 0 | break; |
1124 | | |
1125 | 25.5k | case json_tokener_state_array_after_sep: |
1126 | 39.8k | case json_tokener_state_array: |
1127 | 39.8k | if (c == ']') |
1128 | 10.6k | { |
1129 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1130 | 10.6k | json_object_array_shrink(current, 0); |
1131 | | |
1132 | 10.6k | if (state == json_tokener_state_array_after_sep && |
1133 | 837 | (tok->flags & JSON_TOKENER_STRICT)) |
1134 | 0 | { |
1135 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1136 | 0 | goto out; |
1137 | 0 | } |
1138 | 10.6k | saved_state = json_tokener_state_finish; |
1139 | 10.6k | state = json_tokener_state_eatws; |
1140 | 10.6k | } |
1141 | 29.2k | else |
1142 | 29.2k | { |
1143 | 29.2k | if (tok->depth >= tok->max_depth - 1) |
1144 | 2 | { |
1145 | 2 | tok->err = json_tokener_error_depth; |
1146 | 2 | goto out; |
1147 | 2 | } |
1148 | 29.2k | state = json_tokener_state_array_add; |
1149 | 29.2k | tok->depth++; |
1150 | 29.2k | json_tokener_reset_level(tok, tok->depth); |
1151 | 29.2k | goto redo_char; |
1152 | 29.2k | } |
1153 | 10.6k | break; |
1154 | | |
1155 | 27.9k | case json_tokener_state_array_add: |
1156 | 27.9k | if (json_object_array_add(current, obj) != 0) |
1157 | 0 | { |
1158 | 0 | tok->err = json_tokener_error_memory; |
1159 | 0 | goto out; |
1160 | 0 | } |
1161 | 27.9k | saved_state = json_tokener_state_array_sep; |
1162 | 27.9k | state = json_tokener_state_eatws; |
1163 | 27.9k | goto redo_char; |
1164 | | |
1165 | 27.9k | case json_tokener_state_array_sep: |
1166 | 27.9k | if (c == ']') |
1167 | 2.34k | { |
1168 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1169 | 2.34k | json_object_array_shrink(current, 0); |
1170 | | |
1171 | 2.34k | saved_state = json_tokener_state_finish; |
1172 | 2.34k | state = json_tokener_state_eatws; |
1173 | 2.34k | } |
1174 | 25.6k | else if (c == ',') |
1175 | 25.5k | { |
1176 | 25.5k | saved_state = json_tokener_state_array_after_sep; |
1177 | 25.5k | state = json_tokener_state_eatws; |
1178 | 25.5k | } |
1179 | 136 | else |
1180 | 136 | { |
1181 | 136 | tok->err = json_tokener_error_parse_array; |
1182 | 136 | goto out; |
1183 | 136 | } |
1184 | 27.8k | break; |
1185 | | |
1186 | 27.8k | case json_tokener_state_object_field_start: |
1187 | 17.4k | case json_tokener_state_object_field_start_after_sep: |
1188 | 17.4k | if (c == '}') |
1189 | 1.65k | { |
1190 | 1.65k | if (state == json_tokener_state_object_field_start_after_sep && |
1191 | 319 | (tok->flags & JSON_TOKENER_STRICT)) |
1192 | 0 | { |
1193 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1194 | 0 | goto out; |
1195 | 0 | } |
1196 | 1.65k | saved_state = json_tokener_state_finish; |
1197 | 1.65k | state = json_tokener_state_eatws; |
1198 | 1.65k | } |
1199 | 15.7k | else if (c == '"' || c == '\'') |
1200 | 15.7k | { |
1201 | 15.7k | tok->quote_char = c; |
1202 | 15.7k | printbuf_reset(tok->pb); |
1203 | 15.7k | state = json_tokener_state_object_field; |
1204 | 15.7k | } |
1205 | 71 | else |
1206 | 71 | { |
1207 | 71 | tok->err = json_tokener_error_parse_object_key_name; |
1208 | 71 | goto out; |
1209 | 71 | } |
1210 | 17.3k | break; |
1211 | | |
1212 | 17.3k | case json_tokener_state_object_field: |
1213 | 17.3k | { |
1214 | | /* Advance until we change state */ |
1215 | 17.3k | const char *case_start = str; |
1216 | 177k | while (1) |
1217 | 177k | { |
1218 | 177k | if (c == tok->quote_char) |
1219 | 15.5k | { |
1220 | 15.5k | printbuf_memappend_checked(tok->pb, case_start, |
1221 | 15.5k | str - case_start); |
1222 | 15.5k | obj_field_name = strdup(tok->pb->buf); |
1223 | 15.5k | if (obj_field_name == NULL) |
1224 | 0 | { |
1225 | 0 | tok->err = json_tokener_error_memory; |
1226 | 0 | goto out; |
1227 | 0 | } |
1228 | 15.5k | saved_state = json_tokener_state_object_field_end; |
1229 | 15.5k | state = json_tokener_state_eatws; |
1230 | 15.5k | break; |
1231 | 15.5k | } |
1232 | 161k | else if (c == '\\') |
1233 | 1.71k | { |
1234 | 1.71k | printbuf_memappend_checked(tok->pb, case_start, |
1235 | 1.71k | str - case_start); |
1236 | 1.71k | saved_state = json_tokener_state_object_field; |
1237 | 1.71k | state = json_tokener_state_string_escape; |
1238 | 1.71k | break; |
1239 | 1.71k | } |
1240 | 160k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1241 | 160 | { |
1242 | 160 | printbuf_memappend_checked(tok->pb, case_start, |
1243 | 160 | str - case_start); |
1244 | 160 | goto out; |
1245 | 160 | } |
1246 | 160k | } |
1247 | 17.3k | } |
1248 | 17.2k | break; |
1249 | | |
1250 | 17.2k | case json_tokener_state_object_field_end: |
1251 | 15.5k | if (c == ':') |
1252 | 15.4k | { |
1253 | 15.4k | saved_state = json_tokener_state_object_value; |
1254 | 15.4k | state = json_tokener_state_eatws; |
1255 | 15.4k | } |
1256 | 38 | else |
1257 | 38 | { |
1258 | 38 | tok->err = json_tokener_error_parse_object_key_sep; |
1259 | 38 | goto out; |
1260 | 38 | } |
1261 | 15.4k | break; |
1262 | | |
1263 | 15.4k | case json_tokener_state_object_value: |
1264 | 15.4k | if (tok->depth >= tok->max_depth - 1) |
1265 | 2 | { |
1266 | 2 | tok->err = json_tokener_error_depth; |
1267 | 2 | goto out; |
1268 | 2 | } |
1269 | 15.4k | state = json_tokener_state_object_value_add; |
1270 | 15.4k | tok->depth++; |
1271 | 15.4k | json_tokener_reset_level(tok, tok->depth); |
1272 | 15.4k | goto redo_char; |
1273 | | |
1274 | 14.7k | case json_tokener_state_object_value_add: |
1275 | 14.7k | if (json_object_object_add(current, obj_field_name, obj) != 0) |
1276 | 0 | { |
1277 | 0 | tok->err = json_tokener_error_memory; |
1278 | 0 | goto out; |
1279 | 0 | } |
1280 | 14.7k | free(obj_field_name); |
1281 | 14.7k | obj_field_name = NULL; |
1282 | 14.7k | saved_state = json_tokener_state_object_sep; |
1283 | 14.7k | state = json_tokener_state_eatws; |
1284 | 14.7k | goto redo_char; |
1285 | | |
1286 | 14.7k | case json_tokener_state_object_sep: |
1287 | | /* { */ |
1288 | 14.7k | if (c == '}') |
1289 | 1.14k | { |
1290 | 1.14k | saved_state = json_tokener_state_finish; |
1291 | 1.14k | state = json_tokener_state_eatws; |
1292 | 1.14k | } |
1293 | 13.6k | else if (c == ',') |
1294 | 13.4k | { |
1295 | 13.4k | saved_state = json_tokener_state_object_field_start_after_sep; |
1296 | 13.4k | state = json_tokener_state_eatws; |
1297 | 13.4k | } |
1298 | 192 | else |
1299 | 192 | { |
1300 | 192 | tok->err = json_tokener_error_parse_object_value_sep; |
1301 | 192 | goto out; |
1302 | 192 | } |
1303 | 14.5k | break; |
1304 | 570k | } |
1305 | 164k | (void)ADVANCE_CHAR(str, tok); |
1306 | 164k | if (!c) // This is the char *before* advancing |
1307 | 24 | break; |
1308 | 164k | } /* while(PEEK_CHAR) */ |
1309 | | |
1310 | 2.24k | out: |
1311 | 2.24k | if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0)) |
1312 | 0 | { |
1313 | 0 | tok->err = json_tokener_error_parse_utf8_string; |
1314 | 0 | } |
1315 | 2.24k | if (c && (state == json_tokener_state_finish) && (tok->depth == 0) && |
1316 | 32 | (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) == |
1317 | 32 | JSON_TOKENER_STRICT) |
1318 | 0 | { |
1319 | | /* unexpected char after JSON data */ |
1320 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1321 | 0 | } |
1322 | 2.24k | if (!c) |
1323 | 1.75k | { |
1324 | | /* We hit an eof char (0) */ |
1325 | 1.75k | if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish) |
1326 | 1.39k | tok->err = json_tokener_error_parse_eof; |
1327 | 1.75k | } |
1328 | | |
1329 | 2.24k | #ifdef HAVE_USELOCALE |
1330 | 2.24k | uselocale(oldlocale); |
1331 | 2.24k | freelocale(newloc); |
1332 | | #elif defined(HAVE_SETLOCALE) |
1333 | | setlocale(LC_NUMERIC, oldlocale); |
1334 | | free(oldlocale); |
1335 | | #endif |
1336 | | |
1337 | 2.24k | if (tok->err == json_tokener_success) |
1338 | 385 | { |
1339 | 385 | json_object *ret = json_object_get(current); |
1340 | 385 | int ii; |
1341 | | |
1342 | | /* Partially reset, so we parse additional objects on subsequent calls. */ |
1343 | 895 | for (ii = tok->depth; ii >= 0; ii--) |
1344 | 510 | json_tokener_reset_level(tok, ii); |
1345 | 385 | return ret; |
1346 | 385 | } |
1347 | | |
1348 | 1.86k | MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err], |
1349 | 1.86k | tok->char_offset); |
1350 | 1.86k | return NULL; |
1351 | 2.24k | } |
1352 | | |
1353 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) |
1354 | 0 | { |
1355 | 0 | unsigned char chr = c; |
1356 | 0 | if (*nBytes == 0) |
1357 | 0 | { |
1358 | 0 | if (chr >= 0x80) |
1359 | 0 | { |
1360 | 0 | if ((chr & 0xe0) == 0xc0) |
1361 | 0 | *nBytes = 1; |
1362 | 0 | else if ((chr & 0xf0) == 0xe0) |
1363 | 0 | *nBytes = 2; |
1364 | 0 | else if ((chr & 0xf8) == 0xf0) |
1365 | 0 | *nBytes = 3; |
1366 | 0 | else |
1367 | 0 | return 0; |
1368 | 0 | } |
1369 | 0 | } |
1370 | 0 | else |
1371 | 0 | { |
1372 | 0 | if ((chr & 0xC0) != 0x80) |
1373 | 0 | return 0; |
1374 | 0 | (*nBytes)--; |
1375 | 0 | } |
1376 | 0 | return 1; |
1377 | 0 | } |
1378 | | |
1379 | | void json_tokener_set_flags(struct json_tokener *tok, int flags) |
1380 | 0 | { |
1381 | 0 | tok->flags = flags; |
1382 | 0 | } |
1383 | | |
1384 | | size_t json_tokener_get_parse_end(struct json_tokener *tok) |
1385 | 0 | { |
1386 | 0 | assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */ |
1387 | 0 | return (size_t)tok->char_offset; |
1388 | 0 | } |
1389 | | |
/**
 * Convert a textual number to a double, requiring that the whole buffer
 * is consumed by the conversion.
 *
 * @param buf     NUL-terminated text to convert
 * @param len     number of characters of buf that must make up the number
 * @param retval  receives the result of strtod(); it is written even when
 *                the function reports failure
 * @return 0 if exactly len (> 0) characters were converted, 1 otherwise
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end;

	*retval = strtod(buf, &end);

	/* An empty (or nonsensical) length can never be a number.  Without
	 * this guard len == 0 would be reported as success, because strtod()
	 * leaves end == buf when it converts nothing.
	 */
	if (len <= 0 || end == buf)
		return 1;

	/* Succeed only if strtod() stopped exactly at the end of the text */
	if (buf + len == end)
		return 0; // It worked
	return 1;
}