/src/json-c/json_tokener.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $ |
3 | | * |
4 | | * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. |
5 | | * Michael Clark <michael@metaparadigm.com> |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the MIT license. See COPYING for details. |
9 | | * |
10 | | * |
11 | | * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved. |
12 | | * The copyrights to the contents of this file are licensed under the MIT License |
13 | | * (https://www.opensource.org/licenses/mit-license.php) |
14 | | */ |
15 | | |
16 | | #include "config.h" |
17 | | |
18 | | #include "math_compat.h" |
19 | | #include <assert.h> |
20 | | #include <errno.h> |
21 | | #include <limits.h> |
22 | | #include <math.h> |
23 | | #include <stddef.h> |
24 | | #include <stdio.h> |
25 | | #include <stdlib.h> |
26 | | #include <string.h> |
27 | | |
28 | | #include "debug.h" |
29 | | #include "json_inttypes.h" |
30 | | #include "json_object.h" |
31 | | #include "json_object_private.h" |
32 | | #include "json_tokener.h" |
33 | | #include "json_util.h" |
34 | | #include "printbuf.h" |
35 | | #include "strdup_compat.h" |
36 | | |
37 | | #ifdef HAVE_LOCALE_H |
38 | | #include <locale.h> |
39 | | #endif /* HAVE_LOCALE_H */ |
40 | | #ifdef HAVE_XLOCALE_H |
41 | | #include <xlocale.h> |
42 | | #endif |
43 | | #ifdef HAVE_STRINGS_H |
44 | | #include <strings.h> |
45 | | #endif /* HAVE_STRINGS_H */ |
46 | | |
47 | 27.1k | #define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9) |
48 | | |
49 | | #if !HAVE_STRNCASECMP && defined(_MSC_VER) |
50 | | /* MSC has the version as _strnicmp */ |
51 | | #define strncasecmp _strnicmp |
52 | | #elif !HAVE_STRNCASECMP |
53 | | #error You do not have strncasecmp on your system. |
54 | | #endif /* HAVE_STRNCASECMP */ |
55 | | |
56 | | #if defined(_MSC_VER) && (_MSC_VER <= 1800) |
57 | | /* VS2013 doesn't know about "inline" */ |
58 | | #define inline __inline |
59 | | #elif defined(AIX_CC) |
60 | | #define inline |
61 | | #endif |
62 | | |
63 | | /* The following helper functions are used to speed up parsing. They |
64 | | * are faster than their ctype counterparts because they assume that |
65 | | * the input is in ASCII and that the locale is set to "C". The |
66 | | * compiler will also inline these functions, providing an additional |
67 | | * speedup by saving on function calls. |
68 | | */ |
69 | | static inline int is_ws_char(char c) |
70 | 1.93M | { |
71 | 1.93M | return c == ' ' |
72 | 1.93M | || c == '\t' |
73 | 1.93M | || c == '\n' |
74 | 1.93M | || c == '\r'; |
75 | 1.93M | } |
76 | | |
77 | | static inline int is_hex_char(char c) |
78 | 27.2k | { |
79 | 27.2k | return (c >= '0' && c <= '9') |
80 | 27.2k | || (c >= 'A' && c <= 'F') |
81 | 27.2k | || (c >= 'a' && c <= 'f'); |
82 | 27.2k | } |
83 | | |
84 | | /* Use C99 NAN by default; if not available, nan("") should work too. */ |
85 | | #ifndef NAN |
86 | | #define NAN nan("") |
87 | | #endif /* !NAN */ |
88 | | |
89 | | static const char json_null_str[] = "null"; |
90 | | static const int json_null_str_len = sizeof(json_null_str) - 1; |
91 | | static const char json_inf_str[] = "Infinity"; |
92 | | /* Swapped case "Infinity" to avoid need to call tolower() on input chars: */ |
93 | | static const char json_inf_str_invert[] = "iNFINITY"; |
94 | | static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1; |
95 | | static const char json_nan_str[] = "NaN"; |
96 | | static const int json_nan_str_len = sizeof(json_nan_str) - 1; |
97 | | static const char json_true_str[] = "true"; |
98 | | static const int json_true_str_len = sizeof(json_true_str) - 1; |
99 | | static const char json_false_str[] = "false"; |
100 | | static const int json_false_str_len = sizeof(json_false_str) - 1; |
101 | | |
102 | | /* clang-format off */ |
103 | | static const char *json_tokener_errors[] = { |
104 | | "success", |
105 | | "continue", |
106 | | "nesting too deep", |
107 | | "unexpected end of data", |
108 | | "unexpected character", |
109 | | "null expected", |
110 | | "boolean expected", |
111 | | "number expected", |
112 | | "array value separator ',' expected", |
113 | | "quoted object property name expected", |
114 | | "object property name separator ':' expected", |
115 | | "object value separator ',' expected", |
116 | | "invalid string sequence", |
117 | | "expected comment", |
118 | | "invalid utf-8 string", |
119 | | "buffer size overflow", |
120 | | "out of memory" |
121 | | }; |
122 | | /* clang-format on */ |
123 | | |
124 | | /** |
125 | | * Validate the UTF-8 string in strict mode. |
126 | | * If the input is not valid UTF-8, return an error. |
127 | | */ |
128 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes); |
129 | | |
130 | | static int json_tokener_parse_double(const char *buf, int len, double *retval); |
131 | | |
132 | | const char *json_tokener_error_desc(enum json_tokener_error jerr) |
133 | 2.35k | { |
134 | 2.35k | int jerr_int = (int)jerr; |
135 | 2.35k | if (jerr_int < 0 || |
136 | 2.35k | jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0]))) |
137 | 0 | return "Unknown error, " |
138 | 0 | "invalid json_tokener_error value passed to json_tokener_error_desc()"; |
139 | 2.35k | return json_tokener_errors[jerr]; |
140 | 2.35k | } |
141 | | |
142 | | enum json_tokener_error json_tokener_get_error(struct json_tokener *tok) |
143 | 4.71k | { |
144 | 4.71k | return tok->err; |
145 | 4.71k | } |
146 | | |
147 | | /* Stuff for decoding unicode sequences */ |
148 | 4.88k | #define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800) |
149 | 3.60k | #define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00) |
150 | 736 | #define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000) |
151 | | static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD}; |
152 | | |
153 | | struct json_tokener *json_tokener_new_ex(int depth) |
154 | 10.3k | { |
155 | 10.3k | struct json_tokener *tok; |
156 | | |
157 | 10.3k | if (depth < 1) |
158 | 0 | return NULL; |
159 | | |
160 | 10.3k | tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener)); |
161 | 10.3k | if (!tok) |
162 | 0 | return NULL; |
163 | 10.3k | tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec)); |
164 | 10.3k | if (!tok->stack) |
165 | 0 | { |
166 | 0 | free(tok); |
167 | 0 | return NULL; |
168 | 0 | } |
169 | 10.3k | tok->pb = printbuf_new(); |
170 | 10.3k | if (!tok->pb) |
171 | 0 | { |
172 | 0 | free(tok->stack); |
173 | 0 | free(tok); |
174 | 0 | return NULL; |
175 | 0 | } |
176 | 10.3k | tok->max_depth = depth; |
177 | 10.3k | json_tokener_reset(tok); |
178 | 10.3k | return tok; |
179 | 10.3k | } |
180 | | |
181 | | struct json_tokener *json_tokener_new(void) |
182 | 10.3k | { |
183 | 10.3k | return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH); |
184 | 10.3k | } |
185 | | |
186 | | void json_tokener_free(struct json_tokener *tok) |
187 | 10.3k | { |
188 | 10.3k | if (!tok) |
189 | 0 | return; |
190 | 10.3k | json_tokener_reset(tok); |
191 | 10.3k | if (tok->pb) |
192 | 10.3k | printbuf_free(tok->pb); |
193 | 10.3k | free(tok->stack); |
194 | 10.3k | free(tok); |
195 | 10.3k | } |
196 | | |
197 | | static void json_tokener_reset_level(struct json_tokener *tok, int depth) |
198 | 746k | { |
199 | 746k | tok->stack[depth].state = json_tokener_state_eatws; |
200 | 746k | tok->stack[depth].saved_state = json_tokener_state_start; |
201 | 746k | json_object_put(tok->stack[depth].current); |
202 | 746k | tok->stack[depth].current = NULL; |
203 | 746k | free(tok->stack[depth].obj_field_name); |
204 | 746k | tok->stack[depth].obj_field_name = NULL; |
205 | 746k | } |
206 | | |
207 | | void json_tokener_reset(struct json_tokener *tok) |
208 | 20.7k | { |
209 | 20.7k | int i; |
210 | 20.7k | if (!tok) |
211 | 0 | return; |
212 | | |
213 | 45.1k | for (i = tok->depth; i >= 0; i--) |
214 | 24.4k | json_tokener_reset_level(tok, i); |
215 | 20.7k | tok->depth = 0; |
216 | 20.7k | tok->err = json_tokener_success; |
217 | 20.7k | } |
218 | | |
219 | | struct json_object *json_tokener_parse(const char *str) |
220 | 0 | { |
221 | 0 | enum json_tokener_error jerr_ignored; |
222 | 0 | struct json_object *obj; |
223 | 0 | obj = json_tokener_parse_verbose(str, &jerr_ignored); |
224 | 0 | return obj; |
225 | 0 | } |
226 | | |
227 | | struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error) |
228 | 0 | { |
229 | 0 | struct json_tokener *tok; |
230 | 0 | struct json_object *obj; |
231 | | |
232 | 0 | tok = json_tokener_new(); |
233 | 0 | if (!tok) |
234 | 0 | { |
235 | 0 | *error = json_tokener_error_memory; |
236 | 0 | return NULL; |
237 | 0 | } |
238 | 0 | obj = json_tokener_parse_ex(tok, str, -1); |
239 | 0 | *error = tok->err; |
240 | 0 | if (tok->err != json_tokener_success |
241 | | #if 0 |
242 | | /* This would be a more sensible default, and cause parsing |
243 | | * things like "null123" to fail when the caller can't know |
244 | | * where the parsing left off, but starting to fail would |
245 | | * be a notable behaviour change. Save for a 1.0 release. |
246 | | */ |
247 | | || json_tokener_get_parse_end(tok) != strlen(str) |
248 | | #endif |
249 | 0 | ) |
250 | | |
251 | 0 | { |
252 | 0 | if (obj != NULL) |
253 | 0 | json_object_put(obj); |
254 | 0 | obj = NULL; |
255 | 0 | } |
256 | | |
257 | 0 | json_tokener_free(tok); |
258 | 0 | return obj; |
259 | 0 | } |
260 | | |
261 | 8.61M | #define state tok->stack[tok->depth].state |
262 | 3.25M | #define saved_state tok->stack[tok->depth].saved_state |
263 | 1.45M | #define current tok->stack[tok->depth].current |
264 | 674k | #define obj_field_name tok->stack[tok->depth].obj_field_name |
265 | | |
266 | | /* Optimization: |
267 | | * json_tokener_parse_ex() consumed a lot of CPU in its main loop, |
268 | | * iterating character-by-character. A large performance boost is |
269 | | * achieved by using tighter loops to locally handle units such as |
270 | | * comments and strings. Loops that handle an entire token within |
271 | | * their scope also gather entire strings and pass them to |
272 | | * printbuf_memappend() in a single call, rather than calling |
273 | | * printbuf_memappend() one char at a time. |
274 | | * |
275 | | * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is |
276 | | * common to both the main loop and the tighter loops. |
277 | | */ |
278 | | |
279 | | /* PEEK_CHAR(dest, tok) macro: |
280 | | * Peeks at the current char and stores it in dest. |
281 | | * Returns 1 on success, sets tok->err and returns 0 if no more chars. |
282 | | * Implicit inputs: str, len, nBytesp vars |
283 | | */ |
284 | | #define PEEK_CHAR(dest, tok) \ |
285 | 7.07M | (((tok)->char_offset == len) \ |
286 | 7.07M | ? (((tok)->depth == 0 && state == json_tokener_state_eatws && \ |
287 | 0 | saved_state == json_tokener_state_finish) \ |
288 | 0 | ? (((tok)->err = json_tokener_success), 0) \ |
289 | 0 | : (((tok)->err = json_tokener_continue), 0)) \ |
290 | 7.07M | : (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && \ |
291 | 7.07M | (!json_tokener_validate_utf8(*str, nBytesp))) \ |
292 | 7.07M | ? ((tok->err = json_tokener_error_parse_utf8_string), 0) \ |
293 | 7.07M | : (((dest) = *str), 1))) |
294 | | |
295 | | /* ADVANCE_CHAR() macro: |
296 | | * Increments str & tok->char_offset. |
297 | | * For convenience of existing conditionals, returns the old value of c (0 on eof). |
298 | | * Implicit inputs: c var |
299 | | */ |
300 | 12.6M | #define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c) |
301 | | |
302 | | /* printbuf_memappend_checked(p, s, l) macro: |
303 | | * Append string s of length l to printbuf p. |
304 | | * If the operation fails, abort the parse operation with a memory error. |
305 | | */ |
306 | | #define printbuf_memappend_checked(p, s, l) \ |
307 | 655k | do { \ |
308 | 655k | if (printbuf_memappend((p), (s), (l)) < 0) \ |
309 | 655k | { \ |
310 | 0 | tok->err = json_tokener_error_memory; \ |
311 | 0 | goto out; \ |
312 | 0 | } \ |
313 | 655k | } while (0) |
314 | | |
315 | | /* End optimization macro defs */ |
316 | | |
317 | | struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len) |
318 | 10.3k | { |
319 | 10.3k | struct json_object *obj = NULL; |
320 | 10.3k | char c = '\1'; |
321 | 10.3k | unsigned int nBytes = 0; |
322 | 10.3k | unsigned int *nBytesp = &nBytes; |
323 | | |
324 | 10.3k | #ifdef HAVE_USELOCALE |
325 | 10.3k | locale_t oldlocale = uselocale(NULL); |
326 | 10.3k | locale_t newloc; |
327 | | #elif defined(HAVE_SETLOCALE) |
328 | | char *oldlocale = NULL; |
329 | | #endif |
330 | | |
331 | 10.3k | tok->char_offset = 0; |
332 | 10.3k | tok->err = json_tokener_success; |
333 | | |
334 | | /* this interface is presently not 64-bit clean due to the int len argument |
335 | | * and the internal printbuf interface that takes 32-bit int len arguments |
336 | | * so the function limits the maximum string size to INT32_MAX (2GB). |
337 | | * If the function is called with len == -1 then strlen is called to check |
338 | | * the string length is less than INT32_MAX (2GB) |
339 | | */ |
340 | 10.3k | if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) |
341 | 0 | { |
342 | 0 | tok->err = json_tokener_error_size; |
343 | 0 | return NULL; |
344 | 0 | } |
345 | | |
346 | 10.3k | #ifdef HAVE_USELOCALE |
347 | 10.3k | { |
348 | 10.3k | #ifdef HAVE_DUPLOCALE |
349 | 10.3k | locale_t duploc = duplocale(oldlocale); |
350 | 10.3k | if (duploc == NULL && errno == ENOMEM) |
351 | 0 | { |
352 | 0 | tok->err = json_tokener_error_memory; |
353 | 0 | return NULL; |
354 | 0 | } |
355 | 10.3k | newloc = newlocale(LC_NUMERIC_MASK, "C", duploc); |
356 | | #else |
357 | | newloc = newlocale(LC_NUMERIC_MASK, "C", oldlocale); |
358 | | #endif |
359 | 10.3k | if (newloc == NULL) |
360 | 0 | { |
361 | 0 | tok->err = json_tokener_error_memory; |
362 | 0 | #ifdef HAVE_DUPLOCALE |
363 | 0 | freelocale(duploc); |
364 | 0 | #endif |
365 | 0 | return NULL; |
366 | 0 | } |
367 | | #ifdef NEWLOCALE_NEEDS_FREELOCALE |
368 | | #ifdef HAVE_DUPLOCALE |
369 | | // Older versions of FreeBSD (<12.4) don't free the locale |
370 | | // passed to newlocale(), so do it here |
371 | | freelocale(duploc); |
372 | | #endif |
373 | | #endif |
374 | 10.3k | uselocale(newloc); |
375 | 10.3k | } |
376 | | #elif defined(HAVE_SETLOCALE) |
377 | | { |
378 | | char *tmplocale; |
379 | | tmplocale = setlocale(LC_NUMERIC, NULL); |
380 | | if (tmplocale) |
381 | | { |
382 | | oldlocale = strdup(tmplocale); |
383 | | if (oldlocale == NULL) |
384 | | { |
385 | | tok->err = json_tokener_error_memory; |
386 | | return NULL; |
387 | | } |
388 | | } |
389 | | setlocale(LC_NUMERIC, "C"); |
390 | | } |
391 | | #endif |
392 | | |
393 | 1.27M | while (PEEK_CHAR(c, tok)) // Note: c might be '\0' ! |
394 | 1.27M | { |
395 | | |
396 | 4.50M | redo_char: |
397 | 4.50M | switch (state) |
398 | 4.50M | { |
399 | | |
400 | 1.74M | case json_tokener_state_eatws: |
401 | | /* Advance until we change state */ |
402 | 1.92M | while (is_ws_char(c)) |
403 | 182k | { |
404 | 182k | if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) |
405 | 0 | goto out; |
406 | 182k | } |
407 | 1.74M | if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) |
408 | 3.02k | { |
409 | 3.02k | printbuf_reset(tok->pb); |
410 | 3.02k | printbuf_memappend_checked(tok->pb, &c, 1); |
411 | 3.02k | state = json_tokener_state_comment_start; |
412 | 3.02k | } |
413 | 1.74M | else |
414 | 1.74M | { |
415 | 1.74M | state = saved_state; |
416 | 1.74M | goto redo_char; |
417 | 1.74M | } |
418 | 3.02k | break; |
419 | | |
420 | 369k | case json_tokener_state_start: |
421 | 369k | switch (c) |
422 | 369k | { |
423 | 58.3k | case '{': |
424 | 58.3k | state = json_tokener_state_eatws; |
425 | 58.3k | saved_state = json_tokener_state_object_field_start; |
426 | 58.3k | current = json_object_new_object(); |
427 | 58.3k | if (current == NULL) |
428 | 0 | { |
429 | 0 | tok->err = json_tokener_error_memory; |
430 | 0 | goto out; |
431 | 0 | } |
432 | 58.3k | break; |
433 | 58.3k | case '[': |
434 | 15.4k | state = json_tokener_state_eatws; |
435 | 15.4k | saved_state = json_tokener_state_array; |
436 | 15.4k | current = json_object_new_array(); |
437 | 15.4k | if (current == NULL) |
438 | 0 | { |
439 | 0 | tok->err = json_tokener_error_memory; |
440 | 0 | goto out; |
441 | 0 | } |
442 | 15.4k | break; |
443 | 15.4k | case 'I': |
444 | 615 | case 'i': |
445 | 615 | state = json_tokener_state_inf; |
446 | 615 | printbuf_reset(tok->pb); |
447 | 615 | tok->st_pos = 0; |
448 | 615 | goto redo_char; |
449 | 1.17k | case 'N': |
450 | 14.3k | case 'n': |
451 | 14.3k | state = json_tokener_state_null; // or NaN |
452 | 14.3k | printbuf_reset(tok->pb); |
453 | 14.3k | tok->st_pos = 0; |
454 | 14.3k | goto redo_char; |
455 | 514 | case '\'': |
456 | 514 | if (tok->flags & JSON_TOKENER_STRICT) |
457 | 0 | { |
458 | | /* in STRICT mode only double quotes are allowed */ |
459 | 0 | tok->err = json_tokener_error_parse_unexpected; |
460 | 0 | goto out; |
461 | 0 | } |
462 | | /* FALLTHRU */ |
463 | 86.0k | case '"': |
464 | 86.0k | state = json_tokener_state_string; |
465 | 86.0k | printbuf_reset(tok->pb); |
466 | 86.0k | tok->quote_char = c; |
467 | 86.0k | break; |
468 | 610 | case 'T': |
469 | 1.12k | case 't': |
470 | 1.26k | case 'F': |
471 | 1.69k | case 'f': |
472 | 1.69k | state = json_tokener_state_boolean; |
473 | 1.69k | printbuf_reset(tok->pb); |
474 | 1.69k | tok->st_pos = 0; |
475 | 1.69k | goto redo_char; |
476 | 33.0k | case '0': |
477 | 38.2k | case '1': |
478 | 42.4k | case '2': |
479 | 44.6k | case '3': |
480 | 47.9k | case '4': |
481 | 185k | case '5': |
482 | 186k | case '6': |
483 | 187k | case '7': |
484 | 187k | case '8': |
485 | 188k | case '9': |
486 | 192k | case '-': |
487 | 192k | state = json_tokener_state_number; |
488 | 192k | printbuf_reset(tok->pb); |
489 | 192k | tok->is_double = 0; |
490 | 192k | goto redo_char; |
491 | 128 | default: tok->err = json_tokener_error_parse_unexpected; goto out; |
492 | 369k | } |
493 | 159k | break; |
494 | | |
495 | 363k | case json_tokener_state_finish: |
496 | 363k | if (tok->depth == 0) |
497 | 7.93k | goto out; |
498 | 355k | obj = json_object_get(current); |
499 | 355k | json_tokener_reset_level(tok, tok->depth); |
500 | 355k | tok->depth--; |
501 | 355k | goto redo_char; |
502 | | |
503 | 1.11k | case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */ |
504 | 1.11k | { |
505 | | /* If we were guaranteed to have len set, then we could (usually) handle |
506 | | * the entire "Infinity" check in a single strncmp (strncasecmp), but |
507 | | * since len might be -1 (i.e. "read until \0"), we need to check it |
508 | | * a character at a time. |
509 | | * Trying to handle it both ways would make this code considerably more |
510 | | * complicated with likely little performance benefit. |
511 | | */ |
512 | 1.11k | int is_negative = 0; |
513 | | |
514 | | /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */ |
515 | 9.38k | while (tok->st_pos < (int)json_inf_str_len) |
516 | 8.37k | { |
517 | 8.37k | char inf_char = *str; |
518 | 8.37k | if (inf_char != json_inf_str[tok->st_pos] && |
519 | 8.37k | ((tok->flags & JSON_TOKENER_STRICT) || |
520 | 3.65k | inf_char != json_inf_str_invert[tok->st_pos]) |
521 | 8.37k | ) |
522 | 104 | { |
523 | 104 | tok->err = json_tokener_error_parse_unexpected; |
524 | 104 | goto out; |
525 | 104 | } |
526 | 8.27k | tok->st_pos++; |
527 | 8.27k | (void)ADVANCE_CHAR(str, tok); |
528 | 8.27k | if (!PEEK_CHAR(c, tok)) |
529 | 0 | { |
530 | | /* out of input chars, for now at least */ |
531 | 0 | goto out; |
532 | 0 | } |
533 | 8.27k | } |
534 | | /* We checked the full length of "Infinity", so create the object. |
535 | | * When handling -Infinity, the number parsing code will have dropped |
536 | | * the "-" into tok->pb for us, so check it now. |
537 | | */ |
538 | 1.00k | if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-') |
539 | 474 | { |
540 | 474 | is_negative = 1; |
541 | 474 | } |
542 | 1.00k | current = json_object_new_double(is_negative ? -INFINITY : INFINITY); |
543 | 1.00k | if (current == NULL) |
544 | 0 | { |
545 | 0 | tok->err = json_tokener_error_memory; |
546 | 0 | goto out; |
547 | 0 | } |
548 | 1.00k | saved_state = json_tokener_state_finish; |
549 | 1.00k | state = json_tokener_state_eatws; |
550 | 1.00k | goto redo_char; |
551 | 1.00k | } |
552 | 0 | break; |
553 | 71.1k | case json_tokener_state_null: /* aka starts with 'n' */ |
554 | 71.1k | { |
555 | 71.1k | int size; |
556 | 71.1k | int size_nan; |
557 | 71.1k | printbuf_memappend_checked(tok->pb, &c, 1); |
558 | 71.1k | size = json_min(tok->st_pos + 1, json_null_str_len); |
559 | 71.1k | size_nan = json_min(tok->st_pos + 1, json_nan_str_len); |
560 | 71.1k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
561 | 71.1k | strncasecmp(json_null_str, tok->pb->buf, size) == 0) || |
562 | 71.1k | (strncmp(json_null_str, tok->pb->buf, size) == 0)) |
563 | 69.4k | { |
564 | 69.4k | if (tok->st_pos == json_null_str_len) |
565 | 13.7k | { |
566 | 13.7k | current = NULL; |
567 | 13.7k | saved_state = json_tokener_state_finish; |
568 | 13.7k | state = json_tokener_state_eatws; |
569 | 13.7k | goto redo_char; |
570 | 13.7k | } |
571 | 69.4k | } |
572 | 1.63k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
573 | 1.63k | strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || |
574 | 1.63k | (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)) |
575 | 1.54k | { |
576 | 1.54k | if (tok->st_pos == json_nan_str_len) |
577 | 507 | { |
578 | 507 | current = json_object_new_double(NAN); |
579 | 507 | if (current == NULL) |
580 | 0 | { |
581 | 0 | tok->err = json_tokener_error_memory; |
582 | 0 | goto out; |
583 | 0 | } |
584 | 507 | saved_state = json_tokener_state_finish; |
585 | 507 | state = json_tokener_state_eatws; |
586 | 507 | goto redo_char; |
587 | 507 | } |
588 | 1.54k | } |
589 | 96 | else |
590 | 96 | { |
591 | 96 | tok->err = json_tokener_error_parse_null; |
592 | 96 | goto out; |
593 | 96 | } |
594 | 56.7k | tok->st_pos++; |
595 | 56.7k | } |
596 | 0 | break; |
597 | | |
598 | 3.02k | case json_tokener_state_comment_start: |
599 | 3.02k | if (c == '*') |
600 | 885 | { |
601 | 885 | state = json_tokener_state_comment; |
602 | 885 | } |
603 | 2.13k | else if (c == '/') |
604 | 2.06k | { |
605 | 2.06k | state = json_tokener_state_comment_eol; |
606 | 2.06k | } |
607 | 74 | else |
608 | 74 | { |
609 | 74 | tok->err = json_tokener_error_parse_comment; |
610 | 74 | goto out; |
611 | 74 | } |
612 | 2.94k | printbuf_memappend_checked(tok->pb, &c, 1); |
613 | 2.94k | break; |
614 | | |
615 | 4.44k | case json_tokener_state_comment: |
616 | 4.44k | { |
617 | | /* Advance until we change state */ |
618 | 4.44k | const char *case_start = str; |
619 | 540k | while (c != '*') |
620 | 536k | { |
621 | 536k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
622 | 190 | { |
623 | 190 | printbuf_memappend_checked(tok->pb, case_start, |
624 | 190 | str - case_start); |
625 | 190 | goto out; |
626 | 190 | } |
627 | 536k | } |
628 | 4.25k | printbuf_memappend_checked(tok->pb, case_start, 1 + str - case_start); |
629 | 4.25k | state = json_tokener_state_comment_end; |
630 | 4.25k | } |
631 | 0 | break; |
632 | | |
633 | 2.06k | case json_tokener_state_comment_eol: |
634 | 2.06k | { |
635 | | /* Advance until we change state */ |
636 | 2.06k | const char *case_start = str; |
637 | 185k | while (c != '\n') |
638 | 183k | { |
639 | 183k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
640 | 120 | { |
641 | 120 | printbuf_memappend_checked(tok->pb, case_start, |
642 | 120 | str - case_start); |
643 | 120 | goto out; |
644 | 120 | } |
645 | 183k | } |
646 | 1.94k | printbuf_memappend_checked(tok->pb, case_start, str - case_start); |
647 | 1.94k | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
648 | 1.94k | state = json_tokener_state_eatws; |
649 | 1.94k | } |
650 | 0 | break; |
651 | | |
652 | 4.25k | case json_tokener_state_comment_end: |
653 | 4.25k | printbuf_memappend_checked(tok->pb, &c, 1); |
654 | 4.25k | if (c == '/') |
655 | 686 | { |
656 | 686 | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
657 | 686 | state = json_tokener_state_eatws; |
658 | 686 | } |
659 | 3.57k | else |
660 | 3.57k | { |
661 | 3.57k | state = json_tokener_state_comment; |
662 | 3.57k | } |
663 | 4.25k | break; |
664 | | |
665 | 146k | case json_tokener_state_string: |
666 | 146k | { |
667 | | /* Advance until we change state */ |
668 | 146k | const char *case_start = str; |
669 | 2.57M | while (1) |
670 | 2.57M | { |
671 | 2.57M | if (c == tok->quote_char) |
672 | 85.8k | { |
673 | 85.8k | printbuf_memappend_checked(tok->pb, case_start, |
674 | 85.8k | str - case_start); |
675 | 85.8k | current = |
676 | 85.8k | json_object_new_string_len(tok->pb->buf, tok->pb->bpos); |
677 | 85.8k | if (current == NULL) |
678 | 0 | { |
679 | 0 | tok->err = json_tokener_error_memory; |
680 | 0 | goto out; |
681 | 0 | } |
682 | 85.8k | saved_state = json_tokener_state_finish; |
683 | 85.8k | state = json_tokener_state_eatws; |
684 | 85.8k | break; |
685 | 85.8k | } |
686 | 2.49M | else if (c == '\\') |
687 | 60.3k | { |
688 | 60.3k | printbuf_memappend_checked(tok->pb, case_start, |
689 | 60.3k | str - case_start); |
690 | 60.3k | saved_state = json_tokener_state_string; |
691 | 60.3k | state = json_tokener_state_string_escape; |
692 | 60.3k | break; |
693 | 60.3k | } |
694 | 2.43M | else if ((tok->flags & JSON_TOKENER_STRICT) && (unsigned char)c <= 0x1f) |
695 | 0 | { |
696 | | // Disallow control characters in strict mode |
697 | 0 | tok->err = json_tokener_error_parse_string; |
698 | 0 | goto out; |
699 | 0 | } |
700 | 2.43M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
701 | 226 | { |
702 | 226 | printbuf_memappend_checked(tok->pb, case_start, |
703 | 226 | str - case_start); |
704 | 226 | goto out; |
705 | 226 | } |
706 | 2.43M | } |
707 | 146k | } |
708 | 146k | break; |
709 | | |
710 | 146k | case json_tokener_state_string_escape: |
711 | 71.9k | switch (c) |
712 | 71.9k | { |
713 | 12.6k | case '"': |
714 | 41.9k | case '\\': |
715 | 42.3k | case '/': |
716 | 42.3k | printbuf_memappend_checked(tok->pb, &c, 1); |
717 | 42.3k | state = saved_state; |
718 | 42.3k | break; |
719 | 729 | case 'b': |
720 | 22.0k | case 'n': |
721 | 22.6k | case 'r': |
722 | 23.8k | case 't': |
723 | 24.3k | case 'f': |
724 | 24.3k | if (c == 'b') |
725 | 729 | printbuf_memappend_checked(tok->pb, "\b", 1); |
726 | 23.6k | else if (c == 'n') |
727 | 21.3k | printbuf_memappend_checked(tok->pb, "\n", 1); |
728 | 2.26k | else if (c == 'r') |
729 | 599 | printbuf_memappend_checked(tok->pb, "\r", 1); |
730 | 1.66k | else if (c == 't') |
731 | 1.13k | printbuf_memappend_checked(tok->pb, "\t", 1); |
732 | 530 | else if (c == 'f') |
733 | 530 | printbuf_memappend_checked(tok->pb, "\f", 1); |
734 | 24.3k | state = saved_state; |
735 | 24.3k | break; |
736 | 5.19k | case 'u': |
737 | 5.19k | tok->ucs_char = 0; |
738 | 5.19k | tok->st_pos = 0; |
739 | 5.19k | state = json_tokener_state_escape_unicode; |
740 | 5.19k | break; |
741 | 34 | default: tok->err = json_tokener_error_parse_string; goto out; |
742 | 71.9k | } |
743 | 71.8k | break; |
744 | | |
745 | | // =================================================== |
746 | | |
747 | 71.8k | case json_tokener_state_escape_unicode: |
748 | 6.86k | { |
749 | | /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */ |
750 | 27.2k | while (1) |
751 | 27.2k | { |
752 | 27.2k | if (!c || !is_hex_char(c)) |
753 | 92 | { |
754 | 92 | tok->err = json_tokener_error_parse_string; |
755 | 92 | goto out; |
756 | 92 | } |
757 | 27.1k | tok->ucs_char |= |
758 | 27.1k | ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4)); |
759 | 27.1k | tok->st_pos++; |
760 | 27.1k | if (tok->st_pos >= 4) |
761 | 6.77k | break; |
762 | | |
763 | 20.4k | (void)ADVANCE_CHAR(str, tok); |
764 | 20.4k | if (!PEEK_CHAR(c, tok)) |
765 | 0 | { |
766 | | /* |
767 | | * We're out of characters in the current call to |
768 | | * json_tokener_parse_ex(), but a subsequent call might |
769 | | * provide us with more, so leave our current state |
770 | | * as-is (including tok->high_surrogate) and return. |
771 | | */ |
772 | 0 | goto out; |
773 | 0 | } |
774 | 20.4k | } |
775 | 6.77k | tok->st_pos = 0; |
776 | | |
777 | | /* Now, we have a full \uNNNN sequence in tok->ucs_char */ |
778 | | |
779 | | /* If the *previous* sequence was a high surrogate ... */ |
780 | 6.77k | if (tok->high_surrogate) |
781 | 1.66k | { |
782 | 1.66k | if (IS_LOW_SURROGATE(tok->ucs_char)) |
783 | 736 | { |
784 | | /* Recalculate the ucs_char, then fall thru to process normally */ |
785 | 736 | tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate, |
786 | 736 | tok->ucs_char); |
787 | 736 | } |
788 | 925 | else |
789 | 925 | { |
790 | | /* High surrogate was not followed by a low surrogate |
791 | | * Replace the high and process the rest normally |
792 | | */ |
793 | 925 | printbuf_memappend_checked(tok->pb, |
794 | 925 | (char *)utf8_replacement_char, 3); |
795 | 925 | } |
796 | 1.66k | tok->high_surrogate = 0; |
797 | 1.66k | } |
798 | | |
799 | 6.77k | if (tok->ucs_char < 0x80) |
800 | 1.46k | { |
801 | 1.46k | unsigned char unescaped_utf[1]; |
802 | 1.46k | unescaped_utf[0] = tok->ucs_char; |
803 | 1.46k | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 1); |
804 | 1.46k | } |
805 | 5.30k | else if (tok->ucs_char < 0x800) |
806 | 429 | { |
807 | 429 | unsigned char unescaped_utf[2]; |
808 | 429 | unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); |
809 | 429 | unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); |
810 | 429 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 2); |
811 | 429 | } |
812 | 4.88k | else if (IS_HIGH_SURROGATE(tok->ucs_char)) |
813 | 2.93k | { |
814 | | /* |
815 | | * The next two characters should be \u, HOWEVER, |
816 | | * we can't simply peek ahead here, because the |
817 | | * characters we need might not be passed to us |
818 | | * until a subsequent call to json_tokener_parse_ex. |
819 | | * Instead, transition through a couple of states. |
820 | | * (now): |
821 | | * _escape_unicode => _unicode_need_escape |
822 | | * (see a '\\' char): |
823 | | * _unicode_need_escape => _unicode_need_u |
824 | | * (see a 'u' char): |
825 | | * _unicode_need_u => _escape_unicode |
826 | | * ...and we'll end up back around here. |
827 | | */ |
828 | 2.93k | tok->high_surrogate = tok->ucs_char; |
829 | 2.93k | tok->ucs_char = 0; |
830 | 2.93k | state = json_tokener_state_escape_unicode_need_escape; |
831 | 2.93k | break; |
832 | 2.93k | } |
833 | 1.94k | else if (IS_LOW_SURROGATE(tok->ucs_char)) |
834 | 616 | { |
835 | | /* Got a low surrogate not preceded by a high */ |
836 | 616 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
837 | 616 | } |
838 | 1.33k | else if (tok->ucs_char < 0x10000) |
839 | 681 | { |
840 | 681 | unsigned char unescaped_utf[3]; |
841 | 681 | unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); |
842 | 681 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
843 | 681 | unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); |
844 | 681 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 3); |
845 | 681 | } |
846 | 650 | else if (tok->ucs_char < 0x110000) |
847 | 650 | { |
848 | 650 | unsigned char unescaped_utf[4]; |
849 | 650 | unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); |
850 | 650 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); |
851 | 650 | unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
852 | 650 | unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); |
853 | 650 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 4); |
854 | 650 | } |
855 | 0 | else |
856 | 0 | { |
857 | | /* Don't know what we got--insert the replacement char */ |
858 | 0 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
859 | 0 | } |
860 | 3.83k | state = saved_state; // i.e. _state_string or _state_object_field |
861 | 3.83k | } |
862 | 0 | break; |
863 | | |
864 | 2.93k | case json_tokener_state_escape_unicode_need_escape: |
865 | | // We get here after processing a high_surrogate |
866 | | // require a '\\' char |
867 | 2.93k | if (!c || c != '\\') |
868 | 639 | { |
869 | | /* Got a high surrogate without another sequence following |
870 | | * it. Put a replacement char in for the high surrogate |
871 | | * and pop back up to _state_string or _state_object_field. |
872 | | */ |
873 | 639 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
874 | 639 | tok->high_surrogate = 0; |
875 | 639 | tok->ucs_char = 0; |
876 | 639 | tok->st_pos = 0; |
877 | 639 | state = saved_state; |
878 | 639 | goto redo_char; |
879 | 639 | } |
880 | 2.29k | state = json_tokener_state_escape_unicode_need_u; |
881 | 2.29k | break; |
882 | | |
883 | 2.29k | case json_tokener_state_escape_unicode_need_u: |
884 | | /* We already had a \ char, check that it's \u */ |
885 | 2.29k | if (!c || c != 'u') |
886 | 625 | { |
887 | | /* Got a high surrogate with some non-unicode escape |
888 | | * sequence following it. |
889 | | * Put a replacement char in for the high surrogate |
890 | | * and handle the escape sequence normally. |
891 | | */ |
892 | 625 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
893 | 625 | tok->high_surrogate = 0; |
894 | 625 | tok->ucs_char = 0; |
895 | 625 | tok->st_pos = 0; |
896 | 625 | state = json_tokener_state_string_escape; |
897 | 625 | goto redo_char; |
898 | 625 | } |
899 | 1.66k | state = json_tokener_state_escape_unicode; |
900 | 1.66k | break; |
901 | | |
902 | | // =================================================== |
903 | | |
904 | 8.66k | case json_tokener_state_boolean: |
905 | 8.66k | { |
906 | 8.66k | int size1, size2; |
907 | 8.66k | printbuf_memappend_checked(tok->pb, &c, 1); |
908 | 8.66k | size1 = json_min(tok->st_pos + 1, json_true_str_len); |
909 | 8.66k | size2 = json_min(tok->st_pos + 1, json_false_str_len); |
910 | 8.66k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
911 | 8.66k | strncasecmp(json_true_str, tok->pb->buf, size1) == 0) || |
912 | 8.66k | (strncmp(json_true_str, tok->pb->buf, size1) == 0)) |
913 | 5.30k | { |
914 | 5.30k | if (tok->st_pos == json_true_str_len) |
915 | 1.03k | { |
916 | 1.03k | current = json_object_new_boolean(1); |
917 | 1.03k | if (current == NULL) |
918 | 0 | { |
919 | 0 | tok->err = json_tokener_error_memory; |
920 | 0 | goto out; |
921 | 0 | } |
922 | 1.03k | saved_state = json_tokener_state_finish; |
923 | 1.03k | state = json_tokener_state_eatws; |
924 | 1.03k | goto redo_char; |
925 | 1.03k | } |
926 | 5.30k | } |
927 | 3.36k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
928 | 3.36k | strncasecmp(json_false_str, tok->pb->buf, size2) == 0) || |
929 | 3.36k | (strncmp(json_false_str, tok->pb->buf, size2) == 0)) |
930 | 3.21k | { |
931 | 3.21k | if (tok->st_pos == json_false_str_len) |
932 | 511 | { |
933 | 511 | current = json_object_new_boolean(0); |
934 | 511 | if (current == NULL) |
935 | 0 | { |
936 | 0 | tok->err = json_tokener_error_memory; |
937 | 0 | goto out; |
938 | 0 | } |
939 | 511 | saved_state = json_tokener_state_finish; |
940 | 511 | state = json_tokener_state_eatws; |
941 | 511 | goto redo_char; |
942 | 511 | } |
943 | 3.21k | } |
944 | 152 | else |
945 | 152 | { |
946 | 152 | tok->err = json_tokener_error_parse_boolean; |
947 | 152 | goto out; |
948 | 152 | } |
949 | 6.97k | tok->st_pos++; |
950 | 6.97k | } |
951 | 0 | break; |
952 | | |
953 | 192k | case json_tokener_state_number: |
954 | 192k | { |
955 | | /* Advance until we change state */ |
956 | 192k | const char *case_start = str; |
957 | 192k | int case_len = 0; |
958 | 192k | int is_exponent = 0; |
959 | 192k | int neg_sign_ok = 1; |
960 | 192k | int pos_sign_ok = 0; |
961 | 192k | if (printbuf_length(tok->pb) > 0) |
962 | 0 | { |
963 | | /* We don't save all state from the previous incremental parse |
964 | | so we need to re-generate it based on the saved string so far. |
965 | | */ |
966 | 0 | char *e_loc = strchr(tok->pb->buf, 'e'); |
967 | 0 | if (!e_loc) |
968 | 0 | e_loc = strchr(tok->pb->buf, 'E'); |
969 | 0 | if (e_loc) |
970 | 0 | { |
971 | 0 | char *last_saved_char = |
972 | 0 | &tok->pb->buf[printbuf_length(tok->pb) - 1]; |
973 | 0 | is_exponent = 1; |
974 | 0 | pos_sign_ok = neg_sign_ok = 1; |
975 | | /* If the "e" isn't at the end, we can't start with a '-' */ |
976 | 0 | if (e_loc != last_saved_char) |
977 | 0 | { |
978 | 0 | neg_sign_ok = 0; |
979 | 0 | pos_sign_ok = 0; |
980 | 0 | } |
981 | | // else leave it set to 1, i.e. start of the new input |
982 | 0 | } |
983 | 0 | } |
984 | | |
985 | 530k | while (c && ((c >= '0' && c <= '9') || |
986 | 530k | (!is_exponent && (c == 'e' || c == 'E')) || |
987 | 530k | (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') || |
988 | 530k | (!tok->is_double && c == '.'))) |
989 | 338k | { |
990 | 338k | pos_sign_ok = neg_sign_ok = 0; |
991 | 338k | ++case_len; |
992 | | |
993 | | /* non-digit characters checks */ |
994 | | /* note: since the main loop condition to get here was |
995 | | * an input starting with 0-9 or '-', we are |
996 | | * protected from input starting with '.' or |
997 | | * e/E. |
998 | | */ |
999 | 338k | switch (c) |
1000 | 338k | { |
1001 | 820 | case '.': |
1002 | 820 | tok->is_double = 1; |
1003 | 820 | pos_sign_ok = 1; |
1004 | 820 | neg_sign_ok = 1; |
1005 | 820 | break; |
1006 | 1.68k | case 'e': /* FALLTHRU */ |
1007 | 3.23k | case 'E': |
1008 | 3.23k | is_exponent = 1; |
1009 | 3.23k | tok->is_double = 1; |
1010 | | /* the exponent part can begin with a negative sign */ |
1011 | 3.23k | pos_sign_ok = neg_sign_ok = 1; |
1012 | 3.23k | break; |
1013 | 334k | default: break; |
1014 | 338k | } |
1015 | | |
1016 | 338k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1017 | 0 | { |
1018 | 0 | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1019 | 0 | goto out; |
1020 | 0 | } |
1021 | 338k | } |
1022 | | /* |
1023 | | Now we know c isn't a valid number char, but check whether |
1024 | | it might have been intended to be, and return a potentially |
1025 | | more understandable error right away. |
1026 | | However, if we're at the top-level, use the number as-is |
1027 | | because c can be part of a new object to parse on the |
1028 | | next call to json_tokener_parse(). |
1029 | | */ |
1030 | 192k | if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' && |
1031 | 192k | c != 'I' && c != 'i' && !is_ws_char(c)) |
1032 | 281 | { |
1033 | 281 | tok->err = json_tokener_error_parse_number; |
1034 | 281 | goto out; |
1035 | 281 | } |
1036 | 192k | if (case_len > 0) |
1037 | 192k | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1038 | | |
1039 | | // Check for -Infinity |
1040 | 192k | if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I')) |
1041 | 497 | { |
1042 | 497 | state = json_tokener_state_inf; |
1043 | 497 | tok->st_pos = 0; |
1044 | 497 | goto redo_char; |
1045 | 497 | } |
1046 | 191k | if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT)) |
1047 | 3.61k | { |
1048 | | /* Trim some chars off the end, to allow things |
1049 | | like "123e+" to parse ok. */ |
1050 | 7.81k | while (printbuf_length(tok->pb) > 1) |
1051 | 5.98k | { |
1052 | 5.98k | char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1]; |
1053 | 5.98k | if (last_char != 'e' && last_char != 'E' && |
1054 | 5.98k | last_char != '-' && last_char != '+') |
1055 | 1.78k | { |
1056 | 1.78k | break; |
1057 | 1.78k | } |
1058 | 4.20k | tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0'; |
1059 | 4.20k | printbuf_length(tok->pb)--; |
1060 | 4.20k | } |
1061 | 3.61k | } |
1062 | 191k | } |
1063 | 0 | { |
1064 | 191k | int64_t num64; |
1065 | 191k | uint64_t numuint64; |
1066 | 191k | double numd; |
1067 | 191k | if (!tok->is_double && tok->pb->buf[0] == '-' && |
1068 | 191k | json_parse_int64(tok->pb->buf, &num64) == 0) |
1069 | 2.55k | { |
1070 | 2.55k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1071 | 0 | { |
1072 | 0 | tok->err = json_tokener_error_parse_number; |
1073 | 0 | goto out; |
1074 | 0 | } |
1075 | 2.55k | current = json_object_new_int64(num64); |
1076 | 2.55k | if (current == NULL) |
1077 | 0 | { |
1078 | 0 | tok->err = json_tokener_error_memory; |
1079 | 0 | goto out; |
1080 | 0 | } |
1081 | 2.55k | } |
1082 | 189k | else if (!tok->is_double && tok->pb->buf[0] != '-' && |
1083 | 189k | json_parse_uint64(tok->pb->buf, &numuint64) == 0) |
1084 | 185k | { |
1085 | 185k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1086 | 0 | { |
1087 | 0 | tok->err = json_tokener_error_parse_number; |
1088 | 0 | goto out; |
1089 | 0 | } |
1090 | 185k | if (numuint64 && tok->pb->buf[0] == '0' && |
1091 | 185k | (tok->flags & JSON_TOKENER_STRICT)) |
1092 | 0 | { |
1093 | 0 | tok->err = json_tokener_error_parse_number; |
1094 | 0 | goto out; |
1095 | 0 | } |
1096 | 185k | if (numuint64 <= INT64_MAX) |
1097 | 184k | { |
1098 | 184k | num64 = (uint64_t)numuint64; |
1099 | 184k | current = json_object_new_int64(num64); |
1100 | 184k | if (current == NULL) |
1101 | 0 | { |
1102 | 0 | tok->err = json_tokener_error_memory; |
1103 | 0 | goto out; |
1104 | 0 | } |
1105 | 184k | } |
1106 | 806 | else |
1107 | 806 | { |
1108 | 806 | current = json_object_new_uint64(numuint64); |
1109 | 806 | if (current == NULL) |
1110 | 0 | { |
1111 | 0 | tok->err = json_tokener_error_memory; |
1112 | 0 | goto out; |
1113 | 0 | } |
1114 | 806 | } |
1115 | 185k | } |
1116 | 3.64k | else if (tok->is_double && |
1117 | 3.64k | json_tokener_parse_double( |
1118 | 3.61k | tok->pb->buf, printbuf_length(tok->pb), &numd) == 0) |
1119 | 3.58k | { |
1120 | 3.58k | current = json_object_new_double_s(numd, tok->pb->buf); |
1121 | 3.58k | if (current == NULL) |
1122 | 0 | { |
1123 | 0 | tok->err = json_tokener_error_memory; |
1124 | 0 | goto out; |
1125 | 0 | } |
1126 | 3.58k | } |
1127 | 61 | else |
1128 | 61 | { |
1129 | 61 | tok->err = json_tokener_error_parse_number; |
1130 | 61 | goto out; |
1131 | 61 | } |
1132 | 191k | saved_state = json_tokener_state_finish; |
1133 | 191k | state = json_tokener_state_eatws; |
1134 | 191k | goto redo_char; |
1135 | 191k | } |
1136 | 0 | break; |
1137 | | |
1138 | 209k | case json_tokener_state_array_after_sep: |
1139 | 224k | case json_tokener_state_array: |
1140 | 224k | if (c == ']') |
1141 | 1.76k | { |
1142 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1143 | 1.76k | json_object_array_shrink(current, 0); |
1144 | | |
1145 | 1.76k | if (state == json_tokener_state_array_after_sep && |
1146 | 1.76k | (tok->flags & JSON_TOKENER_STRICT)) |
1147 | 0 | { |
1148 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1149 | 0 | goto out; |
1150 | 0 | } |
1151 | 1.76k | saved_state = json_tokener_state_finish; |
1152 | 1.76k | state = json_tokener_state_eatws; |
1153 | 1.76k | } |
1154 | 222k | else |
1155 | 222k | { |
1156 | 222k | if (tok->depth >= tok->max_depth - 1) |
1157 | 4 | { |
1158 | 4 | tok->err = json_tokener_error_depth; |
1159 | 4 | goto out; |
1160 | 4 | } |
1161 | 222k | state = json_tokener_state_array_add; |
1162 | 222k | tok->depth++; |
1163 | 222k | json_tokener_reset_level(tok, tok->depth); |
1164 | 222k | goto redo_char; |
1165 | 222k | } |
1166 | 1.76k | break; |
1167 | | |
1168 | 221k | case json_tokener_state_array_add: |
1169 | 221k | if (json_object_array_add(current, obj) != 0) |
1170 | 0 | { |
1171 | 0 | tok->err = json_tokener_error_memory; |
1172 | 0 | goto out; |
1173 | 0 | } |
1174 | 221k | saved_state = json_tokener_state_array_sep; |
1175 | 221k | state = json_tokener_state_eatws; |
1176 | 221k | goto redo_char; |
1177 | | |
1178 | 221k | case json_tokener_state_array_sep: |
1179 | 221k | if (c == ']') |
1180 | 11.7k | { |
1181 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1182 | 11.7k | json_object_array_shrink(current, 0); |
1183 | | |
1184 | 11.7k | saved_state = json_tokener_state_finish; |
1185 | 11.7k | state = json_tokener_state_eatws; |
1186 | 11.7k | } |
1187 | 209k | else if (c == ',') |
1188 | 209k | { |
1189 | 209k | saved_state = json_tokener_state_array_after_sep; |
1190 | 209k | state = json_tokener_state_eatws; |
1191 | 209k | } |
1192 | 175 | else |
1193 | 175 | { |
1194 | 175 | tok->err = json_tokener_error_parse_array; |
1195 | 175 | goto out; |
1196 | 175 | } |
1197 | 220k | break; |
1198 | | |
1199 | 220k | case json_tokener_state_object_field_start: |
1200 | 157k | case json_tokener_state_object_field_start_after_sep: |
1201 | 157k | if (c == '}') |
1202 | 21.1k | { |
1203 | 21.1k | if (state == json_tokener_state_object_field_start_after_sep && |
1204 | 21.1k | (tok->flags & JSON_TOKENER_STRICT)) |
1205 | 0 | { |
1206 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1207 | 0 | goto out; |
1208 | 0 | } |
1209 | 21.1k | saved_state = json_tokener_state_finish; |
1210 | 21.1k | state = json_tokener_state_eatws; |
1211 | 21.1k | } |
1212 | 136k | else if (c == '"' || c == '\'') |
1213 | 136k | { |
1214 | 136k | tok->quote_char = c; |
1215 | 136k | printbuf_reset(tok->pb); |
1216 | 136k | state = json_tokener_state_object_field; |
1217 | 136k | } |
1218 | 59 | else |
1219 | 59 | { |
1220 | 59 | tok->err = json_tokener_error_parse_object_key_name; |
1221 | 59 | goto out; |
1222 | 59 | } |
1223 | 157k | break; |
1224 | | |
1225 | 157k | case json_tokener_state_object_field: |
1226 | 147k | { |
1227 | | /* Advance until we change state */ |
1228 | 147k | const char *case_start = str; |
1229 | 2.24M | while (1) |
1230 | 2.24M | { |
1231 | 2.24M | if (c == tok->quote_char) |
1232 | 136k | { |
1233 | 136k | printbuf_memappend_checked(tok->pb, case_start, |
1234 | 136k | str - case_start); |
1235 | 136k | obj_field_name = strdup(tok->pb->buf); |
1236 | 136k | if (obj_field_name == NULL) |
1237 | 0 | { |
1238 | 0 | tok->err = json_tokener_error_memory; |
1239 | 0 | goto out; |
1240 | 0 | } |
1241 | 136k | saved_state = json_tokener_state_object_field_end; |
1242 | 136k | state = json_tokener_state_eatws; |
1243 | 136k | break; |
1244 | 136k | } |
1245 | 2.11M | else if (c == '\\') |
1246 | 10.8k | { |
1247 | 10.8k | printbuf_memappend_checked(tok->pb, case_start, |
1248 | 10.8k | str - case_start); |
1249 | 10.8k | saved_state = json_tokener_state_object_field; |
1250 | 10.8k | state = json_tokener_state_string_escape; |
1251 | 10.8k | break; |
1252 | 10.8k | } |
1253 | 2.10M | else if ((tok->flags & JSON_TOKENER_STRICT) && (unsigned char)c <= 0x1f) |
1254 | 0 | { |
1255 | | // Disallow control characters in strict mode |
1256 | 0 | tok->err = json_tokener_error_parse_string; |
1257 | 0 | goto out; |
1258 | 0 | } |
1259 | 2.10M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1260 | 204 | { |
1261 | 204 | printbuf_memappend_checked(tok->pb, case_start, |
1262 | 204 | str - case_start); |
1263 | 204 | goto out; |
1264 | 204 | } |
1265 | 2.10M | } |
1266 | 147k | } |
1267 | 147k | break; |
1268 | | |
1269 | 147k | case json_tokener_state_object_field_end: |
1270 | 136k | if (c == ':') |
1271 | 135k | { |
1272 | 135k | saved_state = json_tokener_state_object_value; |
1273 | 135k | state = json_tokener_state_eatws; |
1274 | 135k | } |
1275 | 142 | else |
1276 | 142 | { |
1277 | 142 | tok->err = json_tokener_error_parse_object_key_sep; |
1278 | 142 | goto out; |
1279 | 142 | } |
1280 | 135k | break; |
1281 | | |
1282 | 135k | case json_tokener_state_object_value: |
1283 | 135k | if (tok->depth >= tok->max_depth - 1) |
1284 | 3 | { |
1285 | 3 | tok->err = json_tokener_error_depth; |
1286 | 3 | goto out; |
1287 | 3 | } |
1288 | 135k | state = json_tokener_state_object_value_add; |
1289 | 135k | tok->depth++; |
1290 | 135k | json_tokener_reset_level(tok, tok->depth); |
1291 | 135k | goto redo_char; |
1292 | | |
1293 | 133k | case json_tokener_state_object_value_add: |
1294 | 133k | if (json_object_object_add(current, obj_field_name, obj) != 0) |
1295 | 0 | { |
1296 | 0 | tok->err = json_tokener_error_memory; |
1297 | 0 | goto out; |
1298 | 0 | } |
1299 | 133k | free(obj_field_name); |
1300 | 133k | obj_field_name = NULL; |
1301 | 133k | saved_state = json_tokener_state_object_sep; |
1302 | 133k | state = json_tokener_state_eatws; |
1303 | 133k | goto redo_char; |
1304 | | |
1305 | 133k | case json_tokener_state_object_sep: |
1306 | | /* { */ |
1307 | 133k | if (c == '}') |
1308 | 34.3k | { |
1309 | 34.3k | saved_state = json_tokener_state_finish; |
1310 | 34.3k | state = json_tokener_state_eatws; |
1311 | 34.3k | } |
1312 | 99.6k | else if (c == ',') |
1313 | 99.3k | { |
1314 | 99.3k | saved_state = json_tokener_state_object_field_start_after_sep; |
1315 | 99.3k | state = json_tokener_state_eatws; |
1316 | 99.3k | } |
1317 | 291 | else |
1318 | 291 | { |
1319 | 291 | tok->err = json_tokener_error_parse_object_value_sep; |
1320 | 291 | goto out; |
1321 | 291 | } |
1322 | 133k | break; |
1323 | 4.50M | } |
1324 | 1.26M | (void)ADVANCE_CHAR(str, tok); |
1325 | 1.26M | if (!c) // This is the char *before* advancing |
1326 | 9 | break; |
1327 | 1.26M | } /* while(PEEK_CHAR) */ |
1328 | | |
1329 | 10.3k | out: |
1330 | 10.3k | if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0)) |
1331 | 0 | { |
1332 | 0 | tok->err = json_tokener_error_parse_utf8_string; |
1333 | 0 | } |
1334 | 10.3k | if (c && (state == json_tokener_state_finish) && (tok->depth == 0) && |
1335 | 10.3k | (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) == |
1336 | 94 | JSON_TOKENER_STRICT) |
1337 | 0 | { |
1338 | | /* unexpected char after JSON data */ |
1339 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1340 | 0 | } |
1341 | 10.3k | if (!c) |
1342 | 8.71k | { |
1343 | | /* We hit an eof char (0) */ |
1344 | 8.71k | if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish) |
1345 | 785 | tok->err = json_tokener_error_parse_eof; |
1346 | 8.71k | } |
1347 | | |
1348 | 10.3k | #ifdef HAVE_USELOCALE |
1349 | 10.3k | uselocale(oldlocale); |
1350 | 10.3k | freelocale(newloc); |
1351 | | #elif defined(HAVE_SETLOCALE) |
1352 | | setlocale(LC_NUMERIC, oldlocale); |
1353 | | free(oldlocale); |
1354 | | #endif |
1355 | | |
1356 | 10.3k | if (tok->err == json_tokener_success) |
1357 | 8.02k | { |
1358 | 8.02k | json_object *ret = json_object_get(current); |
1359 | 8.02k | int ii; |
1360 | | |
1361 | | /* Partially reset, so we parse additional objects on subsequent calls. */ |
1362 | 16.3k | for (ii = tok->depth; ii >= 0; ii--) |
1363 | 8.36k | json_tokener_reset_level(tok, ii); |
1364 | 8.02k | return ret; |
1365 | 8.02k | } |
1366 | | |
1367 | 2.35k | MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err], |
1368 | 2.35k | tok->char_offset); |
1369 | 2.35k | return NULL; |
1370 | 10.3k | } |
1371 | | |
1372 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) |
1373 | 0 | { |
1374 | 0 | unsigned char chr = c; |
1375 | 0 | if (*nBytes == 0) |
1376 | 0 | { |
1377 | 0 | if (chr >= 0x80) |
1378 | 0 | { |
1379 | 0 | if ((chr & 0xe0) == 0xc0) |
1380 | 0 | *nBytes = 1; |
1381 | 0 | else if ((chr & 0xf0) == 0xe0) |
1382 | 0 | *nBytes = 2; |
1383 | 0 | else if ((chr & 0xf8) == 0xf0) |
1384 | 0 | *nBytes = 3; |
1385 | 0 | else |
1386 | 0 | return 0; |
1387 | 0 | } |
1388 | 0 | } |
1389 | 0 | else |
1390 | 0 | { |
1391 | 0 | if ((chr & 0xC0) != 0x80) |
1392 | 0 | return 0; |
1393 | 0 | (*nBytes)--; |
1394 | 0 | } |
1395 | 0 | return 1; |
1396 | 0 | } |
1397 | | |
1398 | | void json_tokener_set_flags(struct json_tokener *tok, int flags) |
1399 | 0 | { |
1400 | 0 | tok->flags = flags; |
1401 | 0 | } |
1402 | | |
1403 | | size_t json_tokener_get_parse_end(struct json_tokener *tok) |
1404 | 0 | { |
1405 | 0 | assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */ |
1406 | 0 | return (size_t)tok->char_offset; |
1407 | 0 | } |
1408 | | |
/**
 * Convert the first `len` characters of `buf` to a double using strtod().
 *
 * @param buf    NUL-terminated text to convert
 * @param len    number of characters of `buf` that must make up the number
 * @param retval receives the converted value; it is written on failure too
 * @return 0 if strtod() consumed exactly `len` characters (at least one),
 *         1 otherwise
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end;

	if (len <= 0)
	{
		/* With nothing to parse, strtod() consumes no characters and
		 * end == buf + 0 would be mistaken for a successful conversion.
		 */
		*retval = 0.0;
		return 1;
	}

	*retval = strtod(buf, &end);
	return (buf + len == end) ? 0 : 1;
}