/src/json-c/json_tokener.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $ |
3 | | * |
4 | | * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. |
5 | | * Michael Clark <michael@metaparadigm.com> |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the MIT license. See COPYING for details. |
9 | | * |
10 | | * |
11 | | * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved. |
12 | | * The copyrights to the contents of this file are licensed under the MIT License |
13 | | * (https://www.opensource.org/licenses/mit-license.php) |
14 | | */ |
15 | | |
16 | | #include "config.h" |
17 | | |
18 | | #include "math_compat.h" |
19 | | #include <assert.h> |
20 | | #include <errno.h> |
21 | | #include <limits.h> |
22 | | #include <math.h> |
23 | | #include <stddef.h> |
24 | | #include <stdio.h> |
25 | | #include <stdlib.h> |
26 | | #include <string.h> |
27 | | |
28 | | #include "debug.h" |
29 | | #include "json_inttypes.h" |
30 | | #include "json_object.h" |
31 | | #include "json_object_private.h" |
32 | | #include "json_tokener.h" |
33 | | #include "json_util.h" |
34 | | #include "printbuf.h" |
35 | | #include "strdup_compat.h" |
36 | | |
37 | | #ifdef HAVE_LOCALE_H |
38 | | #include <locale.h> |
39 | | #endif /* HAVE_LOCALE_H */ |
40 | | #ifdef HAVE_XLOCALE_H |
41 | | #include <xlocale.h> |
42 | | #endif |
43 | | #ifdef HAVE_STRINGS_H |
44 | | #include <strings.h> |
45 | | #endif /* HAVE_STRINGS_H */ |
46 | | |
47 | 47.6k | #define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9) |
48 | | |
49 | | #if !HAVE_STRNCASECMP && defined(_MSC_VER) |
50 | | /* MSC has the version as _strnicmp */ |
51 | | #define strncasecmp _strnicmp |
52 | | #elif !HAVE_STRNCASECMP |
53 | | #error You do not have strncasecmp on your system. |
54 | | #endif /* HAVE_STRNCASECMP */ |
55 | | |
56 | | #if defined(_MSC_VER) && (_MSC_VER <= 1800) |
57 | | /* VS2013 doesn't know about "inline" */ |
58 | | #define inline __inline |
59 | | #elif defined(AIX_CC) |
60 | | #define inline |
61 | | #endif |
62 | | |
63 | | /* The following helper functions are used to speed up parsing. They |
64 | | * are faster than their ctype counterparts because they assume that |
65 | | * the input is in ASCII and that the locale is set to "C". The |
66 | | * compiler will also inline these functions, providing an additional |
67 | | * speedup by saving on function calls. |
68 | | */ |
69 | | static inline int is_ws_char(char c) |
70 | 37.6M | { |
71 | 37.6M | return c == ' ' |
72 | 37.6M | || c == '\t' |
73 | 37.6M | || c == '\n' |
74 | 37.6M | || c == '\r'; |
75 | 37.6M | } |
76 | | |
77 | | static inline int is_hex_char(char c) |
78 | 47.7k | { |
79 | 47.7k | return (c >= '0' && c <= '9') |
80 | 47.7k | || (c >= 'A' && c <= 'F') |
81 | 47.7k | || (c >= 'a' && c <= 'f'); |
82 | 47.7k | } |
83 | | |
84 | | /* Use C99 NAN by default; if not available, nan("") should work too. */ |
85 | | #ifndef NAN |
86 | | #define NAN nan("") |
87 | | #endif /* !NAN */ |
88 | | |
89 | | static const char json_null_str[] = "null"; |
90 | | static const int json_null_str_len = sizeof(json_null_str) - 1; |
91 | | static const char json_inf_str[] = "Infinity"; |
92 | | /* Swapped case "Infinity" to avoid need to call tolower() on input chars: */ |
93 | | static const char json_inf_str_invert[] = "iNFINITY"; |
94 | | static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1; |
95 | | static const char json_nan_str[] = "NaN"; |
96 | | static const int json_nan_str_len = sizeof(json_nan_str) - 1; |
97 | | static const char json_true_str[] = "true"; |
98 | | static const int json_true_str_len = sizeof(json_true_str) - 1; |
99 | | static const char json_false_str[] = "false"; |
100 | | static const int json_false_str_len = sizeof(json_false_str) - 1; |
101 | | |
102 | | /* clang-format off */ |
103 | | static const char *json_tokener_errors[] = { |
104 | | "success", |
105 | | "continue", |
106 | | "nesting too deep", |
107 | | "unexpected end of data", |
108 | | "unexpected character", |
109 | | "null expected", |
110 | | "boolean expected", |
111 | | "number expected", |
112 | | "array value separator ',' expected", |
113 | | "quoted object property name expected", |
114 | | "object property name separator ':' expected", |
115 | | "object value separator ',' expected", |
116 | | "invalid string sequence", |
117 | | "expected comment", |
118 | | "invalid utf-8 string", |
119 | | "buffer size overflow", |
120 | | "out of memory" |
121 | | }; |
122 | | /* clang-format on */ |
123 | | |
124 | | /** |
125 | | * validate the utf-8 string in strict mode. |
126 | | * if not utf-8 format, return err. |
127 | | */ |
128 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes); |
129 | | |
130 | | static int json_tokener_parse_double(const char *buf, int len, double *retval); |
131 | | |
132 | | const char *json_tokener_error_desc(enum json_tokener_error jerr) |
133 | 0 | { |
134 | 0 | int jerr_int = (int)jerr; |
135 | 0 | if (jerr_int < 0 || |
136 | 0 | jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0]))) |
137 | 0 | return "Unknown error, " |
138 | 0 | "invalid json_tokener_error value passed to json_tokener_error_desc()"; |
139 | 0 | return json_tokener_errors[jerr]; |
140 | 0 | } |
141 | | |
142 | | enum json_tokener_error json_tokener_get_error(struct json_tokener *tok) |
143 | 0 | { |
144 | 0 | return tok->err; |
145 | 0 | } |
146 | | |
147 | | /* Stuff for decoding unicode sequences */ |
148 | 7.13k | #define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800) |
149 | 5.06k | #define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00) |
150 | 839 | #define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000) |
151 | | static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD}; |
152 | | |
153 | | struct json_tokener *json_tokener_new_ex(int depth) |
154 | 14.9k | { |
155 | 14.9k | struct json_tokener *tok; |
156 | | |
157 | 14.9k | if (depth < 1) |
158 | 0 | return NULL; |
159 | | |
160 | 14.9k | tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener)); |
161 | 14.9k | if (!tok) |
162 | 0 | return NULL; |
163 | 14.9k | tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec)); |
164 | 14.9k | if (!tok->stack) |
165 | 0 | { |
166 | 0 | free(tok); |
167 | 0 | return NULL; |
168 | 0 | } |
169 | 14.9k | tok->pb = printbuf_new(); |
170 | 14.9k | if (!tok->pb) |
171 | 0 | { |
172 | 0 | free(tok->stack); |
173 | 0 | free(tok); |
174 | 0 | return NULL; |
175 | 0 | } |
176 | 14.9k | tok->max_depth = depth; |
177 | 14.9k | json_tokener_reset(tok); |
178 | 14.9k | return tok; |
179 | 14.9k | } |
180 | | |
181 | | struct json_tokener *json_tokener_new(void) |
182 | 11.9k | { |
183 | 11.9k | return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH); |
184 | 11.9k | } |
185 | | |
186 | | void json_tokener_free(struct json_tokener *tok) |
187 | 14.9k | { |
188 | 14.9k | if (!tok) |
189 | 0 | return; |
190 | 14.9k | json_tokener_reset(tok); |
191 | 14.9k | if (tok->pb) |
192 | 14.9k | printbuf_free(tok->pb); |
193 | 14.9k | free(tok->stack); |
194 | 14.9k | free(tok); |
195 | 14.9k | } |
196 | | |
197 | | static void json_tokener_reset_level(struct json_tokener *tok, int depth) |
198 | 18.7M | { |
199 | 18.7M | tok->stack[depth].state = json_tokener_state_eatws; |
200 | 18.7M | tok->stack[depth].saved_state = json_tokener_state_start; |
201 | 18.7M | json_object_put(tok->stack[depth].current); |
202 | 18.7M | tok->stack[depth].current = NULL; |
203 | 18.7M | free(tok->stack[depth].obj_field_name); |
204 | 18.7M | tok->stack[depth].obj_field_name = NULL; |
205 | 18.7M | } |
206 | | |
207 | | void json_tokener_reset(struct json_tokener *tok) |
208 | 29.9k | { |
209 | 29.9k | int i; |
210 | 29.9k | if (!tok) |
211 | 0 | return; |
212 | | |
213 | 65.6k | for (i = tok->depth; i >= 0; i--) |
214 | 35.6k | json_tokener_reset_level(tok, i); |
215 | 29.9k | tok->depth = 0; |
216 | 29.9k | tok->err = json_tokener_success; |
217 | 29.9k | } |
218 | | |
219 | | struct json_object *json_tokener_parse(const char *str) |
220 | 8.88k | { |
221 | 8.88k | enum json_tokener_error jerr_ignored; |
222 | 8.88k | struct json_object *obj; |
223 | 8.88k | obj = json_tokener_parse_verbose(str, &jerr_ignored); |
224 | 8.88k | return obj; |
225 | 8.88k | } |
226 | | |
227 | | struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error) |
228 | 8.88k | { |
229 | 8.88k | struct json_tokener *tok; |
230 | 8.88k | struct json_object *obj; |
231 | | |
232 | 8.88k | tok = json_tokener_new(); |
233 | 8.88k | if (!tok) |
234 | 0 | { |
235 | 0 | *error = json_tokener_error_memory; |
236 | 0 | return NULL; |
237 | 0 | } |
238 | 8.88k | obj = json_tokener_parse_ex(tok, str, -1); |
239 | 8.88k | *error = tok->err; |
240 | 8.88k | if (tok->err != json_tokener_success |
241 | | #if 0 |
242 | | /* This would be a more sensible default, and cause parsing |
243 | | * things like "null123" to fail when the caller can't know |
244 | | * where the parsing left off, but starting to fail would |
245 | | * be a notable behaviour change. Save for a 1.0 release. |
246 | | */ |
247 | | || json_tokener_get_parse_end(tok) != strlen(str) |
248 | | #endif |
249 | 8.88k | ) |
250 | | |
251 | 7.51k | { |
252 | 7.51k | if (obj != NULL) |
253 | 0 | json_object_put(obj); |
254 | 7.51k | obj = NULL; |
255 | 7.51k | } |
256 | | |
257 | 8.88k | json_tokener_free(tok); |
258 | 8.88k | return obj; |
259 | 8.88k | } |
260 | | |
261 | 180M | #define state tok->stack[tok->depth].state |
262 | 65.8M | #define saved_state tok->stack[tok->depth].saved_state |
263 | 37.4M | #define current tok->stack[tok->depth].current |
264 | 267k | #define obj_field_name tok->stack[tok->depth].obj_field_name |
265 | | |
266 | | /* Optimization: |
267 | | * json_tokener_parse_ex() consumed a lot of CPU in its main loop, |
268 | | * iterating character-by-character. A large performance boost is |
269 | | * achieved by using tighter loops to locally handle units such as |
270 | | * comments and strings. Loops that handle an entire token within |
271 | | * their scope also gather entire strings and pass them to |
272 | | * printbuf_memappend() in a single call, rather than calling |
273 | | * printbuf_memappend() one char at a time. |
274 | | * |
275 | | * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is |
276 | | * common to both the main loop and the tighter loops. |
277 | | */ |
278 | | |
279 | | /* PEEK_CHAR(dest, tok) macro: |
280 | | * Peeks at the current char and stores it in dest. |
281 | | * Returns 1 on success, sets tok->err and returns 0 if no more chars. |
282 | | * Implicit inputs: str, len, nBytesp vars |
283 | | */ |
284 | | #define PEEK_CHAR(dest, tok) \ |
285 | 115M | (((tok)->char_offset == len) \ |
286 | 115M | ? (((tok)->depth == 0 && state == json_tokener_state_eatws && \ |
287 | 4.42k | saved_state == json_tokener_state_finish) \ |
288 | 4.42k | ? (((tok)->err = json_tokener_success), 0) \ |
289 | 4.42k | : (((tok)->err = json_tokener_continue), 0)) \ |
290 | 115M | : (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && \ |
291 | 115M | (!json_tokener_validate_utf8(*str, nBytesp))) \ |
292 | 115M | ? ((tok->err = json_tokener_error_parse_utf8_string), 0) \ |
293 | 115M | : (((dest) = *str), 1))) |
294 | | |
295 | | /* ADVANCE_CHAR() macro: |
296 | | * Increments str & tok->char_offset. |
297 | | * For convenience of existing conditionals, returns the old value of c (0 on eof). |
298 | | * Implicit inputs: c var |
299 | | */ |
300 | 219M | #define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c) |
301 | | |
302 | | /* printbuf_memappend_checked(p, s, l) macro: |
303 | | * Add string s of length l to printbuffer p. |
304 | | * If operation fails abort parse operation with memory error. |
305 | | */ |
306 | | #define printbuf_memappend_checked(p, s, l) \ |
307 | 10.3M | do { \ |
308 | 10.3M | if (printbuf_memappend((p), (s), (l)) < 0) \ |
309 | 10.3M | { \ |
310 | 0 | tok->err = json_tokener_error_memory; \ |
311 | 0 | goto out; \ |
312 | 0 | } \ |
313 | 10.3M | } while (0) |
314 | | |
315 | | /* End optimization macro defs */ |
316 | | |
317 | | struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len) |
318 | 14.9k | { |
319 | 14.9k | struct json_object *obj = NULL; |
320 | 14.9k | char c = '\1'; |
321 | 14.9k | unsigned int nBytes = 0; |
322 | 14.9k | unsigned int *nBytesp = &nBytes; |
323 | | |
324 | 14.9k | #ifdef HAVE_USELOCALE |
325 | 14.9k | locale_t oldlocale = uselocale(NULL); |
326 | 14.9k | locale_t newloc; |
327 | | #elif defined(HAVE_SETLOCALE) |
328 | | char *oldlocale = NULL; |
329 | | #endif |
330 | | |
331 | 14.9k | tok->char_offset = 0; |
332 | 14.9k | tok->err = json_tokener_success; |
333 | | |
334 | | /* this interface is presently not 64-bit clean due to the int len argument |
335 | | * and the internal printbuf interface that takes 32-bit int len arguments |
336 | | * so the function limits the maximum string size to INT32_MAX (2GB). |
337 | | * If the function is called with len == -1 then strlen is called to check |
338 | | * the string length is less than INT32_MAX (2GB) |
339 | | */ |
340 | 14.9k | if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) |
341 | 0 | { |
342 | 0 | tok->err = json_tokener_error_size; |
343 | 0 | return NULL; |
344 | 0 | } |
345 | | |
346 | 14.9k | #ifdef HAVE_USELOCALE |
347 | 14.9k | { |
348 | 14.9k | #ifdef HAVE_DUPLOCALE |
349 | 14.9k | locale_t duploc = duplocale(oldlocale); |
350 | 14.9k | if (duploc == NULL && errno == ENOMEM) |
351 | 0 | { |
352 | 0 | tok->err = json_tokener_error_memory; |
353 | 0 | return NULL; |
354 | 0 | } |
355 | 14.9k | newloc = newlocale(LC_NUMERIC_MASK, "C", duploc); |
356 | | #else |
357 | | newloc = newlocale(LC_NUMERIC_MASK, "C", oldlocale); |
358 | | #endif |
359 | 14.9k | if (newloc == NULL) |
360 | 0 | { |
361 | 0 | tok->err = json_tokener_error_memory; |
362 | 0 | #ifdef HAVE_DUPLOCALE |
363 | 0 | freelocale(duploc); |
364 | 0 | #endif |
365 | 0 | return NULL; |
366 | 0 | } |
367 | | #ifdef NEWLOCALE_NEEDS_FREELOCALE |
368 | | #ifdef HAVE_DUPLOCALE |
369 | | // Older versions of FreeBSD (<12.4) don't free the locale |
370 | | // passed to newlocale(), so do it here |
371 | | freelocale(duploc); |
372 | | #endif |
373 | | #endif |
374 | 14.9k | uselocale(newloc); |
375 | 14.9k | } |
376 | | #elif defined(HAVE_SETLOCALE) |
377 | | { |
378 | | char *tmplocale; |
379 | | tmplocale = setlocale(LC_NUMERIC, NULL); |
380 | | if (tmplocale) |
381 | | { |
382 | | oldlocale = strdup(tmplocale); |
383 | | if (oldlocale == NULL) |
384 | | { |
385 | | tok->err = json_tokener_error_memory; |
386 | | return NULL; |
387 | | } |
388 | | } |
389 | | setlocale(LC_NUMERIC, "C"); |
390 | | } |
391 | | #endif |
392 | | |
393 | 10.5M | while (PEEK_CHAR(c, tok)) // Note: c might be '\0' ! |
394 | 10.5M | { |
395 | | |
396 | 94.9M | redo_char: |
397 | 94.9M | switch (state) |
398 | 94.9M | { |
399 | | |
400 | 37.6M | case json_tokener_state_eatws: |
401 | | /* Advance until we change state */ |
402 | 37.6M | while (is_ws_char(c)) |
403 | 40.2k | { |
404 | 40.2k | if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) |
405 | 137 | goto out; |
406 | 40.2k | } |
407 | 37.6M | if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) |
408 | 3.06k | { |
409 | 3.06k | printbuf_reset(tok->pb); |
410 | 3.06k | printbuf_memappend_checked(tok->pb, &c, 1); |
411 | 3.06k | state = json_tokener_state_comment_start; |
412 | 3.06k | } |
413 | 37.5M | else |
414 | 37.5M | { |
415 | 37.5M | state = saved_state; |
416 | 37.5M | goto redo_char; |
417 | 37.5M | } |
418 | 3.06k | break; |
419 | | |
420 | 9.38M | case json_tokener_state_start: |
421 | 9.38M | switch (c) |
422 | 9.38M | { |
423 | 10.9k | case '{': |
424 | 10.9k | state = json_tokener_state_eatws; |
425 | 10.9k | saved_state = json_tokener_state_object_field_start; |
426 | 10.9k | current = json_object_new_object(); |
427 | 10.9k | if (current == NULL) |
428 | 0 | { |
429 | 0 | tok->err = json_tokener_error_memory; |
430 | 0 | goto out; |
431 | 0 | } |
432 | 10.9k | break; |
433 | 18.0k | case '[': |
434 | 18.0k | state = json_tokener_state_eatws; |
435 | 18.0k | saved_state = json_tokener_state_array; |
436 | 18.0k | current = json_object_new_array(); |
437 | 18.0k | if (current == NULL) |
438 | 0 | { |
439 | 0 | tok->err = json_tokener_error_memory; |
440 | 0 | goto out; |
441 | 0 | } |
442 | 18.0k | break; |
443 | 18.0k | case 'I': |
444 | 8.21k | case 'i': |
445 | 8.21k | state = json_tokener_state_inf; |
446 | 8.21k | printbuf_reset(tok->pb); |
447 | 8.21k | tok->st_pos = 0; |
448 | 8.21k | goto redo_char; |
449 | 1.95k | case 'N': |
450 | 5.05k | case 'n': |
451 | 5.05k | state = json_tokener_state_null; // or NaN |
452 | 5.05k | printbuf_reset(tok->pb); |
453 | 5.05k | tok->st_pos = 0; |
454 | 5.05k | goto redo_char; |
455 | 22.1k | case '\'': |
456 | 22.1k | if (tok->flags & JSON_TOKENER_STRICT) |
457 | 2 | { |
458 | | /* in STRICT mode only double-quotes are allowed */ |
459 | 2 | tok->err = json_tokener_error_parse_unexpected; |
460 | 2 | goto out; |
461 | 2 | } |
462 | | /* FALLTHRU */ |
463 | 24.6k | case '"': |
464 | 24.6k | state = json_tokener_state_string; |
465 | 24.6k | printbuf_reset(tok->pb); |
466 | 24.6k | tok->quote_char = c; |
467 | 24.6k | break; |
468 | 727 | case 'T': |
469 | 1.74k | case 't': |
470 | 2.54k | case 'F': |
471 | 3.64k | case 'f': |
472 | 3.64k | state = json_tokener_state_boolean; |
473 | 3.64k | printbuf_reset(tok->pb); |
474 | 3.64k | tok->st_pos = 0; |
475 | 3.64k | goto redo_char; |
476 | 126k | case '0': |
477 | 386k | case '1': |
478 | 2.26M | case '2': |
479 | 2.44M | case '3': |
480 | 2.67M | case '4': |
481 | 2.73M | case '5': |
482 | 8.85M | case '6': |
483 | 8.89M | case '7': |
484 | 9.15M | case '8': |
485 | 9.29M | case '9': |
486 | 9.30M | case '-': |
487 | 9.30M | state = json_tokener_state_number; |
488 | 9.30M | printbuf_reset(tok->pb); |
489 | 9.30M | tok->is_double = 0; |
490 | 9.30M | goto redo_char; |
491 | 4.70k | default: tok->err = json_tokener_error_parse_unexpected; goto out; |
492 | 9.38M | } |
493 | 53.6k | break; |
494 | | |
495 | 9.36M | case json_tokener_state_finish: |
496 | 9.36M | if (tok->depth == 0) |
497 | 2.00k | goto out; |
498 | 9.36M | obj = json_object_get(current); |
499 | 9.36M | json_tokener_reset_level(tok, tok->depth); |
500 | 9.36M | tok->depth--; |
501 | 9.36M | goto redo_char; |
502 | | |
503 | 11.6k | case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */ |
504 | 11.6k | { |
505 | | /* If we were guaranteed to have len set, then we could (usually) handle |
506 | | * the entire "Infinity" check in a single strncmp (strncasecmp), but |
507 | | * since len might be -1 (i.e. "read until \0"), we need to check it |
508 | | * a character at a time. |
509 | | * Trying to handle it both ways would make this code considerably more |
510 | | * complicated with likely little performance benefit. |
511 | | */ |
512 | 11.6k | int is_negative = 0; |
513 | | |
514 | | /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */ |
515 | 102k | while (tok->st_pos < (int)json_inf_str_len) |
516 | 91.3k | { |
517 | 91.3k | char inf_char = *str; |
518 | 91.3k | if (inf_char != json_inf_str[tok->st_pos] && |
519 | 91.3k | ((tok->flags & JSON_TOKENER_STRICT) || |
520 | 16.3k | inf_char != json_inf_str_invert[tok->st_pos]) |
521 | 91.3k | ) |
522 | 196 | { |
523 | 196 | tok->err = json_tokener_error_parse_unexpected; |
524 | 196 | goto out; |
525 | 196 | } |
526 | 91.1k | tok->st_pos++; |
527 | 91.1k | (void)ADVANCE_CHAR(str, tok); |
528 | 91.1k | if (!PEEK_CHAR(c, tok)) |
529 | 104 | { |
530 | | /* out of input chars, for now at least */ |
531 | 104 | goto out; |
532 | 104 | } |
533 | 91.1k | } |
534 | | /* We checked the full length of "Infinity", so create the object. |
535 | | * When handling -Infinity, the number parsing code will have dropped |
536 | | * the "-" into tok->pb for us, so check it now. |
537 | | */ |
538 | 11.3k | if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-') |
539 | 3.34k | { |
540 | 3.34k | is_negative = 1; |
541 | 3.34k | } |
542 | 11.3k | current = json_object_new_double(is_negative ? -INFINITY : INFINITY); |
543 | 11.3k | if (current == NULL) |
544 | 0 | { |
545 | 0 | tok->err = json_tokener_error_memory; |
546 | 0 | goto out; |
547 | 0 | } |
548 | 11.3k | saved_state = json_tokener_state_finish; |
549 | 11.3k | state = json_tokener_state_eatws; |
550 | 11.3k | goto redo_char; |
551 | 11.3k | } |
552 | 0 | break; |
553 | 22.2k | case json_tokener_state_null: /* aka starts with 'n' */ |
554 | 22.2k | { |
555 | 22.2k | int size; |
556 | 22.2k | int size_nan; |
557 | 22.2k | printbuf_memappend_checked(tok->pb, &c, 1); |
558 | 22.2k | size = json_min(tok->st_pos + 1, json_null_str_len); |
559 | 22.2k | size_nan = json_min(tok->st_pos + 1, json_nan_str_len); |
560 | 22.2k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
561 | 22.2k | strncasecmp(json_null_str, tok->pb->buf, size) == 0) || |
562 | 22.2k | (strncmp(json_null_str, tok->pb->buf, size) == 0)) |
563 | 15.4k | { |
564 | 15.4k | if (tok->st_pos == json_null_str_len) |
565 | 2.60k | { |
566 | 2.60k | current = NULL; |
567 | 2.60k | saved_state = json_tokener_state_finish; |
568 | 2.60k | state = json_tokener_state_eatws; |
569 | 2.60k | goto redo_char; |
570 | 2.60k | } |
571 | 15.4k | } |
572 | 6.74k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
573 | 6.74k | strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || |
574 | 6.74k | (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)) |
575 | 6.42k | { |
576 | 6.42k | if (tok->st_pos == json_nan_str_len) |
577 | 2.05k | { |
578 | 2.05k | current = json_object_new_double(NAN); |
579 | 2.05k | if (current == NULL) |
580 | 0 | { |
581 | 0 | tok->err = json_tokener_error_memory; |
582 | 0 | goto out; |
583 | 0 | } |
584 | 2.05k | saved_state = json_tokener_state_finish; |
585 | 2.05k | state = json_tokener_state_eatws; |
586 | 2.05k | goto redo_char; |
587 | 2.05k | } |
588 | 6.42k | } |
589 | 324 | else |
590 | 324 | { |
591 | 324 | tok->err = json_tokener_error_parse_null; |
592 | 324 | goto out; |
593 | 324 | } |
594 | 17.2k | tok->st_pos++; |
595 | 17.2k | } |
596 | 0 | break; |
597 | | |
598 | 3.02k | case json_tokener_state_comment_start: |
599 | 3.02k | if (c == '*') |
600 | 983 | { |
601 | 983 | state = json_tokener_state_comment; |
602 | 983 | } |
603 | 2.04k | else if (c == '/') |
604 | 1.90k | { |
605 | 1.90k | state = json_tokener_state_comment_eol; |
606 | 1.90k | } |
607 | 144 | else |
608 | 144 | { |
609 | 144 | tok->err = json_tokener_error_parse_comment; |
610 | 144 | goto out; |
611 | 144 | } |
612 | 2.88k | printbuf_memappend_checked(tok->pb, &c, 1); |
613 | 2.88k | break; |
614 | | |
615 | 410k | case json_tokener_state_comment: |
616 | 410k | { |
617 | | /* Advance until we change state */ |
618 | 410k | const char *case_start = str; |
619 | 14.0M | while (c != '*') |
620 | 13.5M | { |
621 | 13.5M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
622 | 325 | { |
623 | 325 | printbuf_memappend_checked(tok->pb, case_start, |
624 | 325 | str - case_start); |
625 | 325 | goto out; |
626 | 325 | } |
627 | 13.5M | } |
628 | 410k | printbuf_memappend_checked(tok->pb, case_start, 1 + str - case_start); |
629 | 410k | state = json_tokener_state_comment_end; |
630 | 410k | } |
631 | 0 | break; |
632 | | |
633 | 1.89k | case json_tokener_state_comment_eol: |
634 | 1.89k | { |
635 | | /* Advance until we change state */ |
636 | 1.89k | const char *case_start = str; |
637 | 6.82M | while (c != '\n') |
638 | 6.82M | { |
639 | 6.82M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
640 | 272 | { |
641 | 272 | printbuf_memappend_checked(tok->pb, case_start, |
642 | 272 | str - case_start); |
643 | 272 | goto out; |
644 | 272 | } |
645 | 6.82M | } |
646 | 1.61k | printbuf_memappend_checked(tok->pb, case_start, str - case_start); |
647 | 1.61k | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
648 | 1.61k | state = json_tokener_state_eatws; |
649 | 1.61k | } |
650 | 0 | break; |
651 | | |
652 | 410k | case json_tokener_state_comment_end: |
653 | 410k | printbuf_memappend_checked(tok->pb, &c, 1); |
654 | 410k | if (c == '/') |
655 | 496 | { |
656 | 496 | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
657 | 496 | state = json_tokener_state_eatws; |
658 | 496 | } |
659 | 410k | else |
660 | 410k | { |
661 | 410k | state = json_tokener_state_comment; |
662 | 410k | } |
663 | 410k | break; |
664 | | |
665 | 34.4k | case json_tokener_state_string: |
666 | 34.4k | { |
667 | | /* Advance until we change state */ |
668 | 34.4k | const char *case_start = str; |
669 | 25.3M | while (1) |
670 | 25.3M | { |
671 | 25.3M | if (c == tok->quote_char) |
672 | 23.1k | { |
673 | 23.1k | printbuf_memappend_checked(tok->pb, case_start, |
674 | 23.1k | str - case_start); |
675 | 23.1k | current = |
676 | 23.1k | json_object_new_string_len(tok->pb->buf, tok->pb->bpos); |
677 | 23.1k | if (current == NULL) |
678 | 0 | { |
679 | 0 | tok->err = json_tokener_error_memory; |
680 | 0 | goto out; |
681 | 0 | } |
682 | 23.1k | saved_state = json_tokener_state_finish; |
683 | 23.1k | state = json_tokener_state_eatws; |
684 | 23.1k | break; |
685 | 23.1k | } |
686 | 25.3M | else if (c == '\\') |
687 | 10.5k | { |
688 | 10.5k | printbuf_memappend_checked(tok->pb, case_start, |
689 | 10.5k | str - case_start); |
690 | 10.5k | saved_state = json_tokener_state_string; |
691 | 10.5k | state = json_tokener_state_string_escape; |
692 | 10.5k | break; |
693 | 10.5k | } |
694 | 25.3M | else if ((tok->flags & JSON_TOKENER_STRICT) && (unsigned char)c <= 0x1f) |
695 | 7 | { |
696 | | // Disallow control characters in strict mode |
697 | 7 | tok->err = json_tokener_error_parse_string; |
698 | 7 | goto out; |
699 | 7 | } |
700 | 25.3M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
701 | 686 | { |
702 | 686 | printbuf_memappend_checked(tok->pb, case_start, |
703 | 686 | str - case_start); |
704 | 686 | goto out; |
705 | 686 | } |
706 | 25.3M | } |
707 | 34.4k | } |
708 | 33.7k | break; |
709 | | |
710 | 33.7k | case json_tokener_state_string_escape: |
711 | 28.7k | switch (c) |
712 | 28.7k | { |
713 | 4.45k | case '"': |
714 | 11.1k | case '\\': |
715 | 12.6k | case '/': |
716 | 12.6k | printbuf_memappend_checked(tok->pb, &c, 1); |
717 | 12.6k | state = saved_state; |
718 | 12.6k | break; |
719 | 502 | case 'b': |
720 | 1.74k | case 'n': |
721 | 3.46k | case 'r': |
722 | 4.66k | case 't': |
723 | 5.89k | case 'f': |
724 | 5.89k | if (c == 'b') |
725 | 502 | printbuf_memappend_checked(tok->pb, "\b", 1); |
726 | 5.38k | else if (c == 'n') |
727 | 1.23k | printbuf_memappend_checked(tok->pb, "\n", 1); |
728 | 4.14k | else if (c == 'r') |
729 | 1.72k | printbuf_memappend_checked(tok->pb, "\r", 1); |
730 | 2.42k | else if (c == 't') |
731 | 1.20k | printbuf_memappend_checked(tok->pb, "\t", 1); |
732 | 1.22k | else if (c == 'f') |
733 | 1.22k | printbuf_memappend_checked(tok->pb, "\f", 1); |
734 | 5.89k | state = saved_state; |
735 | 5.89k | break; |
736 | 10.0k | case 'u': |
737 | 10.0k | tok->ucs_char = 0; |
738 | 10.0k | tok->st_pos = 0; |
739 | 10.0k | state = json_tokener_state_escape_unicode; |
740 | 10.0k | break; |
741 | 134 | default: tok->err = json_tokener_error_parse_string; goto out; |
742 | 28.7k | } |
743 | 28.5k | break; |
744 | | |
745 | | // =================================================== |
746 | | |
747 | 28.5k | case json_tokener_state_escape_unicode: |
748 | 12.1k | { |
749 | | /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */ |
750 | 47.8k | while (1) |
751 | 47.8k | { |
752 | 47.8k | if (!c || !is_hex_char(c)) |
753 | 213 | { |
754 | 213 | tok->err = json_tokener_error_parse_string; |
755 | 213 | goto out; |
756 | 213 | } |
757 | 47.6k | tok->ucs_char |= |
758 | 47.6k | ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4)); |
759 | 47.6k | tok->st_pos++; |
760 | 47.6k | if (tok->st_pos >= 4) |
761 | 11.8k | break; |
762 | | |
763 | 35.7k | (void)ADVANCE_CHAR(str, tok); |
764 | 35.7k | if (!PEEK_CHAR(c, tok)) |
765 | 41 | { |
766 | | /* |
767 | | * We're out of characters in the current call to |
768 | | * json_tokener_parse(), but a subsequent call might |
769 | | * provide us with more, so leave our current state |
770 | | * as-is (including tok->high_surrogate) and return. |
771 | | */ |
772 | 41 | goto out; |
773 | 41 | } |
774 | 35.7k | } |
775 | 11.8k | tok->st_pos = 0; |
776 | | |
777 | | /* Now, we have a full \uNNNN sequence in tok->ucs_char */ |
778 | | |
779 | | /* If the *previous* sequence was a high surrogate ... */ |
780 | 11.8k | if (tok->high_surrogate) |
781 | 2.04k | { |
782 | 2.04k | if (IS_LOW_SURROGATE(tok->ucs_char)) |
783 | 839 | { |
784 | | /* Recalculate the ucs_char, then fall thru to process normally */ |
785 | 839 | tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate, |
786 | 839 | tok->ucs_char); |
787 | 839 | } |
788 | 1.20k | else |
789 | 1.20k | { |
790 | | /* High surrogate was not followed by a low surrogate |
791 | | * Replace the high and process the rest normally |
792 | | */ |
793 | 1.20k | printbuf_memappend_checked(tok->pb, |
794 | 1.20k | (char *)utf8_replacement_char, 3); |
795 | 1.20k | } |
796 | 2.04k | tok->high_surrogate = 0; |
797 | 2.04k | } |
798 | | |
799 | 11.8k | if (tok->ucs_char < 0x80) |
800 | 4.19k | { |
801 | 4.19k | unsigned char unescaped_utf[1]; |
802 | 4.19k | unescaped_utf[0] = tok->ucs_char; |
803 | 4.19k | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 1); |
804 | 4.19k | } |
805 | 7.65k | else if (tok->ucs_char < 0x800) |
806 | 516 | { |
807 | 516 | unsigned char unescaped_utf[2]; |
808 | 516 | unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); |
809 | 516 | unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); |
810 | 516 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 2); |
811 | 516 | } |
812 | 7.13k | else if (IS_HIGH_SURROGATE(tok->ucs_char)) |
813 | 4.12k | { |
814 | | /* |
815 | | * The next two characters should be \u, HOWEVER, |
816 | | * we can't simply peek ahead here, because the |
817 | | * characters we need might not be passed to us |
818 | | * until a subsequent call to json_tokener_parse. |
819 | | * Instead, transition through a couple of states. |
820 | | * (now): |
821 | | * _escape_unicode => _unicode_need_escape |
822 | | * (see a '\\' char): |
823 | | * _unicode_need_escape => _unicode_need_u |
824 | | * (see a 'u' char): |
825 | | * _unicode_need_u => _escape_unicode |
826 | | * ...and we'll end up back around here. |
827 | | */ |
828 | 4.12k | tok->high_surrogate = tok->ucs_char; |
829 | 4.12k | tok->ucs_char = 0; |
830 | 4.12k | state = json_tokener_state_escape_unicode_need_escape; |
831 | 4.12k | break; |
832 | 4.12k | } |
833 | 3.01k | else if (IS_LOW_SURROGATE(tok->ucs_char)) |
834 | 1.04k | { |
835 | | /* Got a low surrogate not preceded by a high */ |
836 | 1.04k | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
837 | 1.04k | } |
838 | 1.97k | else if (tok->ucs_char < 0x10000) |
839 | 1.39k | { |
840 | 1.39k | unsigned char unescaped_utf[3]; |
841 | 1.39k | unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); |
842 | 1.39k | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
843 | 1.39k | unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); |
844 | 1.39k | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 3); |
845 | 1.39k | } |
846 | 585 | else if (tok->ucs_char < 0x110000) |
847 | 585 | { |
848 | 585 | unsigned char unescaped_utf[4]; |
849 | 585 | unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); |
850 | 585 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); |
851 | 585 | unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
852 | 585 | unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); |
853 | 585 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 4); |
854 | 585 | } |
855 | 0 | else |
856 | 0 | { |
857 | | /* Don't know what we got--insert the replacement char */ |
858 | 0 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
859 | 0 | } |
860 | 7.73k | state = saved_state; // i.e. _state_string or _state_object_field |
861 | 7.73k | } |
862 | 0 | break; |
863 | | |
864 | 4.08k | case json_tokener_state_escape_unicode_need_escape: |
865 | | // We get here after processing a high_surrogate |
866 | | // require a '\\' char |
867 | 4.08k | if (!c || c != '\\') |
868 | 1.16k | { |
869 | | /* Got a high surrogate without another sequence following |
870 | | * it. Put a replacement char in for the high surrogate |
871 | | * and pop back up to _state_string or _state_object_field. |
872 | | */ |
873 | 1.16k | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
874 | 1.16k | tok->high_surrogate = 0; |
875 | 1.16k | tok->ucs_char = 0; |
876 | 1.16k | tok->st_pos = 0; |
877 | 1.16k | state = saved_state; |
878 | 1.16k | goto redo_char; |
879 | 1.16k | } |
880 | 2.92k | state = json_tokener_state_escape_unicode_need_u; |
881 | 2.92k | break; |
882 | | |
883 | 2.91k | case json_tokener_state_escape_unicode_need_u: |
884 | | /* We already had a \ char, check that it's \u */ |
885 | 2.91k | if (!c || c != 'u') |
886 | 836 | { |
887 | | /* Got a high surrogate with some non-unicode escape |
888 | | * sequence following it. |
889 | | * Put a replacement char in for the high surrogate |
890 | | * and handle the escape sequence normally. |
891 | | */ |
892 | 836 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
893 | 836 | tok->high_surrogate = 0; |
894 | 836 | tok->ucs_char = 0; |
895 | 836 | tok->st_pos = 0; |
896 | 836 | state = json_tokener_state_string_escape; |
897 | 836 | goto redo_char; |
898 | 836 | } |
899 | 2.07k | state = json_tokener_state_escape_unicode; |
900 | 2.07k | break; |
901 | | |
902 | | // =================================================== |
903 | | |
904 | 18.4k | case json_tokener_state_boolean: |
905 | 18.4k | { |
906 | 18.4k | int size1, size2; |
907 | 18.4k | printbuf_memappend_checked(tok->pb, &c, 1); |
908 | 18.4k | size1 = json_min(tok->st_pos + 1, json_true_str_len); |
909 | 18.4k | size2 = json_min(tok->st_pos + 1, json_false_str_len); |
910 | 18.4k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
911 | 18.4k | strncasecmp(json_true_str, tok->pb->buf, size1) == 0) || |
912 | 18.4k | (strncmp(json_true_str, tok->pb->buf, size1) == 0)) |
913 | 7.92k | { |
914 | 7.92k | if (tok->st_pos == json_true_str_len) |
915 | 1.50k | { |
916 | 1.50k | current = json_object_new_boolean(1); |
917 | 1.50k | if (current == NULL) |
918 | 0 | { |
919 | 0 | tok->err = json_tokener_error_memory; |
920 | 0 | goto out; |
921 | 0 | } |
922 | 1.50k | saved_state = json_tokener_state_finish; |
923 | 1.50k | state = json_tokener_state_eatws; |
924 | 1.50k | goto redo_char; |
925 | 1.50k | } |
926 | 7.92k | } |
927 | 10.5k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
928 | 10.5k | strncasecmp(json_false_str, tok->pb->buf, size2) == 0) || |
929 | 10.5k | (strncmp(json_false_str, tok->pb->buf, size2) == 0)) |
930 | 10.0k | { |
931 | 10.0k | if (tok->st_pos == json_false_str_len) |
932 | 1.56k | { |
933 | 1.56k | current = json_object_new_boolean(0); |
934 | 1.56k | if (current == NULL) |
935 | 0 | { |
936 | 0 | tok->err = json_tokener_error_memory; |
937 | 0 | goto out; |
938 | 0 | } |
939 | 1.56k | saved_state = json_tokener_state_finish; |
940 | 1.56k | state = json_tokener_state_eatws; |
941 | 1.56k | goto redo_char; |
942 | 1.56k | } |
943 | 10.0k | } |
944 | 455 | else |
945 | 455 | { |
946 | 455 | tok->err = json_tokener_error_parse_boolean; |
947 | 455 | goto out; |
948 | 455 | } |
949 | 14.9k | tok->st_pos++; |
950 | 14.9k | } |
951 | 0 | break; |
952 | | |
953 | 9.30M | case json_tokener_state_number: |
954 | 9.30M | { |
955 | | /* Advance until we change state */ |
956 | 9.30M | const char *case_start = str; |
957 | 9.30M | int case_len = 0; |
958 | 9.30M | int is_exponent = 0; |
959 | 9.30M | int neg_sign_ok = 1; |
960 | 9.30M | int pos_sign_ok = 0; |
961 | 9.30M | if (printbuf_length(tok->pb) > 0) |
962 | 0 | { |
963 | | /* We don't save all state from the previous incremental parse |
964 | | so we need to re-generate it based on the saved string so far. |
965 | | */ |
966 | 0 | char *e_loc = strchr(tok->pb->buf, 'e'); |
967 | 0 | if (!e_loc) |
968 | 0 | e_loc = strchr(tok->pb->buf, 'E'); |
969 | 0 | if (e_loc) |
970 | 0 | { |
971 | 0 | char *last_saved_char = |
972 | 0 | &tok->pb->buf[printbuf_length(tok->pb) - 1]; |
973 | 0 | is_exponent = 1; |
974 | 0 | pos_sign_ok = neg_sign_ok = 1; |
975 | | /* If the "e" isn't at the end, we can't start with a '-' */ |
976 | 0 | if (e_loc != last_saved_char) |
977 | 0 | { |
978 | 0 | neg_sign_ok = 0; |
979 | 0 | pos_sign_ok = 0; |
980 | 0 | } |
981 | | // else leave it set to 1, i.e. start of the new input |
982 | 0 | } |
983 | 0 | } |
984 | | |
985 | 36.9M | while (c && ((c >= '0' && c <= '9') || |
986 | 36.9M | (!is_exponent && (c == 'e' || c == 'E')) || |
987 | 36.9M | (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') || |
988 | 36.9M | (!tok->is_double && c == '.'))) |
989 | 27.6M | { |
990 | 27.6M | pos_sign_ok = neg_sign_ok = 0; |
991 | 27.6M | ++case_len; |
992 | | |
993 | | /* non-digit characters checks */ |
994 | | /* note: since the main loop condition to get here was |
995 | | * an input starting with 0-9 or '-', we are |
996 | | * protected from input starting with '.' or |
997 | | * e/E. |
998 | | */ |
999 | 27.6M | switch (c) |
1000 | 27.6M | { |
1001 | 2.92k | case '.': |
1002 | 2.92k | tok->is_double = 1; |
1003 | 2.92k | pos_sign_ok = 1; |
1004 | 2.92k | neg_sign_ok = 1; |
1005 | 2.92k | break; |
1006 | 13.9k | case 'e': /* FALLTHRU */ |
1007 | 17.9k | case 'E': |
1008 | 17.9k | is_exponent = 1; |
1009 | 17.9k | tok->is_double = 1; |
1010 | | /* the exponent part can begin with a negative sign */ |
1011 | 17.9k | pos_sign_ok = neg_sign_ok = 1; |
1012 | 17.9k | break; |
1013 | 27.6M | default: break; |
1014 | 27.6M | } |
1015 | | |
1016 | 27.6M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1017 | 276 | { |
1018 | 276 | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1019 | 276 | goto out; |
1020 | 276 | } |
1021 | 27.6M | } |
1022 | | /* |
1023 | | Now we know c isn't a valid number char, but check whether |
1024 | | it might have been intended to be, and return a potentially |
1025 | | more understandable error right away. |
1026 | | However, if we're at the top-level, use the number as-is |
1027 | | because c can be part of a new object to parse on the |
1028 | | next call to json_tokener_parse(). |
1029 | | */ |
1030 | 9.30M | if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' && |
1031 | 9.30M | c != 'I' && c != 'i' && !is_ws_char(c)) |
1032 | 385 | { |
1033 | 385 | tok->err = json_tokener_error_parse_number; |
1034 | 385 | goto out; |
1035 | 385 | } |
1036 | 9.30M | if (case_len > 0) |
1037 | 9.30M | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1038 | | |
1039 | | // Check for -Infinity |
1040 | 9.30M | if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I')) |
1041 | 3.40k | { |
1042 | 3.40k | state = json_tokener_state_inf; |
1043 | 3.40k | tok->st_pos = 0; |
1044 | 3.40k | goto redo_char; |
1045 | 3.40k | } |
1046 | 9.30M | if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT)) |
1047 | 20.0k | { |
1048 | | /* Trim some chars off the end, to allow things |
1049 | | like "123e+" to parse ok. */ |
1050 | 29.3k | while (printbuf_length(tok->pb) > 1) |
1051 | 23.3k | { |
1052 | 23.3k | char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1]; |
1053 | 23.3k | if (last_char != 'e' && last_char != 'E' && |
1054 | 23.3k | last_char != '-' && last_char != '+') |
1055 | 14.0k | { |
1056 | 14.0k | break; |
1057 | 14.0k | } |
1058 | 9.31k | tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0'; |
1059 | 9.31k | printbuf_length(tok->pb)--; |
1060 | 9.31k | } |
1061 | 20.0k | } |
1062 | 9.30M | } |
1063 | 0 | { |
1064 | 9.30M | int64_t num64; |
1065 | 9.30M | uint64_t numuint64; |
1066 | 9.30M | double numd; |
1067 | 9.30M | if (!tok->is_double && tok->pb->buf[0] == '-' && |
1068 | 9.30M | json_parse_int64(tok->pb->buf, &num64) == 0) |
1069 | 3.02k | { |
1070 | 3.02k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1071 | 3 | { |
1072 | 3 | tok->err = json_tokener_error_parse_number; |
1073 | 3 | goto out; |
1074 | 3 | } |
1075 | 3.01k | current = json_object_new_int64(num64); |
1076 | 3.01k | if (current == NULL) |
1077 | 0 | { |
1078 | 0 | tok->err = json_tokener_error_memory; |
1079 | 0 | goto out; |
1080 | 0 | } |
1081 | 3.01k | } |
1082 | 9.29M | else if (!tok->is_double && tok->pb->buf[0] != '-' && |
1083 | 9.29M | json_parse_uint64(tok->pb->buf, &numuint64) == 0) |
1084 | 9.27M | { |
1085 | 9.27M | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1086 | 2 | { |
1087 | 2 | tok->err = json_tokener_error_parse_number; |
1088 | 2 | goto out; |
1089 | 2 | } |
1090 | 9.27M | if (numuint64 && tok->pb->buf[0] == '0' && |
1091 | 9.27M | (tok->flags & JSON_TOKENER_STRICT)) |
1092 | 1 | { |
1093 | 1 | tok->err = json_tokener_error_parse_number; |
1094 | 1 | goto out; |
1095 | 1 | } |
1096 | 9.27M | if (numuint64 <= INT64_MAX) |
1097 | 9.26M | { |
1098 | 9.26M | num64 = (uint64_t)numuint64; |
1099 | 9.26M | current = json_object_new_int64(num64); |
1100 | 9.26M | if (current == NULL) |
1101 | 0 | { |
1102 | 0 | tok->err = json_tokener_error_memory; |
1103 | 0 | goto out; |
1104 | 0 | } |
1105 | 9.26M | } |
1106 | 14.5k | else |
1107 | 14.5k | { |
1108 | 14.5k | current = json_object_new_uint64(numuint64); |
1109 | 14.5k | if (current == NULL) |
1110 | 0 | { |
1111 | 0 | tok->err = json_tokener_error_memory; |
1112 | 0 | goto out; |
1113 | 0 | } |
1114 | 14.5k | } |
1115 | 9.27M | } |
1116 | 20.3k | else if (tok->is_double && |
1117 | 20.3k | json_tokener_parse_double( |
1118 | 20.2k | tok->pb->buf, printbuf_length(tok->pb), &numd) == 0) |
1119 | 20.2k | { |
1120 | 20.2k | current = json_object_new_double_s(numd, tok->pb->buf); |
1121 | 20.2k | if (current == NULL) |
1122 | 0 | { |
1123 | 0 | tok->err = json_tokener_error_memory; |
1124 | 0 | goto out; |
1125 | 0 | } |
1126 | 20.2k | } |
1127 | 184 | else |
1128 | 184 | { |
1129 | 184 | tok->err = json_tokener_error_parse_number; |
1130 | 184 | goto out; |
1131 | 184 | } |
1132 | 9.30M | saved_state = json_tokener_state_finish; |
1133 | 9.30M | state = json_tokener_state_eatws; |
1134 | 9.30M | goto redo_char; |
1135 | 9.30M | } |
1136 | 0 | break; |
1137 | | |
1138 | 9.30M | case json_tokener_state_array_after_sep: |
1139 | 9.31M | case json_tokener_state_array: |
1140 | 9.31M | if (c == ']') |
1141 | 4.37k | { |
1142 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1143 | 4.37k | json_object_array_shrink(current, 0); |
1144 | | |
1145 | 4.37k | if (state == json_tokener_state_array_after_sep && |
1146 | 4.37k | (tok->flags & JSON_TOKENER_STRICT)) |
1147 | 2 | { |
1148 | 2 | tok->err = json_tokener_error_parse_unexpected; |
1149 | 2 | goto out; |
1150 | 2 | } |
1151 | 4.37k | saved_state = json_tokener_state_finish; |
1152 | 4.37k | state = json_tokener_state_eatws; |
1153 | 4.37k | } |
1154 | 9.31M | else |
1155 | 9.31M | { |
1156 | 9.31M | if (tok->depth >= tok->max_depth - 1) |
1157 | 4 | { |
1158 | 4 | tok->err = json_tokener_error_depth; |
1159 | 4 | goto out; |
1160 | 4 | } |
1161 | 9.31M | state = json_tokener_state_array_add; |
1162 | 9.31M | tok->depth++; |
1163 | 9.31M | json_tokener_reset_level(tok, tok->depth); |
1164 | 9.31M | goto redo_char; |
1165 | 9.31M | } |
1166 | 4.37k | break; |
1167 | | |
1168 | 9.31M | case json_tokener_state_array_add: |
1169 | 9.31M | if (json_object_array_add(current, obj) != 0) |
1170 | 0 | { |
1171 | 0 | tok->err = json_tokener_error_memory; |
1172 | 0 | goto out; |
1173 | 0 | } |
1174 | 9.31M | saved_state = json_tokener_state_array_sep; |
1175 | 9.31M | state = json_tokener_state_eatws; |
1176 | 9.31M | goto redo_char; |
1177 | | |
1178 | 9.31M | case json_tokener_state_array_sep: |
1179 | 9.31M | if (c == ']') |
1180 | 9.20k | { |
1181 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1182 | 9.20k | json_object_array_shrink(current, 0); |
1183 | | |
1184 | 9.20k | saved_state = json_tokener_state_finish; |
1185 | 9.20k | state = json_tokener_state_eatws; |
1186 | 9.20k | } |
1187 | 9.30M | else if (c == ',') |
1188 | 9.30M | { |
1189 | 9.30M | saved_state = json_tokener_state_array_after_sep; |
1190 | 9.30M | state = json_tokener_state_eatws; |
1191 | 9.30M | } |
1192 | 313 | else |
1193 | 313 | { |
1194 | 313 | tok->err = json_tokener_error_parse_array; |
1195 | 313 | goto out; |
1196 | 313 | } |
1197 | 9.31M | break; |
1198 | | |
1199 | 9.31M | case json_tokener_state_object_field_start: |
1200 | 59.4k | case json_tokener_state_object_field_start_after_sep: |
1201 | 59.4k | if (c == '}') |
1202 | 4.27k | { |
1203 | 4.27k | if (state == json_tokener_state_object_field_start_after_sep && |
1204 | 4.27k | (tok->flags & JSON_TOKENER_STRICT)) |
1205 | 1 | { |
1206 | 1 | tok->err = json_tokener_error_parse_unexpected; |
1207 | 1 | goto out; |
1208 | 1 | } |
1209 | 4.27k | saved_state = json_tokener_state_finish; |
1210 | 4.27k | state = json_tokener_state_eatws; |
1211 | 4.27k | } |
1212 | 55.1k | else if (c == '"' || c == '\'') |
1213 | 55.0k | { |
1214 | 55.0k | tok->quote_char = c; |
1215 | 55.0k | printbuf_reset(tok->pb); |
1216 | 55.0k | state = json_tokener_state_object_field; |
1217 | 55.0k | } |
1218 | 165 | else |
1219 | 165 | { |
1220 | 165 | tok->err = json_tokener_error_parse_object_key_name; |
1221 | 165 | goto out; |
1222 | 165 | } |
1223 | 59.3k | break; |
1224 | | |
1225 | 72.3k | case json_tokener_state_object_field: |
1226 | 72.3k | { |
1227 | | /* Advance until we change state */ |
1228 | 72.3k | const char *case_start = str; |
1229 | 31.1M | while (1) |
1230 | 31.1M | { |
1231 | 31.1M | if (c == tok->quote_char) |
1232 | 54.6k | { |
1233 | 54.6k | printbuf_memappend_checked(tok->pb, case_start, |
1234 | 54.6k | str - case_start); |
1235 | 54.6k | obj_field_name = strdup(tok->pb->buf); |
1236 | 54.6k | if (obj_field_name == NULL) |
1237 | 0 | { |
1238 | 0 | tok->err = json_tokener_error_memory; |
1239 | 0 | goto out; |
1240 | 0 | } |
1241 | 54.6k | saved_state = json_tokener_state_object_field_end; |
1242 | 54.6k | state = json_tokener_state_eatws; |
1243 | 54.6k | break; |
1244 | 54.6k | } |
1245 | 31.1M | else if (c == '\\') |
1246 | 17.4k | { |
1247 | 17.4k | printbuf_memappend_checked(tok->pb, case_start, |
1248 | 17.4k | str - case_start); |
1249 | 17.4k | saved_state = json_tokener_state_object_field; |
1250 | 17.4k | state = json_tokener_state_string_escape; |
1251 | 17.4k | break; |
1252 | 17.4k | } |
1253 | 31.0M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1254 | 247 | { |
1255 | 247 | printbuf_memappend_checked(tok->pb, case_start, |
1256 | 247 | str - case_start); |
1257 | 247 | goto out; |
1258 | 247 | } |
1259 | 31.0M | } |
1260 | 72.3k | } |
1261 | 72.0k | break; |
1262 | | |
1263 | 72.0k | case json_tokener_state_object_field_end: |
1264 | 54.6k | if (c == ':') |
1265 | 54.5k | { |
1266 | 54.5k | saved_state = json_tokener_state_object_value; |
1267 | 54.5k | state = json_tokener_state_eatws; |
1268 | 54.5k | } |
1269 | 105 | else |
1270 | 105 | { |
1271 | 105 | tok->err = json_tokener_error_parse_object_key_sep; |
1272 | 105 | goto out; |
1273 | 105 | } |
1274 | 54.5k | break; |
1275 | | |
1276 | 54.5k | case json_tokener_state_object_value: |
1277 | 54.5k | if (tok->depth >= tok->max_depth - 1) |
1278 | 2 | { |
1279 | 2 | tok->err = json_tokener_error_depth; |
1280 | 2 | goto out; |
1281 | 2 | } |
1282 | 54.5k | state = json_tokener_state_object_value_add; |
1283 | 54.5k | tok->depth++; |
1284 | 54.5k | json_tokener_reset_level(tok, tok->depth); |
1285 | 54.5k | goto redo_char; |
1286 | | |
1287 | 52.8k | case json_tokener_state_object_value_add: |
1288 | 52.8k | if (json_object_object_add(current, obj_field_name, obj) != 0) |
1289 | 0 | { |
1290 | 0 | tok->err = json_tokener_error_memory; |
1291 | 0 | goto out; |
1292 | 0 | } |
1293 | 52.8k | free(obj_field_name); |
1294 | 52.8k | obj_field_name = NULL; |
1295 | 52.8k | saved_state = json_tokener_state_object_sep; |
1296 | 52.8k | state = json_tokener_state_eatws; |
1297 | 52.8k | goto redo_char; |
1298 | | |
1299 | 52.8k | case json_tokener_state_object_sep: |
1300 | | /* { */ |
1301 | 52.8k | if (c == '}') |
1302 | 3.81k | { |
1303 | 3.81k | saved_state = json_tokener_state_finish; |
1304 | 3.81k | state = json_tokener_state_eatws; |
1305 | 3.81k | } |
1306 | 48.9k | else if (c == ',') |
1307 | 48.7k | { |
1308 | 48.7k | saved_state = json_tokener_state_object_field_start_after_sep; |
1309 | 48.7k | state = json_tokener_state_eatws; |
1310 | 48.7k | } |
1311 | 187 | else |
1312 | 187 | { |
1313 | 187 | tok->err = json_tokener_error_parse_object_value_sep; |
1314 | 187 | goto out; |
1315 | 187 | } |
1316 | 52.6k | break; |
1317 | 94.9M | } |
1318 | 10.5M | (void)ADVANCE_CHAR(str, tok); |
1319 | 10.5M | if (!c) // This is the char *before* advancing |
1320 | 43 | break; |
1321 | 10.5M | } /* while(PEEK_CHAR) */ |
1322 | | |
1323 | 14.9k | out: |
1324 | 14.9k | if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0)) |
1325 | 159 | { |
1326 | 159 | tok->err = json_tokener_error_parse_utf8_string; |
1327 | 159 | } |
1328 | 14.9k | if (c && (state == json_tokener_state_finish) && (tok->depth == 0) && |
1329 | 14.9k | (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) == |
1330 | 774 | JSON_TOKENER_STRICT) |
1331 | 22 | { |
1332 | | /* unexpected char after JSON data */ |
1333 | 22 | tok->err = json_tokener_error_parse_unexpected; |
1334 | 22 | } |
1335 | 14.9k | if (!c) |
1336 | 7.64k | { |
1337 | | /* We hit an eof char (0) */ |
1338 | 7.64k | if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish) |
1339 | 6.27k | tok->err = json_tokener_error_parse_eof; |
1340 | 7.64k | } |
1341 | | |
1342 | 14.9k | #ifdef HAVE_USELOCALE |
1343 | 14.9k | uselocale(oldlocale); |
1344 | 14.9k | freelocale(newloc); |
1345 | | #elif defined(HAVE_SETLOCALE) |
1346 | | setlocale(LC_NUMERIC, oldlocale); |
1347 | | free(oldlocale); |
1348 | | #endif |
1349 | | |
1350 | 14.9k | if (tok->err == json_tokener_success) |
1351 | 3.02k | { |
1352 | 3.02k | json_object *ret = json_object_get(current); |
1353 | 3.02k | int ii; |
1354 | | |
1355 | | /* Partially reset, so we parse additional objects on subsequent calls. */ |
1356 | 6.52k | for (ii = tok->depth; ii >= 0; ii--) |
1357 | 3.50k | json_tokener_reset_level(tok, ii); |
1358 | 3.02k | return ret; |
1359 | 3.02k | } |
1360 | | |
1361 | 11.9k | MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err], |
1362 | 11.9k | tok->char_offset); |
1363 | 11.9k | return NULL; |
1364 | 14.9k | } |
1365 | | |
1366 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) |
1367 | 3.28M | { |
1368 | 3.28M | unsigned char chr = c; |
1369 | 3.28M | if (*nBytes == 0) |
1370 | 3.27M | { |
1371 | 3.27M | if (chr >= 0x80) |
1372 | 3.38k | { |
1373 | 3.38k | if ((chr & 0xe0) == 0xc0) |
1374 | 1.21k | *nBytes = 1; |
1375 | 2.16k | else if ((chr & 0xf0) == 0xe0) |
1376 | 941 | *nBytes = 2; |
1377 | 1.22k | else if ((chr & 0xf8) == 0xf0) |
1378 | 1.18k | *nBytes = 3; |
1379 | 42 | else |
1380 | 42 | return 0; |
1381 | 3.38k | } |
1382 | 3.27M | } |
1383 | 6.37k | else |
1384 | 6.37k | { |
1385 | 6.37k | if ((chr & 0xC0) != 0x80) |
1386 | 14 | return 0; |
1387 | 6.35k | (*nBytes)--; |
1388 | 6.35k | } |
1389 | 3.28M | return 1; |
1390 | 3.28M | } |
1391 | | |
/**
 * Set the parsing flags on a tokener.
 *
 * The new value replaces tok->flags outright; it is not OR-ed into the
 * flags already set.  The parser earlier in this file tests the field
 * against JSON_TOKENER_STRICT, JSON_TOKENER_VALIDATE_UTF8 and
 * JSON_TOKENER_ALLOW_TRAILING_CHARS.
 *
 * @param tok   tokener to update; dereferenced without a NULL check
 * @param flags new flag bits
 */
1392 | | void json_tokener_set_flags(struct json_tokener *tok, int flags) |
1393 | 3.05k | { |
1394 | 3.05k | tok->flags = flags; |
1395 | 3.05k | } |
1396 | | |
/**
 * Return tok->char_offset converted to size_t.
 *
 * The assert states the expectation that the offset is never negative.
 * When assertions are compiled out (NDEBUG) a negative offset would be
 * converted to a very large size_t instead of being caught.
 *
 * NOTE(review): char_offset is presumably the number of bytes consumed by
 * the most recent parse call -- confirm against json_tokener.h.
 *
 * @param tok tokener to query; dereferenced without a NULL check
 * @return the offset as a size_t
 */
1397 | | size_t json_tokener_get_parse_end(struct json_tokener *tok) |
1398 | 0 | { |
1399 | 0 | assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */ |
1400 | 0 | return (size_t)tok->char_offset; |
1401 | 0 | } |
1402 | | |
/**
 * Convert the first len bytes of buf to a double using strtod().
 *
 * @param buf    NUL-terminated text to convert
 * @param len    number of bytes of buf that must make up the whole number
 * @param retval receives strtod()'s result; it is written even when the
 *               function reports failure
 * @return 0 if strtod() consumed exactly len bytes (and at least one),
 *         1 otherwise.
 *
 * strtod() signals "no conversion performed" by leaving the end pointer
 * equal to buf.  With len == 0 that state would also satisfy the length
 * check, so it is rejected explicitly rather than reported as success.
 *
 * No errno/ERANGE check is made: overflowed or underflowed results are
 * accepted exactly as strtod() returns them.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end = NULL;

	*retval = strtod(buf, &end);

	if (end == buf)
		return 1; /* nothing was converted */

	/* Compare distances rather than forming buf + len, which need not be
	 * a valid pointer for an arbitrary len. */
	return ((end - buf) == len) ? 0 : 1;
}