/src/json-c/json_tokener.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $ |
3 | | * |
4 | | * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. |
5 | | * Michael Clark <michael@metaparadigm.com> |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the MIT license. See COPYING for details. |
9 | | * |
10 | | * |
11 | | * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved. |
12 | | * The copyrights to the contents of this file are licensed under the MIT License |
13 | | * (https://www.opensource.org/licenses/mit-license.php) |
14 | | */ |
15 | | |
16 | | #include "config.h" |
17 | | |
18 | | #include "math_compat.h" |
19 | | #include <assert.h> |
20 | | #include <errno.h> |
21 | | #include <limits.h> |
22 | | #include <math.h> |
23 | | #include <stddef.h> |
24 | | #include <stdio.h> |
25 | | #include <stdlib.h> |
26 | | #include <string.h> |
27 | | |
28 | | #include "debug.h" |
29 | | #include "json_inttypes.h" |
30 | | #include "json_object.h" |
31 | | #include "json_object_private.h" |
32 | | #include "json_tokener.h" |
33 | | #include "json_util.h" |
34 | | #include "printbuf.h" |
35 | | #include "strdup_compat.h" |
36 | | |
37 | | #ifdef HAVE_LOCALE_H |
38 | | #include <locale.h> |
39 | | #endif /* HAVE_LOCALE_H */ |
40 | | #ifdef HAVE_XLOCALE_H |
41 | | #include <xlocale.h> |
42 | | #endif |
43 | | #ifdef HAVE_STRINGS_H |
44 | | #include <strings.h> |
45 | | #endif /* HAVE_STRINGS_H */ |
46 | | |
47 | 31.8k | #define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9) |
48 | | |
49 | | #if !HAVE_STRNCASECMP && defined(_MSC_VER) |
50 | | /* MSC has the version as _strnicmp */ |
51 | | #define strncasecmp _strnicmp |
52 | | #elif !HAVE_STRNCASECMP |
53 | | #error You do not have strncasecmp on your system. |
54 | | #endif /* HAVE_STRNCASECMP */ |
55 | | |
56 | | #if defined(_MSC_VER) && (_MSC_VER <= 1800) |
57 | | /* VS2013 doesn't know about "inline" */ |
58 | | #define inline __inline |
59 | | #elif defined(AIX_CC) |
60 | | #define inline |
61 | | #endif |
62 | | |
63 | | /* The following helper functions are used to speed up parsing. They |
64 | | * are faster than their ctype counterparts because they assume that |
65 | | * the input is in ASCII and that the locale is set to "C". The |
66 | | * compiler will also inline these functions, providing an additional |
67 | | * speedup by saving on function calls. |
68 | | */ |
/* Returns 1 when c is a space, horizontal tab, line feed or carriage
 * return, and 0 for every other character (including '\0').
 */
static inline int is_ws_char(char c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}
76 | | |
/* Returns 1 when c is an ASCII hexadecimal digit (0-9, a-f, A-F) and 0
 * otherwise.
 */
static inline int is_hex_char(char c)
{
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'A' && c <= 'F')
		return 1;
	if (c >= 'a' && c <= 'f')
		return 1;
	return 0;
}
83 | | |
84 | | /* Use C99 NAN by default; if not available, nan("") should work too. */ |
85 | | #ifndef NAN |
86 | | #define NAN nan("") |
87 | | #endif /* !NAN */ |
88 | | |
89 | | static const char json_null_str[] = "null"; |
90 | | static const int json_null_str_len = sizeof(json_null_str) - 1; |
91 | | static const char json_inf_str[] = "Infinity"; |
92 | | /* Swapped case "Infinity" to avoid need to call tolower() on input chars: */ |
93 | | static const char json_inf_str_invert[] = "iNFINITY"; |
94 | | static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1; |
95 | | static const char json_nan_str[] = "NaN"; |
96 | | static const int json_nan_str_len = sizeof(json_nan_str) - 1; |
97 | | static const char json_true_str[] = "true"; |
98 | | static const int json_true_str_len = sizeof(json_true_str) - 1; |
99 | | static const char json_false_str[] = "false"; |
100 | | static const int json_false_str_len = sizeof(json_false_str) - 1; |
101 | | |
102 | | /* clang-format off */ |
103 | | static const char *json_tokener_errors[] = { |
104 | | "success", |
105 | | "continue", |
106 | | "nesting too deep", |
107 | | "unexpected end of data", |
108 | | "unexpected character", |
109 | | "null expected", |
110 | | "boolean expected", |
111 | | "number expected", |
112 | | "array value separator ',' expected", |
113 | | "quoted object property name expected", |
114 | | "object property name separator ':' expected", |
115 | | "object value separator ',' expected", |
116 | | "invalid string sequence", |
117 | | "expected comment", |
118 | | "invalid utf-8 string", |
119 | | "buffer size overflow", |
120 | | "out of memory" |
121 | | }; |
122 | | /* clang-format on */ |
123 | | |
124 | | /** |
125 | | * Validate the UTF-8 string in strict mode. |
126 | | * If the input is not valid UTF-8, return an error. |
127 | | */ |
128 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes); |
129 | | |
130 | | static int json_tokener_parse_double(const char *buf, int len, double *retval); |
131 | | |
132 | | const char *json_tokener_error_desc(enum json_tokener_error jerr) |
133 | 2.57k | { |
134 | 2.57k | int jerr_int = (int)jerr; |
135 | 2.57k | if (jerr_int < 0 || |
136 | 2.57k | jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0]))) |
137 | 0 | return "Unknown error, " |
138 | 0 | "invalid json_tokener_error value passed to json_tokener_error_desc()"; |
139 | 2.57k | return json_tokener_errors[jerr]; |
140 | 2.57k | } |
141 | | |
142 | | enum json_tokener_error json_tokener_get_error(struct json_tokener *tok) |
143 | 5.14k | { |
144 | 5.14k | return tok->err; |
145 | 5.14k | } |
146 | | |
147 | | /* Stuff for decoding unicode sequences */ |
148 | 5.52k | #define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800) |
149 | 3.89k | #define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00) |
150 | 771 | #define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000) |
151 | | static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD}; |
152 | | |
153 | | struct json_tokener *json_tokener_new_ex(int depth) |
154 | 10.5k | { |
155 | 10.5k | struct json_tokener *tok; |
156 | | |
157 | 10.5k | tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener)); |
158 | 10.5k | if (!tok) |
159 | 0 | return NULL; |
160 | 10.5k | tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec)); |
161 | 10.5k | if (!tok->stack) |
162 | 0 | { |
163 | 0 | free(tok); |
164 | 0 | return NULL; |
165 | 0 | } |
166 | 10.5k | tok->pb = printbuf_new(); |
167 | 10.5k | if (!tok->pb) |
168 | 0 | { |
169 | 0 | free(tok->stack); |
170 | 0 | free(tok); |
171 | 0 | return NULL; |
172 | 0 | } |
173 | 10.5k | tok->max_depth = depth; |
174 | 10.5k | json_tokener_reset(tok); |
175 | 10.5k | return tok; |
176 | 10.5k | } |
177 | | |
178 | | struct json_tokener *json_tokener_new(void) |
179 | 10.5k | { |
180 | 10.5k | return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH); |
181 | 10.5k | } |
182 | | |
183 | | void json_tokener_free(struct json_tokener *tok) |
184 | 10.5k | { |
185 | 10.5k | json_tokener_reset(tok); |
186 | 10.5k | if (tok->pb) |
187 | 10.5k | printbuf_free(tok->pb); |
188 | 10.5k | free(tok->stack); |
189 | 10.5k | free(tok); |
190 | 10.5k | } |
191 | | |
192 | | static void json_tokener_reset_level(struct json_tokener *tok, int depth) |
193 | 748k | { |
194 | 748k | tok->stack[depth].state = json_tokener_state_eatws; |
195 | 748k | tok->stack[depth].saved_state = json_tokener_state_start; |
196 | 748k | json_object_put(tok->stack[depth].current); |
197 | 748k | tok->stack[depth].current = NULL; |
198 | 748k | free(tok->stack[depth].obj_field_name); |
199 | 748k | tok->stack[depth].obj_field_name = NULL; |
200 | 748k | } |
201 | | |
202 | | void json_tokener_reset(struct json_tokener *tok) |
203 | 21.1k | { |
204 | 21.1k | int i; |
205 | 21.1k | if (!tok) |
206 | 0 | return; |
207 | | |
208 | 46.1k | for (i = tok->depth; i >= 0; i--) |
209 | 24.9k | json_tokener_reset_level(tok, i); |
210 | 21.1k | tok->depth = 0; |
211 | 21.1k | tok->err = json_tokener_success; |
212 | 21.1k | } |
213 | | |
214 | | struct json_object *json_tokener_parse(const char *str) |
215 | 0 | { |
216 | 0 | enum json_tokener_error jerr_ignored; |
217 | 0 | struct json_object *obj; |
218 | 0 | obj = json_tokener_parse_verbose(str, &jerr_ignored); |
219 | 0 | return obj; |
220 | 0 | } |
221 | | |
222 | | struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error) |
223 | 0 | { |
224 | 0 | struct json_tokener *tok; |
225 | 0 | struct json_object *obj; |
226 | |
|
227 | 0 | tok = json_tokener_new(); |
228 | 0 | if (!tok) |
229 | 0 | { |
230 | 0 | *error = json_tokener_error_memory; |
231 | 0 | return NULL; |
232 | 0 | } |
233 | 0 | obj = json_tokener_parse_ex(tok, str, -1); |
234 | 0 | *error = tok->err; |
235 | 0 | if (tok->err != json_tokener_success |
236 | | #if 0 |
237 | | /* This would be a more sensible default, and cause parsing |
238 | | * things like "null123" to fail when the caller can't know |
239 | | * where the parsing left off, but starting to fail would |
240 | | * be a notable behaviour change. Save for a 1.0 release. |
241 | | */ |
242 | | || json_tokener_get_parse_end(tok) != strlen(str) |
243 | | #endif |
244 | 0 | ) |
245 | | |
246 | 0 | { |
247 | 0 | if (obj != NULL) |
248 | 0 | json_object_put(obj); |
249 | 0 | obj = NULL; |
250 | 0 | } |
251 | |
|
252 | 0 | json_tokener_free(tok); |
253 | 0 | return obj; |
254 | 0 | } |
255 | | |
256 | 8.84M | #define state tok->stack[tok->depth].state |
257 | 3.36M | #define saved_state tok->stack[tok->depth].saved_state |
258 | 1.45M | #define current tok->stack[tok->depth].current |
259 | 692k | #define obj_field_name tok->stack[tok->depth].obj_field_name |
260 | | |
261 | | /* Optimization: |
262 | | * json_tokener_parse_ex() consumed a lot of CPU in its main loop, |
263 | | * iterating character-by character. A large performance boost is |
264 | | * achieved by using tighter loops to locally handle units such as |
265 | | * comments and strings. Loops that handle an entire token within |
266 | | * their scope also gather entire strings and pass them to |
267 | | * printbuf_memappend() in a single call, rather than calling |
268 | | * printbuf_memappend() one char at a time. |
269 | | * |
270 | | * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is |
271 | | * common to both the main loop and the tighter loops. |
272 | | */ |
273 | | |
/* PEEK_CHAR(dest, tok) macro:
 *   Peeks at the current char and stores it in dest.
 *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
 *   When char_offset has reached len, tok->err becomes json_tokener_success
 *   if depth is 0, state is eatws and saved_state is finish; otherwise it
 *   becomes json_tokener_continue.
 *   When the JSON_TOKENER_VALIDATE_UTF8 flag is set, the byte is first
 *   passed to json_tokener_validate_utf8(); on failure tok->err is set to
 *   json_tokener_error_parse_utf8_string and 0 is returned.
 *   Implicit inputs: str, len, nBytesp vars
 *   Note: the state and saved_state macros expand to expressions on a
 *   variable literally named tok, so the tok argument must be that variable.
 */
#define PEEK_CHAR(dest, tok)                                                     \
	(((tok)->char_offset == len)                                             \
	     ? (((tok)->depth == 0 && state == json_tokener_state_eatws &&       \
	         saved_state == json_tokener_state_finish)                       \
	            ? (((tok)->err = json_tokener_success), 0)                   \
	            : (((tok)->err = json_tokener_continue), 0))                 \
	     : ((((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) &&                  \
	         (!json_tokener_validate_utf8(*str, nBytesp)))                   \
	            ? (((tok)->err = json_tokener_error_parse_utf8_string), 0)   \
	            : (((dest) = *str), 1)))
289 | | |
290 | | /* ADVANCE_CHAR() macro: |
291 | | * Increments str & tok->char_offset. |
292 | | * For convenience of existing conditionals, returns the old value of c (0 on eof). |
293 | | * Implicit inputs: c var |
294 | | */ |
295 | 15.7M | #define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c) |
296 | | |
297 | | /* printbuf_memappend_checked(p, s, l) macro: |
298 | | * Add string s of length l to printbuffer p. |
299 | | * If the operation fails, set tok->err to json_tokener_error_memory and jump to the out label (implicit inputs: tok var, out label). |
300 | | */ |
301 | | #define printbuf_memappend_checked(p, s, l) \ |
302 | 742k | do { \ |
303 | 742k | if (printbuf_memappend((p), (s), (l)) < 0) \ |
304 | 742k | { \ |
305 | 0 | tok->err = json_tokener_error_memory; \ |
306 | 0 | goto out; \ |
307 | 0 | } \ |
308 | 742k | } while (0) |
309 | | |
310 | | /* End optimization macro defs */ |
311 | | |
312 | | struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len) |
313 | 10.5k | { |
314 | 10.5k | struct json_object *obj = NULL; |
315 | 10.5k | char c = '\1'; |
316 | 10.5k | unsigned int nBytes = 0; |
317 | 10.5k | unsigned int *nBytesp = &nBytes; |
318 | | |
319 | 10.5k | #ifdef HAVE_USELOCALE |
320 | 10.5k | locale_t oldlocale = uselocale(NULL); |
321 | 10.5k | locale_t newloc; |
322 | | #elif defined(HAVE_SETLOCALE) |
323 | | char *oldlocale = NULL; |
324 | | #endif |
325 | | |
326 | 10.5k | tok->char_offset = 0; |
327 | 10.5k | tok->err = json_tokener_success; |
328 | | |
329 | | /* this interface is presently not 64-bit clean due to the int len argument |
330 | | * and the internal printbuf interface that takes 32-bit int len arguments |
331 | | * so the function limits the maximum string size to INT32_MAX (2GB). |
332 | | * If the function is called with len == -1 then strlen is called to check |
333 | | * the string length is less than INT32_MAX (2GB) |
334 | | */ |
335 | 10.5k | if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) |
336 | 0 | { |
337 | 0 | tok->err = json_tokener_error_size; |
338 | 0 | return NULL; |
339 | 0 | } |
340 | | |
341 | 10.5k | #ifdef HAVE_USELOCALE |
342 | 10.5k | { |
343 | 10.5k | locale_t duploc = duplocale(oldlocale); |
344 | 10.5k | if (duploc == NULL && errno == ENOMEM) |
345 | 0 | { |
346 | 0 | tok->err = json_tokener_error_memory; |
347 | 0 | return NULL; |
348 | 0 | } |
349 | 10.5k | newloc = newlocale(LC_NUMERIC_MASK, "C", duploc); |
350 | 10.5k | if (newloc == NULL) |
351 | 0 | { |
352 | 0 | tok->err = json_tokener_error_memory; |
353 | 0 | freelocale(duploc); |
354 | 0 | return NULL; |
355 | 0 | } |
356 | | #ifdef NEWLOCALE_NEEDS_FREELOCALE |
357 | | // Older versions of FreeBSD (<12.4) don't free the locale |
358 | | // passed to newlocale(), so do it here |
359 | | freelocale(duploc); |
360 | | #endif |
361 | 10.5k | uselocale(newloc); |
362 | 10.5k | } |
363 | | #elif defined(HAVE_SETLOCALE) |
364 | | { |
365 | | char *tmplocale; |
366 | | tmplocale = setlocale(LC_NUMERIC, NULL); |
367 | | if (tmplocale) |
368 | | { |
369 | | oldlocale = strdup(tmplocale); |
370 | | if (oldlocale == NULL) |
371 | | { |
372 | | tok->err = json_tokener_error_memory; |
373 | | return NULL; |
374 | | } |
375 | | } |
376 | | setlocale(LC_NUMERIC, "C"); |
377 | | } |
378 | | #endif |
379 | | |
380 | 1.40M | while (PEEK_CHAR(c, tok)) // Note: c might be '\0' ! |
381 | 1.40M | { |
382 | | |
383 | 4.61M | redo_char: |
384 | 4.61M | switch (state) |
385 | 4.61M | { |
386 | | |
387 | 1.75M | case json_tokener_state_eatws: |
388 | | /* Advance until we change state */ |
389 | 1.98M | while (is_ws_char(c)) |
390 | 230k | { |
391 | 230k | if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) |
392 | 0 | goto out; |
393 | 230k | } |
394 | 1.75M | if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) |
395 | 3.33k | { |
396 | 3.33k | printbuf_reset(tok->pb); |
397 | 3.33k | printbuf_memappend_checked(tok->pb, &c, 1); |
398 | 3.33k | state = json_tokener_state_comment_start; |
399 | 3.33k | } |
400 | 1.75M | else |
401 | 1.75M | { |
402 | 1.75M | state = saved_state; |
403 | 1.75M | goto redo_char; |
404 | 1.75M | } |
405 | 3.33k | break; |
406 | | |
407 | 370k | case json_tokener_state_start: |
408 | 370k | switch (c) |
409 | 370k | { |
410 | 59.1k | case '{': |
411 | 59.1k | state = json_tokener_state_eatws; |
412 | 59.1k | saved_state = json_tokener_state_object_field_start; |
413 | 59.1k | current = json_object_new_object(); |
414 | 59.1k | if (current == NULL) |
415 | 0 | { |
416 | 0 | tok->err = json_tokener_error_memory; |
417 | 0 | goto out; |
418 | 0 | } |
419 | 59.1k | break; |
420 | 59.1k | case '[': |
421 | 16.1k | state = json_tokener_state_eatws; |
422 | 16.1k | saved_state = json_tokener_state_array; |
423 | 16.1k | current = json_object_new_array(); |
424 | 16.1k | if (current == NULL) |
425 | 0 | { |
426 | 0 | tok->err = json_tokener_error_memory; |
427 | 0 | goto out; |
428 | 0 | } |
429 | 16.1k | break; |
430 | 16.1k | case 'I': |
431 | 564 | case 'i': |
432 | 564 | state = json_tokener_state_inf; |
433 | 564 | printbuf_reset(tok->pb); |
434 | 564 | tok->st_pos = 0; |
435 | 564 | goto redo_char; |
436 | 1.10k | case 'N': |
437 | 13.7k | case 'n': |
438 | 13.7k | state = json_tokener_state_null; // or NaN |
439 | 13.7k | printbuf_reset(tok->pb); |
440 | 13.7k | tok->st_pos = 0; |
441 | 13.7k | goto redo_char; |
442 | 392 | case '\'': |
443 | 392 | if (tok->flags & JSON_TOKENER_STRICT) |
444 | 0 | { |
445 | | /* in STRICT mode only double-quote are allowed */ |
446 | 0 | tok->err = json_tokener_error_parse_unexpected; |
447 | 0 | goto out; |
448 | 0 | } |
449 | | /* FALLTHRU */ |
450 | 98.1k | case '"': |
451 | 98.1k | state = json_tokener_state_string; |
452 | 98.1k | printbuf_reset(tok->pb); |
453 | 98.1k | tok->quote_char = c; |
454 | 98.1k | break; |
455 | 640 | case 'T': |
456 | 1.18k | case 't': |
457 | 1.41k | case 'F': |
458 | 1.84k | case 'f': |
459 | 1.84k | state = json_tokener_state_boolean; |
460 | 1.84k | printbuf_reset(tok->pb); |
461 | 1.84k | tok->st_pos = 0; |
462 | 1.84k | goto redo_char; |
463 | 21.9k | case '0': |
464 | 28.1k | case '1': |
465 | 34.4k | case '2': |
466 | 36.1k | case '3': |
467 | 39.0k | case '4': |
468 | 173k | case '5': |
469 | 173k | case '6': |
470 | 174k | case '7': |
471 | 175k | case '8': |
472 | 176k | case '9': |
473 | 180k | case '-': |
474 | 180k | state = json_tokener_state_number; |
475 | 180k | printbuf_reset(tok->pb); |
476 | 180k | tok->is_double = 0; |
477 | 180k | goto redo_char; |
478 | 141 | default: tok->err = json_tokener_error_parse_unexpected; goto out; |
479 | 370k | } |
480 | 173k | break; |
481 | | |
482 | 363k | case json_tokener_state_finish: |
483 | 363k | if (tok->depth == 0) |
484 | 7.92k | goto out; |
485 | 355k | obj = json_object_get(current); |
486 | 355k | json_tokener_reset_level(tok, tok->depth); |
487 | 355k | tok->depth--; |
488 | 355k | goto redo_char; |
489 | | |
490 | 997 | case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */ |
491 | 997 | { |
492 | | /* If we were guaranteed to have len set, then we could (usually) handle |
493 | | * the entire "Infinity" check in a single strncmp (strncasecmp), but |
494 | | * since len might be -1 (i.e. "read until \0"), we need to check it |
495 | | * a character at a time. |
496 | | * Trying to handle it both ways would make this code considerably more |
497 | | * complicated with likely little performance benefit. |
498 | | */ |
499 | 997 | int is_negative = 0; |
500 | | |
501 | | /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */ |
502 | 8.31k | while (tok->st_pos < (int)json_inf_str_len) |
503 | 7.41k | { |
504 | 7.41k | char inf_char = *str; |
505 | 7.41k | if (inf_char != json_inf_str[tok->st_pos] && |
506 | 7.41k | ((tok->flags & JSON_TOKENER_STRICT) || |
507 | 3.30k | inf_char != json_inf_str_invert[tok->st_pos]) |
508 | 7.41k | ) |
509 | 101 | { |
510 | 101 | tok->err = json_tokener_error_parse_unexpected; |
511 | 101 | goto out; |
512 | 101 | } |
513 | 7.31k | tok->st_pos++; |
514 | 7.31k | (void)ADVANCE_CHAR(str, tok); |
515 | 7.31k | if (!PEEK_CHAR(c, tok)) |
516 | 0 | { |
517 | | /* out of input chars, for now at least */ |
518 | 0 | goto out; |
519 | 0 | } |
520 | 7.31k | } |
521 | | /* We checked the full length of "Infinity", so create the object. |
522 | | * When handling -Infinity, the number parsing code will have dropped |
523 | | * the "-" into tok->pb for us, so check it now. |
524 | | */ |
525 | 896 | if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-') |
526 | 410 | { |
527 | 410 | is_negative = 1; |
528 | 410 | } |
529 | 896 | current = json_object_new_double(is_negative ? -INFINITY : INFINITY); |
530 | 896 | if (current == NULL) |
531 | 0 | { |
532 | 0 | tok->err = json_tokener_error_memory; |
533 | 0 | goto out; |
534 | 0 | } |
535 | 896 | saved_state = json_tokener_state_finish; |
536 | 896 | state = json_tokener_state_eatws; |
537 | 896 | goto redo_char; |
538 | 896 | } |
539 | 0 | break; |
540 | 68.1k | case json_tokener_state_null: /* aka starts with 'n' */ |
541 | 68.1k | { |
542 | 68.1k | int size; |
543 | 68.1k | int size_nan; |
544 | 68.1k | printbuf_memappend_checked(tok->pb, &c, 1); |
545 | 68.1k | size = json_min(tok->st_pos + 1, json_null_str_len); |
546 | 68.1k | size_nan = json_min(tok->st_pos + 1, json_nan_str_len); |
547 | 68.1k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
548 | 68.1k | strncasecmp(json_null_str, tok->pb->buf, size) == 0) || |
549 | 68.1k | (strncmp(json_null_str, tok->pb->buf, size) == 0)) |
550 | 66.4k | { |
551 | 66.4k | if (tok->st_pos == json_null_str_len) |
552 | 13.1k | { |
553 | 13.1k | current = NULL; |
554 | 13.1k | saved_state = json_tokener_state_finish; |
555 | 13.1k | state = json_tokener_state_eatws; |
556 | 13.1k | goto redo_char; |
557 | 13.1k | } |
558 | 66.4k | } |
559 | 1.71k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
560 | 1.71k | strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || |
561 | 1.71k | (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)) |
562 | 1.60k | { |
563 | 1.60k | if (tok->st_pos == json_nan_str_len) |
564 | 528 | { |
565 | 528 | current = json_object_new_double(NAN); |
566 | 528 | if (current == NULL) |
567 | 0 | { |
568 | 0 | tok->err = json_tokener_error_memory; |
569 | 0 | goto out; |
570 | 0 | } |
571 | 528 | saved_state = json_tokener_state_finish; |
572 | 528 | state = json_tokener_state_eatws; |
573 | 528 | goto redo_char; |
574 | 528 | } |
575 | 1.60k | } |
576 | 110 | else |
577 | 110 | { |
578 | 110 | tok->err = json_tokener_error_parse_null; |
579 | 110 | goto out; |
580 | 110 | } |
581 | 54.3k | tok->st_pos++; |
582 | 54.3k | } |
583 | 0 | break; |
584 | | |
585 | 3.33k | case json_tokener_state_comment_start: |
586 | 3.33k | if (c == '*') |
587 | 759 | { |
588 | 759 | state = json_tokener_state_comment; |
589 | 759 | } |
590 | 2.57k | else if (c == '/') |
591 | 2.46k | { |
592 | 2.46k | state = json_tokener_state_comment_eol; |
593 | 2.46k | } |
594 | 110 | else |
595 | 110 | { |
596 | 110 | tok->err = json_tokener_error_parse_comment; |
597 | 110 | goto out; |
598 | 110 | } |
599 | 3.22k | printbuf_memappend_checked(tok->pb, &c, 1); |
600 | 3.22k | break; |
601 | | |
602 | 3.86k | case json_tokener_state_comment: |
603 | 3.86k | { |
604 | | /* Advance until we change state */ |
605 | 3.86k | const char *case_start = str; |
606 | 431k | while (c != '*') |
607 | 428k | { |
608 | 428k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
609 | 182 | { |
610 | 182 | printbuf_memappend_checked(tok->pb, case_start, |
611 | 182 | str - case_start); |
612 | 182 | goto out; |
613 | 182 | } |
614 | 428k | } |
615 | 3.68k | printbuf_memappend_checked(tok->pb, case_start, 1 + str - case_start); |
616 | 3.68k | state = json_tokener_state_comment_end; |
617 | 3.68k | } |
618 | 0 | break; |
619 | | |
620 | 2.46k | case json_tokener_state_comment_eol: |
621 | 2.46k | { |
622 | | /* Advance until we change state */ |
623 | 2.46k | const char *case_start = str; |
624 | 297k | while (c != '\n') |
625 | 295k | { |
626 | 295k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
627 | 143 | { |
628 | 143 | printbuf_memappend_checked(tok->pb, case_start, |
629 | 143 | str - case_start); |
630 | 143 | goto out; |
631 | 143 | } |
632 | 295k | } |
633 | 2.31k | printbuf_memappend_checked(tok->pb, case_start, str - case_start); |
634 | 2.31k | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
635 | 2.31k | state = json_tokener_state_eatws; |
636 | 2.31k | } |
637 | 0 | break; |
638 | | |
639 | 3.68k | case json_tokener_state_comment_end: |
640 | 3.68k | printbuf_memappend_checked(tok->pb, &c, 1); |
641 | 3.68k | if (c == '/') |
642 | 570 | { |
643 | 570 | MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
644 | 570 | state = json_tokener_state_eatws; |
645 | 570 | } |
646 | 3.11k | else |
647 | 3.11k | { |
648 | 3.11k | state = json_tokener_state_comment; |
649 | 3.11k | } |
650 | 3.68k | break; |
651 | | |
652 | 200k | case json_tokener_state_string: |
653 | 200k | { |
654 | | /* Advance until we change state */ |
655 | 200k | const char *case_start = str; |
656 | 3.70M | while (1) |
657 | 3.70M | { |
658 | 3.70M | if (c == tok->quote_char) |
659 | 97.8k | { |
660 | 97.8k | printbuf_memappend_checked(tok->pb, case_start, |
661 | 97.8k | str - case_start); |
662 | 97.8k | current = |
663 | 97.8k | json_object_new_string_len(tok->pb->buf, tok->pb->bpos); |
664 | 97.8k | if (current == NULL) |
665 | 0 | { |
666 | 0 | tok->err = json_tokener_error_memory; |
667 | 0 | goto out; |
668 | 0 | } |
669 | 97.8k | saved_state = json_tokener_state_finish; |
670 | 97.8k | state = json_tokener_state_eatws; |
671 | 97.8k | break; |
672 | 97.8k | } |
673 | 3.61M | else if (c == '\\') |
674 | 102k | { |
675 | 102k | printbuf_memappend_checked(tok->pb, case_start, |
676 | 102k | str - case_start); |
677 | 102k | saved_state = json_tokener_state_string; |
678 | 102k | state = json_tokener_state_string_escape; |
679 | 102k | break; |
680 | 102k | } |
681 | 3.50M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
682 | 248 | { |
683 | 248 | printbuf_memappend_checked(tok->pb, case_start, |
684 | 248 | str - case_start); |
685 | 248 | goto out; |
686 | 248 | } |
687 | 3.50M | } |
688 | 200k | } |
689 | 200k | break; |
690 | | |
691 | 200k | case json_tokener_state_string_escape: |
692 | 115k | switch (c) |
693 | 115k | { |
694 | 20.7k | case '"': |
695 | 69.3k | case '\\': |
696 | 69.7k | case '/': |
697 | 69.7k | printbuf_memappend_checked(tok->pb, &c, 1); |
698 | 69.7k | state = saved_state; |
699 | 69.7k | break; |
700 | 700 | case 'b': |
701 | 36.3k | case 'n': |
702 | 36.9k | case 'r': |
703 | 38.5k | case 't': |
704 | 39.1k | case 'f': |
705 | 39.1k | if (c == 'b') |
706 | 700 | printbuf_memappend_checked(tok->pb, "\b", 1); |
707 | 38.4k | else if (c == 'n') |
708 | 35.6k | printbuf_memappend_checked(tok->pb, "\n", 1); |
709 | 2.81k | else if (c == 'r') |
710 | 666 | printbuf_memappend_checked(tok->pb, "\r", 1); |
711 | 2.14k | else if (c == 't') |
712 | 1.57k | printbuf_memappend_checked(tok->pb, "\t", 1); |
713 | 575 | else if (c == 'f') |
714 | 575 | printbuf_memappend_checked(tok->pb, "\f", 1); |
715 | 39.1k | state = saved_state; |
716 | 39.1k | break; |
717 | 6.16k | case 'u': |
718 | 6.16k | tok->ucs_char = 0; |
719 | 6.16k | tok->st_pos = 0; |
720 | 6.16k | state = json_tokener_state_escape_unicode; |
721 | 6.16k | break; |
722 | 42 | default: tok->err = json_tokener_error_parse_string; goto out; |
723 | 115k | } |
724 | 115k | break; |
725 | | |
726 | | // =================================================== |
727 | | |
728 | 115k | case json_tokener_state_escape_unicode: |
729 | 8.04k | { |
730 | | /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */ |
731 | 31.9k | while (1) |
732 | 31.9k | { |
733 | 31.9k | if (!c || !is_hex_char(c)) |
734 | 101 | { |
735 | 101 | tok->err = json_tokener_error_parse_string; |
736 | 101 | goto out; |
737 | 101 | } |
738 | 31.8k | tok->ucs_char |= |
739 | 31.8k | ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4)); |
740 | 31.8k | tok->st_pos++; |
741 | 31.8k | if (tok->st_pos >= 4) |
742 | 7.94k | break; |
743 | | |
744 | 23.9k | (void)ADVANCE_CHAR(str, tok); |
745 | 23.9k | if (!PEEK_CHAR(c, tok)) |
746 | 0 | { |
747 | | /* |
748 | | * We're out of characters in the current call to |
749 | | * json_tokener_parse(), but a subsequent call might |
750 | | * provide us with more, so leave our current state |
751 | | * as-is (including tok->high_surrogate) and return. |
752 | | */ |
753 | 0 | goto out; |
754 | 0 | } |
755 | 23.9k | } |
756 | 7.94k | tok->st_pos = 0; |
757 | | |
758 | | /* Now, we have a full \uNNNN sequence in tok->ucs_char */ |
759 | | |
760 | | /* If the *previous* sequence was a high surrogate ... */ |
761 | 7.94k | if (tok->high_surrogate) |
762 | 1.86k | { |
763 | 1.86k | if (IS_LOW_SURROGATE(tok->ucs_char)) |
764 | 771 | { |
765 | | /* Recalculate the ucs_char, then fall thru to process normally */ |
766 | 771 | tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate, |
767 | 771 | tok->ucs_char); |
768 | 771 | } |
769 | 1.09k | else |
770 | 1.09k | { |
771 | | /* High surrogate was not followed by a low surrogate |
772 | | * Replace the high and process the rest normally |
773 | | */ |
774 | 1.09k | printbuf_memappend_checked(tok->pb, |
775 | 1.09k | (char *)utf8_replacement_char, 3); |
776 | 1.09k | } |
777 | 1.86k | tok->high_surrogate = 0; |
778 | 1.86k | } |
779 | | |
780 | 7.94k | if (tok->ucs_char < 0x80) |
781 | 2.05k | { |
782 | 2.05k | unsigned char unescaped_utf[1]; |
783 | 2.05k | unescaped_utf[0] = tok->ucs_char; |
784 | 2.05k | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 1); |
785 | 2.05k | } |
786 | 5.89k | else if (tok->ucs_char < 0x800) |
787 | 361 | { |
788 | 361 | unsigned char unescaped_utf[2]; |
789 | 361 | unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); |
790 | 361 | unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); |
791 | 361 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 2); |
792 | 361 | } |
793 | 5.52k | else if (IS_HIGH_SURROGATE(tok->ucs_char)) |
794 | 3.49k | { |
795 | | /* |
796 | | * The next two characters should be \u, HOWEVER, |
797 | | * we can't simply peek ahead here, because the |
798 | | * characters we need might not be passed to us |
799 | | * until a subsequent call to json_tokener_parse. |
800 | | * Instead, transition through a couple of states. |
801 | | * (now): |
802 | | * _escape_unicode => _unicode_need_escape |
803 | | * (see a '\\' char): |
804 | | * _unicode_need_escape => _unicode_need_u |
805 | | * (see a 'u' char): |
806 | | * _unicode_need_u => _escape_unicode |
807 | | * ...and we'll end up back around here. |
808 | | */ |
809 | 3.49k | tok->high_surrogate = tok->ucs_char; |
810 | 3.49k | tok->ucs_char = 0; |
811 | 3.49k | state = json_tokener_state_escape_unicode_need_escape; |
812 | 3.49k | break; |
813 | 3.49k | } |
814 | 2.03k | else if (IS_LOW_SURROGATE(tok->ucs_char)) |
815 | 491 | { |
816 | | /* Got a low surrogate not preceded by a high */ |
817 | 491 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
818 | 491 | } |
819 | 1.54k | else if (tok->ucs_char < 0x10000) |
820 | 874 | { |
821 | 874 | unsigned char unescaped_utf[3]; |
822 | 874 | unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); |
823 | 874 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
824 | 874 | unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); |
825 | 874 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 3); |
826 | 874 | } |
827 | 669 | else if (tok->ucs_char < 0x110000) |
828 | 669 | { |
829 | 669 | unsigned char unescaped_utf[4]; |
830 | 669 | unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); |
831 | 669 | unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); |
832 | 669 | unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
833 | 669 | unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); |
834 | 669 | printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 4); |
835 | 669 | } |
836 | 0 | else |
837 | 0 | { |
838 | | /* Don't know what we got--insert the replacement char */ |
839 | 0 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
840 | 0 | } |
841 | 4.45k | state = saved_state; // i.e. _state_string or _state_object_field |
842 | 4.45k | } |
843 | 0 | break; |
844 | | |
845 | 3.49k | case json_tokener_state_escape_unicode_need_escape: |
846 | | // We get here after processing a high_surrogate |
847 | | // require a '\\' char |
848 | 3.49k | if (!c || c != '\\') |
849 | 850 | { |
850 | | /* Got a high surrogate without another sequence following |
851 | | * it. Put a replacement char in for the high surrogate |
852 | | * and pop back up to _state_string or _state_object_field. |
853 | | */ |
854 | 850 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
855 | 850 | tok->high_surrogate = 0; |
856 | 850 | tok->ucs_char = 0; |
857 | 850 | tok->st_pos = 0; |
858 | 850 | state = saved_state; |
859 | 850 | goto redo_char; |
860 | 850 | } |
861 | 2.64k | state = json_tokener_state_escape_unicode_need_u; |
862 | 2.64k | break; |
863 | | |
864 | 2.64k | case json_tokener_state_escape_unicode_need_u: |
865 | | /* We already had a \ char, check that it's \u */ |
866 | 2.64k | if (!c || c != 'u') |
867 | 764 | { |
868 | | /* Got a high surrogate with some non-unicode escape |
869 | | * sequence following it. |
870 | | * Put a replacement char in for the high surrogate |
871 | | * and handle the escape sequence normally. |
872 | | */ |
873 | 764 | printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); |
874 | 764 | tok->high_surrogate = 0; |
875 | 764 | tok->ucs_char = 0; |
876 | 764 | tok->st_pos = 0; |
877 | 764 | state = json_tokener_state_string_escape; |
878 | 764 | goto redo_char; |
879 | 764 | } |
880 | 1.88k | state = json_tokener_state_escape_unicode; |
881 | 1.88k | break; |
882 | | |
883 | | // =================================================== |
884 | | |
885 | 9.42k | case json_tokener_state_boolean: |
886 | 9.42k | { |
887 | 9.42k | int size1, size2; |
888 | 9.42k | printbuf_memappend_checked(tok->pb, &c, 1); |
889 | 9.42k | size1 = json_min(tok->st_pos + 1, json_true_str_len); |
890 | 9.42k | size2 = json_min(tok->st_pos + 1, json_false_str_len); |
891 | 9.42k | if ((!(tok->flags & JSON_TOKENER_STRICT) && |
892 | 9.42k | strncasecmp(json_true_str, tok->pb->buf, size1) == 0) || |
893 | 9.42k | (strncmp(json_true_str, tok->pb->buf, size1) == 0)) |
894 | 5.61k | { |
895 | 5.61k | if (tok->st_pos == json_true_str_len) |
896 | 1.09k | { |
897 | 1.09k | current = json_object_new_boolean(1); |
898 | 1.09k | if (current == NULL) |
899 | 0 | { |
900 | 0 | tok->err = json_tokener_error_memory; |
901 | 0 | goto out; |
902 | 0 | } |
903 | 1.09k | saved_state = json_tokener_state_finish; |
904 | 1.09k | state = json_tokener_state_eatws; |
905 | 1.09k | goto redo_char; |
906 | 1.09k | } |
907 | 5.61k | } |
908 | 3.81k | else if ((!(tok->flags & JSON_TOKENER_STRICT) && |
909 | 3.81k | strncasecmp(json_false_str, tok->pb->buf, size2) == 0) || |
910 | 3.81k | (strncmp(json_false_str, tok->pb->buf, size2) == 0)) |
911 | 3.66k | { |
912 | 3.66k | if (tok->st_pos == json_false_str_len) |
913 | 586 | { |
914 | 586 | current = json_object_new_boolean(0); |
915 | 586 | if (current == NULL) |
916 | 0 | { |
917 | 0 | tok->err = json_tokener_error_memory; |
918 | 0 | goto out; |
919 | 0 | } |
920 | 586 | saved_state = json_tokener_state_finish; |
921 | 586 | state = json_tokener_state_eatws; |
922 | 586 | goto redo_char; |
923 | 586 | } |
924 | 3.66k | } |
925 | 158 | else |
926 | 158 | { |
927 | 158 | tok->err = json_tokener_error_parse_boolean; |
928 | 158 | goto out; |
929 | 158 | } |
930 | 7.58k | tok->st_pos++; |
931 | 7.58k | } |
932 | 0 | break; |
933 | | |
934 | 180k | case json_tokener_state_number: |
935 | 180k | { |
936 | | /* Advance until we change state */ |
937 | 180k | const char *case_start = str; |
938 | 180k | int case_len = 0; |
939 | 180k | int is_exponent = 0; |
940 | 180k | int neg_sign_ok = 1; |
941 | 180k | int pos_sign_ok = 0; |
942 | 180k | if (printbuf_length(tok->pb) > 0) |
943 | 0 | { |
944 | | /* We don't save all state from the previous incremental parse |
945 | | so we need to re-generate it based on the saved string so far. |
946 | | */ |
947 | 0 | char *e_loc = strchr(tok->pb->buf, 'e'); |
948 | 0 | if (!e_loc) |
949 | 0 | e_loc = strchr(tok->pb->buf, 'E'); |
950 | 0 | if (e_loc) |
951 | 0 | { |
952 | 0 | char *last_saved_char = |
953 | 0 | &tok->pb->buf[printbuf_length(tok->pb) - 1]; |
954 | 0 | is_exponent = 1; |
955 | 0 | pos_sign_ok = neg_sign_ok = 1; |
956 | | /* If the "e" isn't at the end, we can't start with a '-' */ |
957 | 0 | if (e_loc != last_saved_char) |
958 | 0 | { |
959 | 0 | neg_sign_ok = 0; |
960 | 0 | pos_sign_ok = 0; |
961 | 0 | } |
962 | | // else leave it set to 1, i.e. start of the new input |
963 | 0 | } |
964 | 0 | } |
965 | | |
966 | 553k | while (c && ((c >= '0' && c <= '9') || |
967 | 553k | (!is_exponent && (c == 'e' || c == 'E')) || |
968 | 553k | (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') || |
969 | 553k | (!tok->is_double && c == '.'))) |
970 | 372k | { |
971 | 372k | pos_sign_ok = neg_sign_ok = 0; |
972 | 372k | ++case_len; |
973 | | |
974 | | /* non-digit characters checks */ |
975 | | /* note: since the main loop condition to get here was |
976 | | * an input starting with 0-9 or '-', we are |
977 | | * protected from input starting with '.' or |
978 | | * e/E. |
979 | | */ |
980 | 372k | switch (c) |
981 | 372k | { |
982 | 1.17k | case '.': |
983 | 1.17k | tok->is_double = 1; |
984 | 1.17k | pos_sign_ok = 1; |
985 | 1.17k | neg_sign_ok = 1; |
986 | 1.17k | break; |
987 | 1.59k | case 'e': /* FALLTHRU */ |
988 | 2.36k | case 'E': |
989 | 2.36k | is_exponent = 1; |
990 | 2.36k | tok->is_double = 1; |
991 | | /* the exponent part can begin with a negative sign */ |
992 | 2.36k | pos_sign_ok = neg_sign_ok = 1; |
993 | 2.36k | break; |
994 | 369k | default: break; |
995 | 372k | } |
996 | | |
997 | 372k | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
998 | 0 | { |
999 | 0 | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1000 | 0 | goto out; |
1001 | 0 | } |
1002 | 372k | } |
1003 | | /* |
1004 | | Now we know c isn't a valid number char, but check whether |
1005 | | it might have been intended to be, and return a potentially |
1006 | | more understandable error right away. |
1007 | | However, if we're at the top-level, use the number as-is |
1008 | | because c can be part of a new object to parse on the |
1009 | | next call to json_tokener_parse(). |
1010 | | */ |
1011 | 180k | if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' && |
1012 | 180k | c != 'I' && c != 'i' && !is_ws_char(c)) |
1013 | 294 | { |
1014 | 294 | tok->err = json_tokener_error_parse_number; |
1015 | 294 | goto out; |
1016 | 294 | } |
1017 | 180k | if (case_len > 0) |
1018 | 180k | printbuf_memappend_checked(tok->pb, case_start, case_len); |
1019 | | |
1020 | | // Check for -Infinity |
1021 | 180k | if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I')) |
1022 | 433 | { |
1023 | 433 | state = json_tokener_state_inf; |
1024 | 433 | tok->st_pos = 0; |
1025 | 433 | goto redo_char; |
1026 | 433 | } |
1027 | 179k | if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT)) |
1028 | 2.96k | { |
1029 | | /* Trim some chars off the end, to allow things |
1030 | | like "123e+" to parse ok. */ |
1031 | 5.92k | while (printbuf_length(tok->pb) > 1) |
1032 | 4.78k | { |
1033 | 4.78k | char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1]; |
1034 | 4.78k | if (last_char != 'e' && last_char != 'E' && |
1035 | 4.78k | last_char != '-' && last_char != '+') |
1036 | 1.81k | { |
1037 | 1.81k | break; |
1038 | 1.81k | } |
1039 | 2.96k | tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0'; |
1040 | 2.96k | printbuf_length(tok->pb)--; |
1041 | 2.96k | } |
1042 | 2.96k | } |
1043 | 179k | } |
1044 | 0 | { |
1045 | 179k | int64_t num64; |
1046 | 179k | uint64_t numuint64; |
1047 | 179k | double numd; |
1048 | 179k | if (!tok->is_double && tok->pb->buf[0] == '-' && |
1049 | 179k | json_parse_int64(tok->pb->buf, &num64) == 0) |
1050 | 2.84k | { |
1051 | 2.84k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1052 | 0 | { |
1053 | 0 | tok->err = json_tokener_error_parse_number; |
1054 | 0 | goto out; |
1055 | 0 | } |
1056 | 2.84k | current = json_object_new_int64(num64); |
1057 | 2.84k | if (current == NULL) |
1058 | 0 | { |
1059 | 0 | tok->err = json_tokener_error_memory; |
1060 | 0 | goto out; |
1061 | 0 | } |
1062 | 2.84k | } |
1063 | 176k | else if (!tok->is_double && tok->pb->buf[0] != '-' && |
1064 | 176k | json_parse_uint64(tok->pb->buf, &numuint64) == 0) |
1065 | 173k | { |
1066 | 173k | if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT)) |
1067 | 0 | { |
1068 | 0 | tok->err = json_tokener_error_parse_number; |
1069 | 0 | goto out; |
1070 | 0 | } |
1071 | 173k | if (numuint64 && tok->pb->buf[0] == '0' && |
1072 | 173k | (tok->flags & JSON_TOKENER_STRICT)) |
1073 | 0 | { |
1074 | 0 | tok->err = json_tokener_error_parse_number; |
1075 | 0 | goto out; |
1076 | 0 | } |
1077 | 173k | if (numuint64 <= INT64_MAX) |
1078 | 172k | { |
1079 | 172k | num64 = (uint64_t)numuint64; |
1080 | 172k | current = json_object_new_int64(num64); |
1081 | 172k | if (current == NULL) |
1082 | 0 | { |
1083 | 0 | tok->err = json_tokener_error_memory; |
1084 | 0 | goto out; |
1085 | 0 | } |
1086 | 172k | } |
1087 | 992 | else |
1088 | 992 | { |
1089 | 992 | current = json_object_new_uint64(numuint64); |
1090 | 992 | if (current == NULL) |
1091 | 0 | { |
1092 | 0 | tok->err = json_tokener_error_memory; |
1093 | 0 | goto out; |
1094 | 0 | } |
1095 | 992 | } |
1096 | 173k | } |
1097 | 2.99k | else if (tok->is_double && |
1098 | 2.99k | json_tokener_parse_double( |
1099 | 2.96k | tok->pb->buf, printbuf_length(tok->pb), &numd) == 0) |
1100 | 2.95k | { |
1101 | 2.95k | current = json_object_new_double_s(numd, tok->pb->buf); |
1102 | 2.95k | if (current == NULL) |
1103 | 0 | { |
1104 | 0 | tok->err = json_tokener_error_memory; |
1105 | 0 | goto out; |
1106 | 0 | } |
1107 | 2.95k | } |
1108 | 41 | else |
1109 | 41 | { |
1110 | 41 | tok->err = json_tokener_error_parse_number; |
1111 | 41 | goto out; |
1112 | 41 | } |
1113 | 179k | saved_state = json_tokener_state_finish; |
1114 | 179k | state = json_tokener_state_eatws; |
1115 | 179k | goto redo_char; |
1116 | 179k | } |
1117 | 0 | break; |
1118 | | |
1119 | 206k | case json_tokener_state_array_after_sep: |
1120 | 222k | case json_tokener_state_array: |
1121 | 222k | if (c == ']') |
1122 | 2.07k | { |
1123 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1124 | 2.07k | json_object_array_shrink(current, 0); |
1125 | | |
1126 | 2.07k | if (state == json_tokener_state_array_after_sep && |
1127 | 2.07k | (tok->flags & JSON_TOKENER_STRICT)) |
1128 | 0 | { |
1129 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1130 | 0 | goto out; |
1131 | 0 | } |
1132 | 2.07k | saved_state = json_tokener_state_finish; |
1133 | 2.07k | state = json_tokener_state_eatws; |
1134 | 2.07k | } |
1135 | 220k | else |
1136 | 220k | { |
1137 | 220k | if (tok->depth >= tok->max_depth - 1) |
1138 | 4 | { |
1139 | 4 | tok->err = json_tokener_error_depth; |
1140 | 4 | goto out; |
1141 | 4 | } |
1142 | 220k | state = json_tokener_state_array_add; |
1143 | 220k | tok->depth++; |
1144 | 220k | json_tokener_reset_level(tok, tok->depth); |
1145 | 220k | goto redo_char; |
1146 | 220k | } |
1147 | 2.07k | break; |
1148 | | |
1149 | 218k | case json_tokener_state_array_add: |
1150 | 218k | if (json_object_array_add(current, obj) != 0) |
1151 | 0 | { |
1152 | 0 | tok->err = json_tokener_error_memory; |
1153 | 0 | goto out; |
1154 | 0 | } |
1155 | 218k | saved_state = json_tokener_state_array_sep; |
1156 | 218k | state = json_tokener_state_eatws; |
1157 | 218k | goto redo_char; |
1158 | | |
1159 | 218k | case json_tokener_state_array_sep: |
1160 | 218k | if (c == ']') |
1161 | 12.0k | { |
1162 | | // Minimize memory usage; assume parsed objs are unlikely to be changed |
1163 | 12.0k | json_object_array_shrink(current, 0); |
1164 | | |
1165 | 12.0k | saved_state = json_tokener_state_finish; |
1166 | 12.0k | state = json_tokener_state_eatws; |
1167 | 12.0k | } |
1168 | 206k | else if (c == ',') |
1169 | 206k | { |
1170 | 206k | saved_state = json_tokener_state_array_after_sep; |
1171 | 206k | state = json_tokener_state_eatws; |
1172 | 206k | } |
1173 | 209 | else |
1174 | 209 | { |
1175 | 209 | tok->err = json_tokener_error_parse_array; |
1176 | 209 | goto out; |
1177 | 209 | } |
1178 | 218k | break; |
1179 | | |
1180 | 218k | case json_tokener_state_object_field_start: |
1181 | 161k | case json_tokener_state_object_field_start_after_sep: |
1182 | 161k | if (c == '}') |
1183 | 20.6k | { |
1184 | 20.6k | if (state == json_tokener_state_object_field_start_after_sep && |
1185 | 20.6k | (tok->flags & JSON_TOKENER_STRICT)) |
1186 | 0 | { |
1187 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1188 | 0 | goto out; |
1189 | 0 | } |
1190 | 20.6k | saved_state = json_tokener_state_finish; |
1191 | 20.6k | state = json_tokener_state_eatws; |
1192 | 20.6k | } |
1193 | 140k | else if (c == '"' || c == '\'') |
1194 | 140k | { |
1195 | 140k | tok->quote_char = c; |
1196 | 140k | printbuf_reset(tok->pb); |
1197 | 140k | state = json_tokener_state_object_field; |
1198 | 140k | } |
1199 | 51 | else |
1200 | 51 | { |
1201 | 51 | tok->err = json_tokener_error_parse_object_key_name; |
1202 | 51 | goto out; |
1203 | 51 | } |
1204 | 161k | break; |
1205 | | |
1206 | 161k | case json_tokener_state_object_field: |
1207 | 151k | { |
1208 | | /* Advance until we change state */ |
1209 | 151k | const char *case_start = str; |
1210 | 2.61M | while (1) |
1211 | 2.61M | { |
1212 | 2.61M | if (c == tok->quote_char) |
1213 | 140k | { |
1214 | 140k | printbuf_memappend_checked(tok->pb, case_start, |
1215 | 140k | str - case_start); |
1216 | 140k | obj_field_name = strdup(tok->pb->buf); |
1217 | 140k | if (obj_field_name == NULL) |
1218 | 0 | { |
1219 | 0 | tok->err = json_tokener_error_memory; |
1220 | 0 | goto out; |
1221 | 0 | } |
1222 | 140k | saved_state = json_tokener_state_object_field_end; |
1223 | 140k | state = json_tokener_state_eatws; |
1224 | 140k | break; |
1225 | 140k | } |
1226 | 2.47M | else if (c == '\\') |
1227 | 11.7k | { |
1228 | 11.7k | printbuf_memappend_checked(tok->pb, case_start, |
1229 | 11.7k | str - case_start); |
1230 | 11.7k | saved_state = json_tokener_state_object_field; |
1231 | 11.7k | state = json_tokener_state_string_escape; |
1232 | 11.7k | break; |
1233 | 11.7k | } |
1234 | 2.46M | if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) |
1235 | 220 | { |
1236 | 220 | printbuf_memappend_checked(tok->pb, case_start, |
1237 | 220 | str - case_start); |
1238 | 220 | goto out; |
1239 | 220 | } |
1240 | 2.46M | } |
1241 | 151k | } |
1242 | 151k | break; |
1243 | | |
1244 | 151k | case json_tokener_state_object_field_end: |
1245 | 139k | if (c == ':') |
1246 | 139k | { |
1247 | 139k | saved_state = json_tokener_state_object_value; |
1248 | 139k | state = json_tokener_state_eatws; |
1249 | 139k | } |
1250 | 156 | else |
1251 | 156 | { |
1252 | 156 | tok->err = json_tokener_error_parse_object_key_sep; |
1253 | 156 | goto out; |
1254 | 156 | } |
1255 | 139k | break; |
1256 | | |
1257 | 139k | case json_tokener_state_object_value: |
1258 | 139k | if (tok->depth >= tok->max_depth - 1) |
1259 | 3 | { |
1260 | 3 | tok->err = json_tokener_error_depth; |
1261 | 3 | goto out; |
1262 | 3 | } |
1263 | 139k | state = json_tokener_state_object_value_add; |
1264 | 139k | tok->depth++; |
1265 | 139k | json_tokener_reset_level(tok, tok->depth); |
1266 | 139k | goto redo_char; |
1267 | | |
1268 | 137k | case json_tokener_state_object_value_add: |
1269 | 137k | if (json_object_object_add(current, obj_field_name, obj) != 0) |
1270 | 0 | { |
1271 | 0 | tok->err = json_tokener_error_memory; |
1272 | 0 | goto out; |
1273 | 0 | } |
1274 | 137k | free(obj_field_name); |
1275 | 137k | obj_field_name = NULL; |
1276 | 137k | saved_state = json_tokener_state_object_sep; |
1277 | 137k | state = json_tokener_state_eatws; |
1278 | 137k | goto redo_char; |
1279 | | |
1280 | 137k | case json_tokener_state_object_sep: |
1281 | | /* { */ |
1282 | 137k | if (c == '}') |
1283 | 35.3k | { |
1284 | 35.3k | saved_state = json_tokener_state_finish; |
1285 | 35.3k | state = json_tokener_state_eatws; |
1286 | 35.3k | } |
1287 | 102k | else if (c == ',') |
1288 | 101k | { |
1289 | 101k | saved_state = json_tokener_state_object_field_start_after_sep; |
1290 | 101k | state = json_tokener_state_eatws; |
1291 | 101k | } |
1292 | 336 | else |
1293 | 336 | { |
1294 | 336 | tok->err = json_tokener_error_parse_object_value_sep; |
1295 | 336 | goto out; |
1296 | 336 | } |
1297 | 137k | break; |
1298 | 4.61M | } |
1299 | 1.38M | (void)ADVANCE_CHAR(str, tok); |
1300 | 1.38M | if (!c) // This is the char *before* advancing |
1301 | 7 | break; |
1302 | 1.38M | } /* while(PEEK_CHAR) */ |
1303 | | |
1304 | 10.5k | out: |
1305 | 10.5k | if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0)) |
1306 | 0 | { |
1307 | 0 | tok->err = json_tokener_error_parse_utf8_string; |
1308 | 0 | } |
1309 | 10.5k | if (c && (state == json_tokener_state_finish) && (tok->depth == 0) && |
1310 | 10.5k | (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) == |
1311 | 114 | JSON_TOKENER_STRICT) |
1312 | 0 | { |
1313 | | /* unexpected char after JSON data */ |
1314 | 0 | tok->err = json_tokener_error_parse_unexpected; |
1315 | 0 | } |
1316 | 10.5k | if (!c) |
1317 | 8.75k | { |
1318 | | /* We hit an eof char (0) */ |
1319 | 8.75k | if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish) |
1320 | 856 | tok->err = json_tokener_error_parse_eof; |
1321 | 8.75k | } |
1322 | | |
1323 | 10.5k | #ifdef HAVE_USELOCALE |
1324 | 10.5k | uselocale(oldlocale); |
1325 | 10.5k | freelocale(newloc); |
1326 | | #elif defined(HAVE_SETLOCALE) |
1327 | | setlocale(LC_NUMERIC, oldlocale); |
1328 | | free(oldlocale); |
1329 | | #endif |
1330 | | |
1331 | 10.5k | if (tok->err == json_tokener_success) |
1332 | 8.01k | { |
1333 | 8.01k | json_object *ret = json_object_get(current); |
1334 | 8.01k | int ii; |
1335 | | |
1336 | | /* Partially reset, so we parse additional objects on subsequent calls. */ |
1337 | 16.3k | for (ii = tok->depth; ii >= 0; ii--) |
1338 | 8.31k | json_tokener_reset_level(tok, ii); |
1339 | 8.01k | return ret; |
1340 | 8.01k | } |
1341 | | |
1342 | 2.57k | MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err], |
1343 | 2.57k | tok->char_offset); |
1344 | 2.57k | return NULL; |
1345 | 10.5k | } |
1346 | | |
1347 | | static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) |
1348 | 0 | { |
1349 | 0 | unsigned char chr = c; |
1350 | 0 | if (*nBytes == 0) |
1351 | 0 | { |
1352 | 0 | if (chr >= 0x80) |
1353 | 0 | { |
1354 | 0 | if ((chr & 0xe0) == 0xc0) |
1355 | 0 | *nBytes = 1; |
1356 | 0 | else if ((chr & 0xf0) == 0xe0) |
1357 | 0 | *nBytes = 2; |
1358 | 0 | else if ((chr & 0xf8) == 0xf0) |
1359 | 0 | *nBytes = 3; |
1360 | 0 | else |
1361 | 0 | return 0; |
1362 | 0 | } |
1363 | 0 | } |
1364 | 0 | else |
1365 | 0 | { |
1366 | 0 | if ((chr & 0xC0) != 0x80) |
1367 | 0 | return 0; |
1368 | 0 | (*nBytes)--; |
1369 | 0 | } |
1370 | 0 | return 1; |
1371 | 0 | } |
1372 | | |
1373 | | void json_tokener_set_flags(struct json_tokener *tok, int flags) |
1374 | 0 | { |
1375 | 0 | tok->flags = flags; |
1376 | 0 | } |
1377 | | |
1378 | | size_t json_tokener_get_parse_end(struct json_tokener *tok) |
1379 | 0 | { |
1380 | 0 | assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */ |
1381 | 0 | return (size_t)tok->char_offset; |
1382 | 0 | } |
1383 | | |
/*
 * Convert the text in `buf` to a double, requiring that the conversion
 * consume exactly `len` bytes.
 *
 * buf:    NUL-terminated text to convert (strtod scans it as a C string).
 * len:    number of bytes of `buf` that must make up the number.
 * retval: receives whatever strtod produced; it is written even when the
 *         function reports failure.
 *
 * Returns 0 when the whole `len`-byte prefix was converted, 1 otherwise.
 * An empty or negative length, or text from which strtod could convert
 * nothing, is reported as failure rather than as a successful 0.0.
 *
 * NOTE(review): strtod honours the current numeric locale; this helper
 * presumably relies on its caller having selected one that uses '.' as
 * the decimal point -- confirm against the locale handling in the caller.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end;

	*retval = strtod(buf, &end);

	/*
	 * strtod leaves end == buf when no conversion was performed.  Without
	 * this guard a zero length would compare equal to that position and be
	 * mistaken for success; checking len first also avoids forming an
	 * out-of-bounds pointer from a negative length.
	 */
	if (len <= 0 || end == buf)
		return 1;

	/* Success only if the parse stopped exactly at the expected end. */
	return (buf + len == end) ? 0 : 1;
}