Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org> |
3 | | * |
4 | | * Jansson is free software; you can redistribute it and/or modify |
5 | | * it under the terms of the MIT license. See LICENSE for details. |
6 | | */ |
7 | | |
8 | | #ifndef _GNU_SOURCE |
9 | | #define _GNU_SOURCE |
10 | | #endif |
11 | | |
12 | | #include "jansson_private.h" |
13 | | |
14 | | #include <assert.h> |
15 | | #include <errno.h> |
16 | | #include <limits.h> |
17 | | #include <stdio.h> |
18 | | #include <stdlib.h> |
19 | | #include <string.h> |
20 | | #ifdef HAVE_UNISTD_H |
21 | | #include <unistd.h> |
22 | | #endif |
23 | | |
24 | | #include "jansson.h" |
25 | | #include "strbuffer.h" |
26 | | #include "utf.h" |
27 | | |
28 | 23.9M | #define STREAM_STATE_OK 0 |
29 | 64.0M | #define STREAM_STATE_EOF -1 |
30 | 41.8M | #define STREAM_STATE_ERROR -2 |
31 | | |
32 | 2.33M | #define TOKEN_INVALID -1 |
33 | 2.25k | #define TOKEN_EOF 0 |
34 | 5.66M | #define TOKEN_STRING 256 |
35 | 2.90M | #define TOKEN_INTEGER 257 |
36 | 1.70M | #define TOKEN_REAL 258 |
37 | 1.36k | #define TOKEN_TRUE 259 |
38 | 12.8k | #define TOKEN_FALSE 260 |
39 | 1.94k | #define TOKEN_NULL 261 |
40 | | |
41 | | /* Locale independent versions of isxxx() functions */ |
42 | 236k | #define l_isupper(c) ('A' <= (c) && (c) <= 'Z') |
43 | 130k | #define l_islower(c) ('a' <= (c) && (c) <= 'z') |
44 | 113k | #define l_isalpha(c) (l_isupper(c) || l_islower(c)) |
45 | 9.67M | #define l_isdigit(c) ('0' <= (c) && (c) <= '9') |
46 | | #define l_isxdigit(c) \ |
47 | 28.0k | (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f')) |
48 | | |
49 | | /* Read one byte from stream, convert to unsigned char, then int, and |
50 | | return. return EOF on end of file. This corresponds to the |
51 | | behaviour of fgetc(). */ |
52 | | typedef int (*get_func)(void *data); |
53 | | |
54 | | typedef struct { |
55 | | get_func get; |
56 | | void *data; |
57 | | char buffer[5]; |
58 | | size_t buffer_pos; |
59 | | int state; |
60 | | int line; |
61 | | int column, last_column; |
62 | | size_t position; |
63 | | } stream_t; |
64 | | |
65 | | typedef struct { |
66 | | stream_t stream; |
67 | | strbuffer_t saved_text; |
68 | | size_t flags; |
69 | | size_t depth; |
70 | | int token; |
71 | | union { |
72 | | struct { |
73 | | char *val; |
74 | | size_t len; |
75 | | } string; |
76 | | json_int_t integer; |
77 | | double real; |
78 | | } value; |
79 | | } lex_t; |
80 | | |
81 | 191 | #define stream_to_lex(stream) container_of(stream, lex_t, stream) |
82 | | |
83 | | /*** error reporting ***/ |
84 | | |
85 | | static void error_set(json_error_t *error, const lex_t *lex, enum json_error_code code, |
86 | 2.10k | const char *msg, ...) { |
87 | 2.10k | va_list ap; |
88 | 2.10k | char msg_text[JSON_ERROR_TEXT_LENGTH]; |
89 | 2.10k | char msg_with_context[JSON_ERROR_TEXT_LENGTH]; |
90 | | |
91 | 2.10k | int line = -1, col = -1; |
92 | 2.10k | size_t pos = 0; |
93 | 2.10k | const char *result = msg_text; |
94 | | |
95 | 2.10k | if (!error) |
96 | 0 | return; |
97 | | |
98 | 2.10k | va_start(ap, msg); |
99 | 2.10k | vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap); |
100 | 2.10k | msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; |
101 | 2.10k | va_end(ap); |
102 | | |
103 | 2.10k | if (lex) { |
104 | 2.10k | const char *saved_text = strbuffer_value(&lex->saved_text); |
105 | | |
106 | 2.10k | line = lex->stream.line; |
107 | 2.10k | col = lex->stream.column; |
108 | 2.10k | pos = lex->stream.position; |
109 | | |
110 | 2.10k | if (saved_text && saved_text[0]) { |
111 | 1.52k | if (lex->saved_text.length <= 20) { |
112 | 1.33k | snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, "%s near '%s'", |
113 | 1.33k | msg_text, saved_text); |
114 | 1.33k | msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; |
115 | 1.33k | result = msg_with_context; |
116 | 1.33k | } |
117 | 1.52k | } else { |
118 | 583 | if (code == json_error_invalid_syntax) { |
119 | | /* More specific error code for premature end of file. */ |
120 | 440 | code = json_error_premature_end_of_input; |
121 | 440 | } |
122 | 583 | if (lex->stream.state == STREAM_STATE_ERROR) { |
123 | | /* No context for UTF-8 decoding errors */ |
124 | 281 | result = msg_text; |
125 | 302 | } else { |
126 | 302 | snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, "%s near end of file", |
127 | 302 | msg_text); |
128 | 302 | msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; |
129 | 302 | result = msg_with_context; |
130 | 302 | } |
131 | 583 | } |
132 | 2.10k | } |
133 | | |
134 | 2.10k | jsonp_error_set(error, line, col, pos, code, "%s", result); |
135 | 2.10k | } |
136 | | |
137 | | /*** lexical analyzer ***/ |
138 | | |
139 | 7.18k | static void stream_init(stream_t *stream, get_func get, void *data) { |
140 | 7.18k | stream->get = get; |
141 | 7.18k | stream->data = data; |
142 | 7.18k | stream->buffer[0] = '\0'; |
143 | 7.18k | stream->buffer_pos = 0; |
144 | | |
145 | 7.18k | stream->state = STREAM_STATE_OK; |
146 | 7.18k | stream->line = 1; |
147 | 7.18k | stream->column = 0; |
148 | 7.18k | stream->position = 0; |
149 | 7.18k | } |
150 | | |
151 | 23.9M | static int stream_get(stream_t *stream, json_error_t *error) { |
152 | 23.9M | int c; |
153 | | |
154 | 23.9M | if (stream->state != STREAM_STATE_OK) |
155 | 531 | return stream->state; |
156 | | |
157 | 23.9M | if (!stream->buffer[stream->buffer_pos]) { |
158 | 21.6M | c = stream->get(stream->data); |
159 | 21.6M | if (c == EOF) { |
160 | 4.71k | stream->state = STREAM_STATE_EOF; |
161 | 4.71k | return STREAM_STATE_EOF; |
162 | 4.71k | } |
163 | | |
164 | 21.6M | stream->buffer[0] = c; |
165 | 21.6M | stream->buffer_pos = 0; |
166 | | |
167 | 21.6M | if (0x80 <= c && c <= 0xFF) { |
168 | | /* multi-byte UTF-8 sequence */ |
169 | 4.95k | size_t i, count; |
170 | | |
171 | 4.95k | count = utf8_check_first(c); |
172 | 4.95k | if (!count) |
173 | 57 | goto out; |
174 | | |
175 | 4.90k | assert(count >= 2); |
176 | | |
177 | 14.2k | for (i = 1; i < count; i++) |
178 | 9.31k | stream->buffer[i] = stream->get(stream->data); |
179 | | |
180 | 4.90k | if (!utf8_check_full(stream->buffer, count, NULL)) |
181 | 134 | goto out; |
182 | | |
183 | 4.76k | stream->buffer[count] = '\0'; |
184 | 4.76k | } else |
185 | 21.6M | stream->buffer[1] = '\0'; |
186 | 21.6M | } |
187 | | |
188 | 23.9M | c = stream->buffer[stream->buffer_pos++]; |
189 | | |
190 | 23.9M | stream->position++; |
191 | 23.9M | if (c == '\n') { |
192 | 625 | stream->line++; |
193 | 625 | stream->last_column = stream->column; |
194 | 625 | stream->column = 0; |
195 | 23.9M | } else if (utf8_check_first(c)) { |
196 | | /* track the Unicode character column, so increment only if |
197 | | this is the first character of a UTF-8 sequence */ |
198 | 23.9M | stream->column++; |
199 | 23.9M | } |
200 | | |
201 | 23.9M | return c; |
202 | | |
203 | 191 | out: |
204 | 191 | stream->state = STREAM_STATE_ERROR; |
205 | 191 | error_set(error, stream_to_lex(stream), json_error_invalid_utf8, |
206 | 191 | "unable to decode byte 0x%x", c); |
207 | 191 | return STREAM_STATE_ERROR; |
208 | 23.9M | } |
209 | | |
210 | 2.31M | static void stream_unget(stream_t *stream, int c) { |
211 | 2.31M | if (c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR) |
212 | 6 | return; |
213 | | |
214 | 2.31M | stream->position--; |
215 | 2.31M | if (c == '\n') { |
216 | 199 | stream->line--; |
217 | 199 | stream->column = stream->last_column; |
218 | 2.31M | } else if (utf8_check_first(c)) |
219 | 2.31M | stream->column--; |
220 | | |
221 | 2.31M | assert(stream->buffer_pos > 0); |
222 | 2.31M | stream->buffer_pos--; |
223 | 2.31M | assert(stream->buffer[stream->buffer_pos] == c); |
224 | 2.31M | } |
225 | | |
226 | 6.40M | static int lex_get(lex_t *lex, json_error_t *error) { |
227 | 6.40M | return stream_get(&lex->stream, error); |
228 | 6.40M | } |
229 | | |
230 | 23.9M | static void lex_save(lex_t *lex, int c) { strbuffer_append_byte(&lex->saved_text, c); } |
231 | | |
232 | 17.5M | static int lex_get_save(lex_t *lex, json_error_t *error) { |
233 | 17.5M | int c = stream_get(&lex->stream, error); |
234 | 17.5M | if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) |
235 | 17.5M | lex_save(lex, c); |
236 | 17.5M | return c; |
237 | 17.5M | } |
238 | | |
239 | 22 | static void lex_unget(lex_t *lex, int c) { stream_unget(&lex->stream, c); } |
240 | | |
241 | 2.31M | static void lex_unget_unsave(lex_t *lex, int c) { |
242 | 2.31M | if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) { |
243 | | /* Since we treat warnings as errors, when assertions are turned |
244 | | * off the "d" variable would be set but never used. Which is |
245 | | * treated as an error by GCC. |
246 | | */ |
247 | 2.31M | #ifndef NDEBUG |
248 | 2.31M | char d; |
249 | 2.31M | #endif |
250 | 2.31M | stream_unget(&lex->stream, c); |
251 | 2.31M | #ifndef NDEBUG |
252 | 2.31M | d = |
253 | 2.31M | #endif |
254 | 2.31M | strbuffer_pop(&lex->saved_text); |
255 | 2.31M | assert(c == d); |
256 | 2.31M | } |
257 | 2.31M | } |
258 | | |
259 | 145 | static void lex_save_cached(lex_t *lex) { |
260 | 331 | while (lex->stream.buffer[lex->stream.buffer_pos] != '\0') { |
261 | 186 | lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]); |
262 | 186 | lex->stream.buffer_pos++; |
263 | 186 | lex->stream.position++; |
264 | 186 | } |
265 | 145 | } |
266 | | |
267 | 15.2k | static void lex_free_string(lex_t *lex) { |
268 | 15.2k | jsonp_free(lex->value.string.val); |
269 | 15.2k | lex->value.string.val = NULL; |
270 | 15.2k | lex->value.string.len = 0; |
271 | 15.2k | } |
272 | | |
273 | | /* assumes that str points to 'u' plus at least 4 valid hex digits */ |
274 | 6.30k | static int32_t decode_unicode_escape(const char *str) { |
275 | 6.30k | int i; |
276 | 6.30k | int32_t value = 0; |
277 | | |
278 | 6.30k | assert(str[0] == 'u'); |
279 | | |
280 | 31.5k | for (i = 1; i <= 4; i++) { |
281 | 25.2k | char c = str[i]; |
282 | 25.2k | value <<= 4; |
283 | 25.2k | if (l_isdigit(c)) |
284 | 8.15k | value += c - '0'; |
285 | 17.0k | else if (l_islower(c)) |
286 | 7.73k | value += c - 'a' + 10; |
287 | 9.34k | else if (l_isupper(c)) |
288 | 9.34k | value += c - 'A' + 10; |
289 | 0 | else |
290 | 0 | return -1; |
291 | 25.2k | } |
292 | | |
293 | 6.30k | return value; |
294 | 6.30k | } |
295 | | |
296 | 15.2k | static void lex_scan_string(lex_t *lex, json_error_t *error) { |
297 | 15.2k | int c; |
298 | 15.2k | const char *p; |
299 | 15.2k | char *t; |
300 | 15.2k | int i; |
301 | | |
302 | 15.2k | lex->value.string.val = NULL; |
303 | 15.2k | lex->token = TOKEN_INVALID; |
304 | | |
305 | 15.2k | c = lex_get_save(lex, error); |
306 | | |
307 | 14.1M | while (c != '"') { |
308 | 14.1M | if (c == STREAM_STATE_ERROR) |
309 | 28 | goto out; |
310 | | |
311 | 14.1M | else if (c == STREAM_STATE_EOF) { |
312 | 142 | error_set(error, lex, json_error_premature_end_of_input, |
313 | 142 | "premature end of input"); |
314 | 142 | goto out; |
315 | 142 | } |
316 | | |
317 | 14.1M | else if (0 <= c && c <= 0x1F) { |
318 | | /* control character */ |
319 | 31 | lex_unget_unsave(lex, c); |
320 | 31 | if (c == '\n') |
321 | 2 | error_set(error, lex, json_error_invalid_syntax, "unexpected newline"); |
322 | 29 | else |
323 | 29 | error_set(error, lex, json_error_invalid_syntax, "control character 0x%x", |
324 | 29 | c); |
325 | 31 | goto out; |
326 | 31 | } |
327 | | |
328 | 14.1M | else if (c == '\\') { |
329 | 11.8k | c = lex_get_save(lex, error); |
330 | 11.8k | if (c == 'u') { |
331 | 7.06k | c = lex_get_save(lex, error); |
332 | 35.0k | for (i = 0; i < 4; i++) { |
333 | 28.0k | if (!l_isxdigit(c)) { |
334 | 81 | error_set(error, lex, json_error_invalid_syntax, |
335 | 81 | "invalid escape"); |
336 | 81 | goto out; |
337 | 81 | } |
338 | 28.0k | c = lex_get_save(lex, error); |
339 | 28.0k | } |
340 | 7.06k | } else if (c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' || |
341 | 4.74k | c == 'n' || c == 'r' || c == 't') |
342 | 4.67k | c = lex_get_save(lex, error); |
343 | 61 | else { |
344 | 61 | error_set(error, lex, json_error_invalid_syntax, "invalid escape"); |
345 | 61 | goto out; |
346 | 61 | } |
347 | 11.8k | } else |
348 | 14.0M | c = lex_get_save(lex, error); |
349 | 14.1M | } |
350 | | |
351 | | /* the actual value is at most of the same length as the source |
352 | | string, because: |
353 | | - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte |
354 | | - a single \uXXXX escape (length 6) is converted to at most 3 bytes |
355 | | - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair |
356 | | are converted to 4 bytes |
357 | | */ |
358 | 14.9k | t = jsonp_malloc(lex->saved_text.length + 1); |
359 | 14.9k | if (!t) { |
360 | | /* this is not very nice, since TOKEN_INVALID is returned */ |
361 | 0 | goto out; |
362 | 0 | } |
363 | 14.9k | lex->value.string.val = t; |
364 | | |
365 | | /* + 1 to skip the " */ |
366 | 14.9k | p = strbuffer_value(&lex->saved_text) + 1; |
367 | | |
368 | 11.4M | while (*p != '"') { |
369 | 11.4M | if (*p == '\\') { |
370 | 8.09k | p++; |
371 | 8.09k | if (*p == 'u') { |
372 | 4.61k | size_t length; |
373 | 4.61k | int32_t value; |
374 | | |
375 | 4.61k | value = decode_unicode_escape(p); |
376 | 4.61k | if (value < 0) { |
377 | 0 | error_set(error, lex, json_error_invalid_syntax, |
378 | 0 | "invalid Unicode escape '%.6s'", p - 1); |
379 | 0 | goto out; |
380 | 0 | } |
381 | 4.61k | p += 5; |
382 | | |
383 | 4.61k | if (0xD800 <= value && value <= 0xDBFF) { |
384 | | /* surrogate pair */ |
385 | 1.72k | if (*p == '\\' && *(p + 1) == 'u') { |
386 | 1.69k | int32_t value2 = decode_unicode_escape(++p); |
387 | 1.69k | if (value2 < 0) { |
388 | 0 | error_set(error, lex, json_error_invalid_syntax, |
389 | 0 | "invalid Unicode escape '%.6s'", p - 1); |
390 | 0 | goto out; |
391 | 0 | } |
392 | 1.69k | p += 5; |
393 | | |
394 | 1.69k | if (0xDC00 <= value2 && value2 <= 0xDFFF) { |
395 | | /* valid second surrogate */ |
396 | 1.65k | value = |
397 | 1.65k | ((value - 0xD800) << 10) + (value2 - 0xDC00) + 0x10000; |
398 | 1.65k | } else { |
399 | | /* invalid second surrogate */ |
400 | 41 | error_set(error, lex, json_error_invalid_syntax, |
401 | 41 | "invalid Unicode '\\u%04X\\u%04X'", value, value2); |
402 | 41 | goto out; |
403 | 41 | } |
404 | 1.69k | } else { |
405 | | /* no second surrogate */ |
406 | 27 | error_set(error, lex, json_error_invalid_syntax, |
407 | 27 | "invalid Unicode '\\u%04X'", value); |
408 | 27 | goto out; |
409 | 27 | } |
410 | 2.88k | } else if (0xDC00 <= value && value <= 0xDFFF) { |
411 | 17 | error_set(error, lex, json_error_invalid_syntax, |
412 | 17 | "invalid Unicode '\\u%04X'", value); |
413 | 17 | goto out; |
414 | 17 | } |
415 | | |
416 | 4.52k | if (utf8_encode(value, t, &length)) |
417 | 0 | assert(0); |
418 | 4.52k | t += length; |
419 | 4.52k | } else { |
420 | 3.48k | switch (*p) { |
421 | 269 | case '"': |
422 | 539 | case '\\': |
423 | 738 | case '/': |
424 | 738 | *t = *p; |
425 | 738 | break; |
426 | 530 | case 'b': |
427 | 530 | *t = '\b'; |
428 | 530 | break; |
429 | 490 | case 'f': |
430 | 490 | *t = '\f'; |
431 | 490 | break; |
432 | 633 | case 'n': |
433 | 633 | *t = '\n'; |
434 | 633 | break; |
435 | 497 | case 'r': |
436 | 497 | *t = '\r'; |
437 | 497 | break; |
438 | 593 | case 't': |
439 | 593 | *t = '\t'; |
440 | 593 | break; |
441 | 0 | default: |
442 | 0 | assert(0); |
443 | 3.48k | } |
444 | 3.48k | t++; |
445 | 3.48k | p++; |
446 | 3.48k | } |
447 | 8.09k | } else |
448 | 11.4M | *(t++) = *(p++); |
449 | 11.4M | } |
450 | 14.8k | *t = '\0'; |
451 | 14.8k | lex->value.string.len = t - lex->value.string.val; |
452 | 14.8k | lex->token = TOKEN_STRING; |
453 | 14.8k | return; |
454 | | |
455 | 428 | out: |
456 | 428 | lex_free_string(lex); |
457 | 428 | } |
458 | | |
459 | | #ifndef JANSSON_USING_CMAKE /* disabled if using cmake */ |
460 | | #if JSON_INTEGER_IS_LONG_LONG |
461 | | #ifdef _MSC_VER /* Microsoft Visual Studio */ |
462 | | #define json_strtoint _strtoi64 |
463 | | #else |
464 | 1.45M | #define json_strtoint strtoll |
465 | | #endif |
466 | | #else |
467 | | #define json_strtoint strtol |
468 | | #endif |
469 | | #endif |
470 | | |
471 | 2.30M | static int lex_scan_number(lex_t *lex, int c, json_error_t *error) { |
472 | 2.30M | const char *saved_text; |
473 | 2.30M | char *end; |
474 | 2.30M | double doubleval; |
475 | | |
476 | 2.30M | lex->token = TOKEN_INVALID; |
477 | | |
478 | 2.30M | if (c == '-') |
479 | 652 | c = lex_get_save(lex, error); |
480 | | |
481 | 2.30M | if (c == '0') { |
482 | 1.39M | c = lex_get_save(lex, error); |
483 | 1.39M | if (l_isdigit(c)) { |
484 | 4 | lex_unget_unsave(lex, c); |
485 | 4 | goto out; |
486 | 4 | } |
487 | 1.39M | } else if (l_isdigit(c)) { |
488 | 909k | do |
489 | 1.01M | c = lex_get_save(lex, error); |
490 | 1.01M | while (l_isdigit(c)); |
491 | 909k | } else { |
492 | 39 | lex_unget_unsave(lex, c); |
493 | 39 | goto out; |
494 | 39 | } |
495 | | |
496 | 2.30M | if (!(lex->flags & JSON_DECODE_INT_AS_REAL) && c != '.' && c != 'E' && c != 'e') { |
497 | 1.45M | json_int_t intval; |
498 | | |
499 | 1.45M | lex_unget_unsave(lex, c); |
500 | | |
501 | 1.45M | saved_text = strbuffer_value(&lex->saved_text); |
502 | | |
503 | 1.45M | errno = 0; |
504 | 1.45M | intval = json_strtoint(saved_text, &end, 10); |
505 | 1.45M | if (errno == ERANGE) { |
506 | 3 | if (intval < 0) |
507 | 1 | error_set(error, lex, json_error_numeric_overflow, |
508 | 1 | "too big negative integer"); |
509 | 2 | else |
510 | 2 | error_set(error, lex, json_error_numeric_overflow, "too big integer"); |
511 | 3 | goto out; |
512 | 3 | } |
513 | | |
514 | 1.45M | assert(end == saved_text + lex->saved_text.length); |
515 | | |
516 | 1.45M | lex->token = TOKEN_INTEGER; |
517 | 1.45M | lex->value.integer = intval; |
518 | 1.45M | return 0; |
519 | 1.45M | } |
520 | | |
521 | 854k | if (c == '.') { |
522 | 782k | c = lex_get(lex, error); |
523 | 782k | if (!l_isdigit(c)) { |
524 | 22 | lex_unget(lex, c); |
525 | 22 | goto out; |
526 | 22 | } |
527 | 782k | lex_save(lex, c); |
528 | | |
529 | 782k | do |
530 | 805k | c = lex_get_save(lex, error); |
531 | 805k | while (l_isdigit(c)); |
532 | 782k | } |
533 | | |
534 | 854k | if (c == 'E' || c == 'e') { |
535 | 20.0k | c = lex_get_save(lex, error); |
536 | 20.0k | if (c == '+' || c == '-') |
537 | 7.51k | c = lex_get_save(lex, error); |
538 | | |
539 | 20.0k | if (!l_isdigit(c)) { |
540 | 86 | lex_unget_unsave(lex, c); |
541 | 86 | goto out; |
542 | 86 | } |
543 | | |
544 | 19.9k | do |
545 | 33.8k | c = lex_get_save(lex, error); |
546 | 33.8k | while (l_isdigit(c)); |
547 | 19.9k | } |
548 | | |
549 | 854k | lex_unget_unsave(lex, c); |
550 | | |
551 | 854k | if (jsonp_strtod(&lex->saved_text, &doubleval)) { |
552 | 6 | error_set(error, lex, json_error_numeric_overflow, "real number overflow"); |
553 | 6 | goto out; |
554 | 6 | } |
555 | | |
556 | 854k | lex->token = TOKEN_REAL; |
557 | 854k | lex->value.real = doubleval; |
558 | 854k | return 0; |
559 | | |
560 | 160 | out: |
561 | 160 | return -1; |
562 | 854k | } |
563 | | |
564 | 5.61M | static int lex_scan(lex_t *lex, json_error_t *error) { |
565 | 5.61M | int c; |
566 | | |
567 | 5.61M | strbuffer_clear(&lex->saved_text); |
568 | | |
569 | 5.61M | if (lex->token == TOKEN_STRING) |
570 | 14.4k | lex_free_string(lex); |
571 | | |
572 | 5.61M | do |
573 | 5.61M | c = lex_get(lex, error); |
574 | 5.61M | while (c == ' ' || c == '\t' || c == '\n' || c == '\r'); |
575 | | |
576 | 5.61M | if (c == STREAM_STATE_EOF) { |
577 | 1.24k | lex->token = TOKEN_EOF; |
578 | 1.24k | goto out; |
579 | 1.24k | } |
580 | | |
581 | 5.61M | if (c == STREAM_STATE_ERROR) { |
582 | 141 | lex->token = TOKEN_INVALID; |
583 | 141 | goto out; |
584 | 141 | } |
585 | | |
586 | 5.61M | lex_save(lex, c); |
587 | | |
588 | 5.61M | if (c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') |
589 | 3.28M | lex->token = c; |
590 | | |
591 | 2.33M | else if (c == '"') |
592 | 15.2k | lex_scan_string(lex, error); |
593 | | |
594 | 2.31M | else if (l_isdigit(c) || c == '-') { |
595 | 2.30M | if (lex_scan_number(lex, c, error)) |
596 | 160 | goto out; |
597 | 2.30M | } |
598 | | |
599 | 8.40k | else if (l_isalpha(c)) { |
600 | | /* eat up the whole identifier for clearer error messages */ |
601 | 8.25k | const char *saved_text; |
602 | | |
603 | 8.25k | do |
604 | 105k | c = lex_get_save(lex, error); |
605 | 105k | while (l_isalpha(c)); |
606 | 8.25k | lex_unget_unsave(lex, c); |
607 | | |
608 | 8.25k | saved_text = strbuffer_value(&lex->saved_text); |
609 | | |
610 | 8.25k | if (strcmp(saved_text, "true") == 0) |
611 | 684 | lex->token = TOKEN_TRUE; |
612 | 7.57k | else if (strcmp(saved_text, "false") == 0) |
613 | 6.42k | lex->token = TOKEN_FALSE; |
614 | 1.14k | else if (strcmp(saved_text, "null") == 0) |
615 | 974 | lex->token = TOKEN_NULL; |
616 | 168 | else |
617 | 168 | lex->token = TOKEN_INVALID; |
618 | 8.25k | } |
619 | | |
620 | 145 | else { |
621 | | /* save the rest of the input UTF-8 sequence to get an error |
622 | | message of valid UTF-8 */ |
623 | 145 | lex_save_cached(lex); |
624 | 145 | lex->token = TOKEN_INVALID; |
625 | 145 | } |
626 | | |
627 | 5.61M | out: |
628 | 5.61M | return lex->token; |
629 | 5.61M | } |
630 | | |
631 | 12.9k | static char *lex_steal_string(lex_t *lex, size_t *out_len) { |
632 | 12.9k | char *result = NULL; |
633 | 12.9k | if (lex->token == TOKEN_STRING) { |
634 | 12.9k | result = lex->value.string.val; |
635 | 12.9k | *out_len = lex->value.string.len; |
636 | 12.9k | lex->value.string.val = NULL; |
637 | 12.9k | lex->value.string.len = 0; |
638 | 12.9k | } |
639 | 12.9k | return result; |
640 | 12.9k | } |
641 | | |
642 | 7.18k | static int lex_init(lex_t *lex, get_func get, size_t flags, void *data) { |
643 | 7.18k | stream_init(&lex->stream, get, data); |
644 | 7.18k | if (strbuffer_init(&lex->saved_text)) |
645 | 0 | return -1; |
646 | | |
647 | 7.18k | lex->flags = flags; |
648 | 7.18k | lex->token = TOKEN_INVALID; |
649 | 7.18k | return 0; |
650 | 7.18k | } |
651 | | |
652 | 7.18k | static void lex_close(lex_t *lex) { |
653 | 7.18k | if (lex->token == TOKEN_STRING) |
654 | 411 | lex_free_string(lex); |
655 | 7.18k | strbuffer_close(&lex->saved_text); |
656 | 7.18k | } |
657 | | |
658 | | /*** parser ***/ |
659 | | |
660 | | static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error); |
661 | | |
662 | 4.94k | static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) { |
663 | 4.94k | json_t *object = json_object(); |
664 | 4.94k | if (!object) |
665 | 0 | return NULL; |
666 | | |
667 | 4.94k | lex_scan(lex, error); |
668 | 4.94k | if (lex->token == '}') |
669 | 1.25k | return object; |
670 | | |
671 | 13.0k | while (1) { |
672 | 13.0k | char *key; |
673 | 13.0k | size_t len; |
674 | 13.0k | json_t *value; |
675 | | |
676 | 13.0k | if (lex->token != TOKEN_STRING) { |
677 | 49 | error_set(error, lex, json_error_invalid_syntax, "string or '}' expected"); |
678 | 49 | goto error; |
679 | 49 | } |
680 | | |
681 | 12.9k | key = lex_steal_string(lex, &len); |
682 | 12.9k | if (!key) |
683 | 0 | return NULL; |
684 | 12.9k | if (memchr(key, '\0', len)) { |
685 | 1 | jsonp_free(key); |
686 | 1 | error_set(error, lex, json_error_null_byte_in_key, |
687 | 1 | "NUL byte in object key not supported"); |
688 | 1 | goto error; |
689 | 1 | } |
690 | | |
691 | 12.9k | if (flags & JSON_REJECT_DUPLICATES) { |
692 | 3.05k | if (json_object_getn(object, key, len)) { |
693 | 1 | jsonp_free(key); |
694 | 1 | error_set(error, lex, json_error_duplicate_key, "duplicate object key"); |
695 | 1 | goto error; |
696 | 1 | } |
697 | 3.05k | } |
698 | | |
699 | 12.9k | lex_scan(lex, error); |
700 | 12.9k | if (lex->token != ':') { |
701 | 43 | jsonp_free(key); |
702 | 43 | error_set(error, lex, json_error_invalid_syntax, "':' expected"); |
703 | 43 | goto error; |
704 | 43 | } |
705 | | |
706 | 12.9k | lex_scan(lex, error); |
707 | 12.9k | value = parse_value(lex, flags, error); |
708 | 12.9k | if (!value) { |
709 | 647 | jsonp_free(key); |
710 | 647 | goto error; |
711 | 647 | } |
712 | | |
713 | 12.2k | if (json_object_setn_new_nocheck(object, key, len, value)) { |
714 | 0 | jsonp_free(key); |
715 | 0 | goto error; |
716 | 0 | } |
717 | | |
718 | 12.2k | jsonp_free(key); |
719 | | |
720 | 12.2k | lex_scan(lex, error); |
721 | 12.2k | if (lex->token != ',') |
722 | 2.94k | break; |
723 | | |
724 | 9.35k | lex_scan(lex, error); |
725 | 9.35k | } |
726 | | |
727 | 2.94k | if (lex->token != '}') { |
728 | 72 | error_set(error, lex, json_error_invalid_syntax, "'}' expected"); |
729 | 72 | goto error; |
730 | 72 | } |
731 | | |
732 | 2.87k | return object; |
733 | | |
734 | 813 | error: |
735 | 813 | json_decref(object); |
736 | 813 | return NULL; |
737 | 2.94k | } |
738 | | |
739 | 494k | static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) { |
740 | 494k | json_t *array = json_array(); |
741 | 494k | if (!array) |
742 | 0 | return NULL; |
743 | | |
744 | 494k | lex_scan(lex, error); |
745 | 494k | if (lex->token == ']') |
746 | 3.15k | return array; |
747 | | |
748 | 2.79M | while (lex->token) { |
749 | 2.79M | json_t *elem = parse_value(lex, flags, error); |
750 | 2.79M | if (!elem) |
751 | 40.9k | goto error; |
752 | | |
753 | 2.75M | if (json_array_append_new(array, elem)) { |
754 | 0 | goto error; |
755 | 0 | } |
756 | | |
757 | 2.75M | lex_scan(lex, error); |
758 | 2.75M | if (lex->token != ',') |
759 | 450k | break; |
760 | | |
761 | 2.30M | lex_scan(lex, error); |
762 | 2.30M | } |
763 | | |
764 | 450k | if (lex->token != ']') { |
765 | 188 | error_set(error, lex, json_error_invalid_syntax, "']' expected"); |
766 | 188 | goto error; |
767 | 188 | } |
768 | | |
769 | 450k | return array; |
770 | | |
771 | 41.1k | error: |
772 | 41.1k | json_decref(array); |
773 | 41.1k | return NULL; |
774 | 450k | } |
775 | | |
776 | 2.81M | static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) { |
777 | 2.81M | json_t *json; |
778 | | |
779 | 2.81M | lex->depth++; |
780 | 2.81M | if (lex->depth > JSON_PARSER_MAX_DEPTH) { |
781 | 1 | error_set(error, lex, json_error_stack_overflow, "maximum parsing depth reached"); |
782 | 1 | return NULL; |
783 | 1 | } |
784 | | |
785 | 2.81M | switch (lex->token) { |
786 | 1.83k | case TOKEN_STRING: { |
787 | 1.83k | const char *value = lex->value.string.val; |
788 | 1.83k | size_t len = lex->value.string.len; |
789 | | |
790 | 1.83k | if (!(flags & JSON_ALLOW_NUL)) { |
791 | 1.01k | if (memchr(value, '\0', len)) { |
792 | 1 | error_set(error, lex, json_error_null_character, |
793 | 1 | "\\u0000 is not allowed without JSON_ALLOW_NUL"); |
794 | 1 | return NULL; |
795 | 1 | } |
796 | 1.01k | } |
797 | | |
798 | 1.83k | json = jsonp_stringn_nocheck_own(value, len); |
799 | 1.83k | lex->value.string.val = NULL; |
800 | 1.83k | lex->value.string.len = 0; |
801 | 1.83k | break; |
802 | 1.83k | } |
803 | | |
804 | 1.45M | case TOKEN_INTEGER: { |
805 | 1.45M | json = json_integer(lex->value.integer); |
806 | 1.45M | break; |
807 | 1.83k | } |
808 | | |
809 | 854k | case TOKEN_REAL: { |
810 | 854k | json = json_real(lex->value.real); |
811 | 854k | break; |
812 | 1.83k | } |
813 | | |
814 | 682 | case TOKEN_TRUE: |
815 | 682 | json = json_true(); |
816 | 682 | break; |
817 | | |
818 | 6.42k | case TOKEN_FALSE: |
819 | 6.42k | json = json_false(); |
820 | 6.42k | break; |
821 | | |
822 | 971 | case TOKEN_NULL: |
823 | 971 | json = json_null(); |
824 | 971 | break; |
825 | | |
826 | 4.94k | case '{': |
827 | 4.94k | json = parse_object(lex, flags, error); |
828 | 4.94k | break; |
829 | | |
830 | 494k | case '[': |
831 | 494k | json = parse_array(lex, flags, error); |
832 | 494k | break; |
833 | | |
834 | 723 | case TOKEN_INVALID: |
835 | 723 | error_set(error, lex, json_error_invalid_syntax, "invalid token"); |
836 | 723 | return NULL; |
837 | | |
838 | 44 | default: |
839 | 44 | error_set(error, lex, json_error_invalid_syntax, "unexpected token"); |
840 | 44 | return NULL; |
841 | 2.81M | } |
842 | | |
843 | 2.81M | if (!json) |
844 | 41.9k | return NULL; |
845 | | |
846 | 2.77M | lex->depth--; |
847 | 2.77M | return json; |
848 | 2.81M | } |
849 | | |
850 | 7.18k | static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) { |
851 | 7.18k | json_t *result; |
852 | | |
853 | 7.18k | lex->depth = 0; |
854 | | |
855 | 7.18k | lex_scan(lex, error); |
856 | 7.18k | if (!(flags & JSON_DECODE_ANY)) { |
857 | 1.36k | if (lex->token != '[' && lex->token != '{') { |
858 | 311 | error_set(error, lex, json_error_invalid_syntax, "'[' or '{' expected"); |
859 | 311 | return NULL; |
860 | 311 | } |
861 | 1.36k | } |
862 | | |
863 | 6.87k | result = parse_value(lex, flags, error); |
864 | 6.87k | if (!result) |
865 | 1.12k | return NULL; |
866 | | |
867 | 5.74k | if (!(flags & JSON_DISABLE_EOF_CHECK)) { |
868 | 1.01k | lex_scan(lex, error); |
869 | 1.01k | if (lex->token != TOKEN_EOF) { |
870 | 73 | error_set(error, lex, json_error_end_of_input_expected, |
871 | 73 | "end of file expected"); |
872 | 73 | json_decref(result); |
873 | 73 | return NULL; |
874 | 73 | } |
875 | 1.01k | } |
876 | | |
877 | 5.67k | if (error) { |
878 | | /* Save the position even though there was no error */ |
879 | 5.67k | error->position = (int)lex->stream.position; |
880 | 5.67k | } |
881 | | |
882 | 5.67k | return result; |
883 | 5.74k | } |
884 | | |
885 | | typedef struct { |
886 | | const char *data; |
887 | | size_t pos; |
888 | | } string_data_t; |
889 | | |
890 | 0 | static int string_get(void *data) { |
891 | 0 | char c; |
892 | 0 | string_data_t *stream = (string_data_t *)data; |
893 | 0 | c = stream->data[stream->pos]; |
894 | 0 | if (c == '\0') |
895 | 0 | return EOF; |
896 | 0 | else { |
897 | 0 | stream->pos++; |
898 | 0 | return (unsigned char)c; |
899 | 0 | } |
900 | 0 | } |
901 | | |
902 | 0 | json_t *json_loads(const char *string, size_t flags, json_error_t *error) { |
903 | 0 | lex_t lex; |
904 | 0 | json_t *result; |
905 | 0 | string_data_t stream_data; |
906 | |
|
907 | 0 | jsonp_error_init(error, "<string>"); |
908 | |
|
909 | 0 | if (string == NULL) { |
910 | 0 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); |
911 | 0 | return NULL; |
912 | 0 | } |
913 | | |
914 | 0 | stream_data.data = string; |
915 | 0 | stream_data.pos = 0; |
916 | |
|
917 | 0 | if (lex_init(&lex, string_get, flags, (void *)&stream_data)) |
918 | 0 | return NULL; |
919 | | |
920 | 0 | result = parse_json(&lex, flags, error); |
921 | |
|
922 | 0 | lex_close(&lex); |
923 | 0 | return result; |
924 | 0 | } |
925 | | |
926 | | typedef struct { |
927 | | const char *data; |
928 | | size_t len; |
929 | | size_t pos; |
930 | | } buffer_data_t; |
931 | | |
932 | 21.6M | static int buffer_get(void *data) { |
933 | 21.6M | char c; |
934 | 21.6M | buffer_data_t *stream = data; |
935 | 21.6M | if (stream->pos >= stream->len) |
936 | 4.87k | return EOF; |
937 | | |
938 | 21.6M | c = stream->data[stream->pos]; |
939 | 21.6M | stream->pos++; |
940 | 21.6M | return (unsigned char)c; |
941 | 21.6M | } |
942 | | |
943 | 7.18k | json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) { |
944 | 7.18k | lex_t lex; |
945 | 7.18k | json_t *result; |
946 | 7.18k | buffer_data_t stream_data; |
947 | | |
948 | 7.18k | jsonp_error_init(error, "<buffer>"); |
949 | | |
950 | 7.18k | if (buffer == NULL) { |
951 | 0 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); |
952 | 0 | return NULL; |
953 | 0 | } |
954 | | |
955 | 7.18k | stream_data.data = buffer; |
956 | 7.18k | stream_data.pos = 0; |
957 | 7.18k | stream_data.len = buflen; |
958 | | |
959 | 7.18k | if (lex_init(&lex, buffer_get, flags, (void *)&stream_data)) |
960 | 0 | return NULL; |
961 | | |
962 | 7.18k | result = parse_json(&lex, flags, error); |
963 | | |
964 | 7.18k | lex_close(&lex); |
965 | 7.18k | return result; |
966 | 7.18k | } |
967 | | |
968 | 0 | json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) { |
969 | 0 | lex_t lex; |
970 | 0 | const char *source; |
971 | 0 | json_t *result; |
972 | |
|
973 | 0 | if (input == stdin) |
974 | 0 | source = "<stdin>"; |
975 | 0 | else |
976 | 0 | source = "<stream>"; |
977 | |
|
978 | 0 | jsonp_error_init(error, source); |
979 | |
|
980 | 0 | if (input == NULL) { |
981 | 0 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); |
982 | 0 | return NULL; |
983 | 0 | } |
984 | | |
985 | 0 | if (lex_init(&lex, (get_func)fgetc, flags, input)) |
986 | 0 | return NULL; |
987 | | |
988 | 0 | result = parse_json(&lex, flags, error); |
989 | |
|
990 | 0 | lex_close(&lex); |
991 | 0 | return result; |
992 | 0 | } |
993 | | |
/*
 * get_func implementation for file descriptors.
 *
 * data: pointer to the int file descriptor to read from.
 *
 * Returns the next byte (0..255) read from the descriptor, or EOF when
 * read() does not deliver exactly one byte. A read() interrupted by a signal
 * (EINTR) is retried rather than reported as end of input. When
 * HAVE_UNISTD_H is not defined, read() is unavailable and EOF is always
 * returned.
 *
 * The parameter is void * so the function has exactly the get_func type and
 * can be called through that pointer type without undefined behaviour.
 */
static int fd_get_func(void *data) {
#ifdef HAVE_UNISTD_H
    const int *fd = data;
    unsigned char c;
    ssize_t n;

    do {
        n = read(*fd, &c, 1);
    } while (n < 0 && errno == EINTR);

    if (n == 1)
        return c;
#else
    (void)data; /* unused without read() */
#endif
    return EOF;
}
1002 | | |
1003 | 0 | json_t *json_loadfd(int input, size_t flags, json_error_t *error) { |
1004 | 0 | lex_t lex; |
1005 | 0 | const char *source; |
1006 | 0 | json_t *result; |
1007 | |
|
1008 | 0 | #ifdef HAVE_UNISTD_H |
1009 | 0 | if (input == STDIN_FILENO) |
1010 | 0 | source = "<stdin>"; |
1011 | 0 | else |
1012 | 0 | #endif |
1013 | 0 | source = "<stream>"; |
1014 | |
|
1015 | 0 | jsonp_error_init(error, source); |
1016 | |
|
1017 | 0 | if (input < 0) { |
1018 | 0 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); |
1019 | 0 | return NULL; |
1020 | 0 | } |
1021 | | |
1022 | 0 | if (lex_init(&lex, (get_func)fd_get_func, flags, &input)) |
1023 | 0 | return NULL; |
1024 | | |
1025 | 0 | result = parse_json(&lex, flags, error); |
1026 | |
|
1027 | 0 | lex_close(&lex); |
1028 | 0 | return result; |
1029 | 0 | } |
1030 | | |
1031 | 0 | json_t *json_load_file(const char *path, size_t flags, json_error_t *error) { |
1032 | 0 | json_t *result; |
1033 | 0 | FILE *fp; |
1034 | |
|
1035 | 0 | jsonp_error_init(error, path); |
1036 | |
|
1037 | 0 | if (path == NULL) { |
1038 | 0 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); |
1039 | 0 | return NULL; |
1040 | 0 | } |
1041 | | |
1042 | 0 | fp = fopen(path, "rb"); |
1043 | 0 | if (!fp) { |
1044 | 0 | error_set(error, NULL, json_error_cannot_open_file, "unable to open %s: %s", path, |
1045 | 0 | strerror(errno)); |
1046 | 0 | return NULL; |
1047 | 0 | } |
1048 | | |
1049 | 0 | result = json_loadf(fp, flags, error); |
1050 | |
|
1051 | 0 | fclose(fp); |
1052 | 0 | return result; |
1053 | 0 | } |
1054 | | |
1055 | 0 | #define MAX_BUF_LEN 1024 |
1056 | | |
1057 | | typedef struct { |
1058 | | char data[MAX_BUF_LEN]; |
1059 | | size_t len; |
1060 | | size_t pos; |
1061 | | json_load_callback_t callback; |
1062 | | void *arg; |
1063 | | } callback_data_t; |
1064 | | |
1065 | 0 | static int callback_get(void *data) { |
1066 | 0 | char c; |
1067 | 0 | callback_data_t *stream = data; |
1068 | |
|
1069 | 0 | if (stream->pos >= stream->len) { |
1070 | 0 | stream->pos = 0; |
1071 | 0 | stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg); |
1072 | 0 | if (stream->len == 0 || stream->len == (size_t)-1) |
1073 | 0 | return EOF; |
1074 | 0 | } |
1075 | | |
1076 | 0 | c = stream->data[stream->pos]; |
1077 | 0 | stream->pos++; |
1078 | 0 | return (unsigned char)c; |
1079 | 0 | } |
1080 | | |
1081 | | json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, |
1082 | 0 | json_error_t *error) { |
1083 | 0 | lex_t lex; |
1084 | 0 | json_t *result; |
1085 | |
|
1086 | 0 | callback_data_t stream_data; |
1087 | |
|
1088 | 0 | memset(&stream_data, 0, sizeof(stream_data)); |
1089 | 0 | stream_data.callback = callback; |
1090 | 0 | stream_data.arg = arg; |
1091 | |
|
1092 | 0 | jsonp_error_init(error, "<callback>"); |
1093 | |
|
1094 | 0 | if (callback == NULL) { |
1095 | 0 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); |
1096 | 0 | return NULL; |
1097 | 0 | } |
1098 | | |
1099 | 0 | if (lex_init(&lex, (get_func)callback_get, flags, &stream_data)) |
1100 | 0 | return NULL; |
1101 | | |
1102 | 0 | result = parse_json(&lex, flags, error); |
1103 | |
|
1104 | 0 | lex_close(&lex); |
1105 | 0 | return result; |
1106 | 0 | } |