Coverage Report

Created: 2025-08-26 06:20

/src/jansson/src/load.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
3
 *
4
 * Jansson is free software; you can redistribute it and/or modify
5
 * it under the terms of the MIT license. See LICENSE for details.
6
 */
7
8
#ifndef _GNU_SOURCE
9
#define _GNU_SOURCE
10
#endif
11
12
#include "jansson_private.h"
13
14
#include <assert.h>
15
#include <errno.h>
16
#include <limits.h>
17
#include <stdio.h>
18
#include <stdlib.h>
19
#include <string.h>
20
#ifdef HAVE_UNISTD_H
21
#include <unistd.h>
22
#endif
23
24
#include "jansson.h"
25
#include "strbuffer.h"
26
#include "utf.h"
27
28
23.9M
#define STREAM_STATE_OK    0
29
64.0M
#define STREAM_STATE_EOF   -1
30
41.8M
#define STREAM_STATE_ERROR -2
31
32
2.33M
#define TOKEN_INVALID -1
33
2.25k
#define TOKEN_EOF     0
34
5.66M
#define TOKEN_STRING  256
35
2.90M
#define TOKEN_INTEGER 257
36
1.70M
#define TOKEN_REAL    258
37
1.36k
#define TOKEN_TRUE    259
38
12.8k
#define TOKEN_FALSE   260
39
1.94k
#define TOKEN_NULL    261
40
41
/* Locale independent versions of isxxx() functions */
42
236k
#define l_isupper(c) ('A' <= (c) && (c) <= 'Z')
43
130k
#define l_islower(c) ('a' <= (c) && (c) <= 'z')
44
113k
#define l_isalpha(c) (l_isupper(c) || l_islower(c))
45
9.67M
#define l_isdigit(c) ('0' <= (c) && (c) <= '9')
46
#define l_isxdigit(c)                                                                    \
47
28.0k
    (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
48
49
/* Read one byte from stream, convert to unsigned char, then int, and
50
   return. return EOF on end of file. This corresponds to the
51
   behaviour of fgetc(). */
52
typedef int (*get_func)(void *data);
53
54
typedef struct {
55
    get_func get;
56
    void *data;
57
    char buffer[5];
58
    size_t buffer_pos;
59
    int state;
60
    int line;
61
    int column, last_column;
62
    size_t position;
63
} stream_t;
64
65
typedef struct {
66
    stream_t stream;
67
    strbuffer_t saved_text;
68
    size_t flags;
69
    size_t depth;
70
    int token;
71
    union {
72
        struct {
73
            char *val;
74
            size_t len;
75
        } string;
76
        json_int_t integer;
77
        double real;
78
    } value;
79
} lex_t;
80
81
191
#define stream_to_lex(stream) container_of(stream, lex_t, stream)
82
83
/*** error reporting ***/
84
85
static void error_set(json_error_t *error, const lex_t *lex, enum json_error_code code,
86
2.10k
                      const char *msg, ...) {
87
2.10k
    va_list ap;
88
2.10k
    char msg_text[JSON_ERROR_TEXT_LENGTH];
89
2.10k
    char msg_with_context[JSON_ERROR_TEXT_LENGTH];
90
91
2.10k
    int line = -1, col = -1;
92
2.10k
    size_t pos = 0;
93
2.10k
    const char *result = msg_text;
94
95
2.10k
    if (!error)
96
0
        return;
97
98
2.10k
    va_start(ap, msg);
99
2.10k
    vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);
100
2.10k
    msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
101
2.10k
    va_end(ap);
102
103
2.10k
    if (lex) {
104
2.10k
        const char *saved_text = strbuffer_value(&lex->saved_text);
105
106
2.10k
        line = lex->stream.line;
107
2.10k
        col = lex->stream.column;
108
2.10k
        pos = lex->stream.position;
109
110
2.10k
        if (saved_text && saved_text[0]) {
111
1.52k
            if (lex->saved_text.length <= 20) {
112
1.33k
                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, "%s near '%s'",
113
1.33k
                         msg_text, saved_text);
114
1.33k
                msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
115
1.33k
                result = msg_with_context;
116
1.33k
            }
117
1.52k
        } else {
118
583
            if (code == json_error_invalid_syntax) {
119
                /* More specific error code for premature end of file. */
120
440
                code = json_error_premature_end_of_input;
121
440
            }
122
583
            if (lex->stream.state == STREAM_STATE_ERROR) {
123
                /* No context for UTF-8 decoding errors */
124
281
                result = msg_text;
125
302
            } else {
126
302
                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, "%s near end of file",
127
302
                         msg_text);
128
302
                msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
129
302
                result = msg_with_context;
130
302
            }
131
583
        }
132
2.10k
    }
133
134
2.10k
    jsonp_error_set(error, line, col, pos, code, "%s", result);
135
2.10k
}
136
137
/*** lexical analyzer ***/
138
139
7.18k
static void stream_init(stream_t *stream, get_func get, void *data) {
140
7.18k
    stream->get = get;
141
7.18k
    stream->data = data;
142
7.18k
    stream->buffer[0] = '\0';
143
7.18k
    stream->buffer_pos = 0;
144
145
7.18k
    stream->state = STREAM_STATE_OK;
146
7.18k
    stream->line = 1;
147
7.18k
    stream->column = 0;
148
7.18k
    stream->position = 0;
149
7.18k
}
150
151
23.9M
static int stream_get(stream_t *stream, json_error_t *error) {
152
23.9M
    int c;
153
154
23.9M
    if (stream->state != STREAM_STATE_OK)
155
531
        return stream->state;
156
157
23.9M
    if (!stream->buffer[stream->buffer_pos]) {
158
21.6M
        c = stream->get(stream->data);
159
21.6M
        if (c == EOF) {
160
4.71k
            stream->state = STREAM_STATE_EOF;
161
4.71k
            return STREAM_STATE_EOF;
162
4.71k
        }
163
164
21.6M
        stream->buffer[0] = c;
165
21.6M
        stream->buffer_pos = 0;
166
167
21.6M
        if (0x80 <= c && c <= 0xFF) {
168
            /* multi-byte UTF-8 sequence */
169
4.95k
            size_t i, count;
170
171
4.95k
            count = utf8_check_first(c);
172
4.95k
            if (!count)
173
57
                goto out;
174
175
4.90k
            assert(count >= 2);
176
177
14.2k
            for (i = 1; i < count; i++)
178
9.31k
                stream->buffer[i] = stream->get(stream->data);
179
180
4.90k
            if (!utf8_check_full(stream->buffer, count, NULL))
181
134
                goto out;
182
183
4.76k
            stream->buffer[count] = '\0';
184
4.76k
        } else
185
21.6M
            stream->buffer[1] = '\0';
186
21.6M
    }
187
188
23.9M
    c = stream->buffer[stream->buffer_pos++];
189
190
23.9M
    stream->position++;
191
23.9M
    if (c == '\n') {
192
625
        stream->line++;
193
625
        stream->last_column = stream->column;
194
625
        stream->column = 0;
195
23.9M
    } else if (utf8_check_first(c)) {
196
        /* track the Unicode character column, so increment only if
197
           this is the first character of a UTF-8 sequence */
198
23.9M
        stream->column++;
199
23.9M
    }
200
201
23.9M
    return c;
202
203
191
out:
204
191
    stream->state = STREAM_STATE_ERROR;
205
191
    error_set(error, stream_to_lex(stream), json_error_invalid_utf8,
206
191
              "unable to decode byte 0x%x", c);
207
191
    return STREAM_STATE_ERROR;
208
23.9M
}
209
210
2.31M
static void stream_unget(stream_t *stream, int c) {
211
2.31M
    if (c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)
212
6
        return;
213
214
2.31M
    stream->position--;
215
2.31M
    if (c == '\n') {
216
199
        stream->line--;
217
199
        stream->column = stream->last_column;
218
2.31M
    } else if (utf8_check_first(c))
219
2.31M
        stream->column--;
220
221
2.31M
    assert(stream->buffer_pos > 0);
222
2.31M
    stream->buffer_pos--;
223
2.31M
    assert(stream->buffer[stream->buffer_pos] == c);
224
2.31M
}
225
226
6.40M
static int lex_get(lex_t *lex, json_error_t *error) {
227
6.40M
    return stream_get(&lex->stream, error);
228
6.40M
}
229
230
23.9M
static void lex_save(lex_t *lex, int c) { strbuffer_append_byte(&lex->saved_text, c); }
231
232
17.5M
static int lex_get_save(lex_t *lex, json_error_t *error) {
233
17.5M
    int c = stream_get(&lex->stream, error);
234
17.5M
    if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)
235
17.5M
        lex_save(lex, c);
236
17.5M
    return c;
237
17.5M
}
238
239
22
static void lex_unget(lex_t *lex, int c) { stream_unget(&lex->stream, c); }
240
241
2.31M
static void lex_unget_unsave(lex_t *lex, int c) {
242
2.31M
    if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) {
243
/* Since we treat warnings as errors, when assertions are turned
244
 * off the "d" variable would be set but never used. Which is
245
 * treated as an error by GCC.
246
 */
247
2.31M
#ifndef NDEBUG
248
2.31M
        char d;
249
2.31M
#endif
250
2.31M
        stream_unget(&lex->stream, c);
251
2.31M
#ifndef NDEBUG
252
2.31M
        d =
253
2.31M
#endif
254
2.31M
            strbuffer_pop(&lex->saved_text);
255
2.31M
        assert(c == d);
256
2.31M
    }
257
2.31M
}
258
259
145
static void lex_save_cached(lex_t *lex) {
260
331
    while (lex->stream.buffer[lex->stream.buffer_pos] != '\0') {
261
186
        lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
262
186
        lex->stream.buffer_pos++;
263
186
        lex->stream.position++;
264
186
    }
265
145
}
266
267
15.2k
static void lex_free_string(lex_t *lex) {
268
15.2k
    jsonp_free(lex->value.string.val);
269
15.2k
    lex->value.string.val = NULL;
270
15.2k
    lex->value.string.len = 0;
271
15.2k
}
272
273
/* assumes that str points to 'u' plus at least 4 valid hex digits */
274
6.30k
static int32_t decode_unicode_escape(const char *str) {
275
6.30k
    int i;
276
6.30k
    int32_t value = 0;
277
278
6.30k
    assert(str[0] == 'u');
279
280
31.5k
    for (i = 1; i <= 4; i++) {
281
25.2k
        char c = str[i];
282
25.2k
        value <<= 4;
283
25.2k
        if (l_isdigit(c))
284
8.15k
            value += c - '0';
285
17.0k
        else if (l_islower(c))
286
7.73k
            value += c - 'a' + 10;
287
9.34k
        else if (l_isupper(c))
288
9.34k
            value += c - 'A' + 10;
289
0
        else
290
0
            return -1;
291
25.2k
    }
292
293
6.30k
    return value;
294
6.30k
}
295
296
15.2k
static void lex_scan_string(lex_t *lex, json_error_t *error) {
297
15.2k
    int c;
298
15.2k
    const char *p;
299
15.2k
    char *t;
300
15.2k
    int i;
301
302
15.2k
    lex->value.string.val = NULL;
303
15.2k
    lex->token = TOKEN_INVALID;
304
305
15.2k
    c = lex_get_save(lex, error);
306
307
14.1M
    while (c != '"') {
308
14.1M
        if (c == STREAM_STATE_ERROR)
309
28
            goto out;
310
311
14.1M
        else if (c == STREAM_STATE_EOF) {
312
142
            error_set(error, lex, json_error_premature_end_of_input,
313
142
                      "premature end of input");
314
142
            goto out;
315
142
        }
316
317
14.1M
        else if (0 <= c && c <= 0x1F) {
318
            /* control character */
319
31
            lex_unget_unsave(lex, c);
320
31
            if (c == '\n')
321
2
                error_set(error, lex, json_error_invalid_syntax, "unexpected newline");
322
29
            else
323
29
                error_set(error, lex, json_error_invalid_syntax, "control character 0x%x",
324
29
                          c);
325
31
            goto out;
326
31
        }
327
328
14.1M
        else if (c == '\\') {
329
11.8k
            c = lex_get_save(lex, error);
330
11.8k
            if (c == 'u') {
331
7.06k
                c = lex_get_save(lex, error);
332
35.0k
                for (i = 0; i < 4; i++) {
333
28.0k
                    if (!l_isxdigit(c)) {
334
81
                        error_set(error, lex, json_error_invalid_syntax,
335
81
                                  "invalid escape");
336
81
                        goto out;
337
81
                    }
338
28.0k
                    c = lex_get_save(lex, error);
339
28.0k
                }
340
7.06k
            } else if (c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' ||
341
4.74k
                       c == 'n' || c == 'r' || c == 't')
342
4.67k
                c = lex_get_save(lex, error);
343
61
            else {
344
61
                error_set(error, lex, json_error_invalid_syntax, "invalid escape");
345
61
                goto out;
346
61
            }
347
11.8k
        } else
348
14.0M
            c = lex_get_save(lex, error);
349
14.1M
    }
350
351
    /* the actual value is at most of the same length as the source
352
       string, because:
353
         - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
354
         - a single \uXXXX escape (length 6) is converted to at most 3 bytes
355
         - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
356
           are converted to 4 bytes
357
    */
358
14.9k
    t = jsonp_malloc(lex->saved_text.length + 1);
359
14.9k
    if (!t) {
360
        /* this is not very nice, since TOKEN_INVALID is returned */
361
0
        goto out;
362
0
    }
363
14.9k
    lex->value.string.val = t;
364
365
    /* + 1 to skip the " */
366
14.9k
    p = strbuffer_value(&lex->saved_text) + 1;
367
368
11.4M
    while (*p != '"') {
369
11.4M
        if (*p == '\\') {
370
8.09k
            p++;
371
8.09k
            if (*p == 'u') {
372
4.61k
                size_t length;
373
4.61k
                int32_t value;
374
375
4.61k
                value = decode_unicode_escape(p);
376
4.61k
                if (value < 0) {
377
0
                    error_set(error, lex, json_error_invalid_syntax,
378
0
                              "invalid Unicode escape '%.6s'", p - 1);
379
0
                    goto out;
380
0
                }
381
4.61k
                p += 5;
382
383
4.61k
                if (0xD800 <= value && value <= 0xDBFF) {
384
                    /* surrogate pair */
385
1.72k
                    if (*p == '\\' && *(p + 1) == 'u') {
386
1.69k
                        int32_t value2 = decode_unicode_escape(++p);
387
1.69k
                        if (value2 < 0) {
388
0
                            error_set(error, lex, json_error_invalid_syntax,
389
0
                                      "invalid Unicode escape '%.6s'", p - 1);
390
0
                            goto out;
391
0
                        }
392
1.69k
                        p += 5;
393
394
1.69k
                        if (0xDC00 <= value2 && value2 <= 0xDFFF) {
395
                            /* valid second surrogate */
396
1.65k
                            value =
397
1.65k
                                ((value - 0xD800) << 10) + (value2 - 0xDC00) + 0x10000;
398
1.65k
                        } else {
399
                            /* invalid second surrogate */
400
41
                            error_set(error, lex, json_error_invalid_syntax,
401
41
                                      "invalid Unicode '\\u%04X\\u%04X'", value, value2);
402
41
                            goto out;
403
41
                        }
404
1.69k
                    } else {
405
                        /* no second surrogate */
406
27
                        error_set(error, lex, json_error_invalid_syntax,
407
27
                                  "invalid Unicode '\\u%04X'", value);
408
27
                        goto out;
409
27
                    }
410
2.88k
                } else if (0xDC00 <= value && value <= 0xDFFF) {
411
17
                    error_set(error, lex, json_error_invalid_syntax,
412
17
                              "invalid Unicode '\\u%04X'", value);
413
17
                    goto out;
414
17
                }
415
416
4.52k
                if (utf8_encode(value, t, &length))
417
0
                    assert(0);
418
4.52k
                t += length;
419
4.52k
            } else {
420
3.48k
                switch (*p) {
421
269
                    case '"':
422
539
                    case '\\':
423
738
                    case '/':
424
738
                        *t = *p;
425
738
                        break;
426
530
                    case 'b':
427
530
                        *t = '\b';
428
530
                        break;
429
490
                    case 'f':
430
490
                        *t = '\f';
431
490
                        break;
432
633
                    case 'n':
433
633
                        *t = '\n';
434
633
                        break;
435
497
                    case 'r':
436
497
                        *t = '\r';
437
497
                        break;
438
593
                    case 't':
439
593
                        *t = '\t';
440
593
                        break;
441
0
                    default:
442
0
                        assert(0);
443
3.48k
                }
444
3.48k
                t++;
445
3.48k
                p++;
446
3.48k
            }
447
8.09k
        } else
448
11.4M
            *(t++) = *(p++);
449
11.4M
    }
450
14.8k
    *t = '\0';
451
14.8k
    lex->value.string.len = t - lex->value.string.val;
452
14.8k
    lex->token = TOKEN_STRING;
453
14.8k
    return;
454
455
428
out:
456
428
    lex_free_string(lex);
457
428
}
458
459
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
460
#if JSON_INTEGER_IS_LONG_LONG
461
#ifdef _MSC_VER /* Microsoft Visual Studio */
462
#define json_strtoint _strtoi64
463
#else
464
1.45M
#define json_strtoint strtoll
465
#endif
466
#else
467
#define json_strtoint strtol
468
#endif
469
#endif
470
471
2.30M
static int lex_scan_number(lex_t *lex, int c, json_error_t *error) {
472
2.30M
    const char *saved_text;
473
2.30M
    char *end;
474
2.30M
    double doubleval;
475
476
2.30M
    lex->token = TOKEN_INVALID;
477
478
2.30M
    if (c == '-')
479
652
        c = lex_get_save(lex, error);
480
481
2.30M
    if (c == '0') {
482
1.39M
        c = lex_get_save(lex, error);
483
1.39M
        if (l_isdigit(c)) {
484
4
            lex_unget_unsave(lex, c);
485
4
            goto out;
486
4
        }
487
1.39M
    } else if (l_isdigit(c)) {
488
909k
        do
489
1.01M
            c = lex_get_save(lex, error);
490
1.01M
        while (l_isdigit(c));
491
909k
    } else {
492
39
        lex_unget_unsave(lex, c);
493
39
        goto out;
494
39
    }
495
496
2.30M
    if (!(lex->flags & JSON_DECODE_INT_AS_REAL) && c != '.' && c != 'E' && c != 'e') {
497
1.45M
        json_int_t intval;
498
499
1.45M
        lex_unget_unsave(lex, c);
500
501
1.45M
        saved_text = strbuffer_value(&lex->saved_text);
502
503
1.45M
        errno = 0;
504
1.45M
        intval = json_strtoint(saved_text, &end, 10);
505
1.45M
        if (errno == ERANGE) {
506
3
            if (intval < 0)
507
1
                error_set(error, lex, json_error_numeric_overflow,
508
1
                          "too big negative integer");
509
2
            else
510
2
                error_set(error, lex, json_error_numeric_overflow, "too big integer");
511
3
            goto out;
512
3
        }
513
514
1.45M
        assert(end == saved_text + lex->saved_text.length);
515
516
1.45M
        lex->token = TOKEN_INTEGER;
517
1.45M
        lex->value.integer = intval;
518
1.45M
        return 0;
519
1.45M
    }
520
521
854k
    if (c == '.') {
522
782k
        c = lex_get(lex, error);
523
782k
        if (!l_isdigit(c)) {
524
22
            lex_unget(lex, c);
525
22
            goto out;
526
22
        }
527
782k
        lex_save(lex, c);
528
529
782k
        do
530
805k
            c = lex_get_save(lex, error);
531
805k
        while (l_isdigit(c));
532
782k
    }
533
534
854k
    if (c == 'E' || c == 'e') {
535
20.0k
        c = lex_get_save(lex, error);
536
20.0k
        if (c == '+' || c == '-')
537
7.51k
            c = lex_get_save(lex, error);
538
539
20.0k
        if (!l_isdigit(c)) {
540
86
            lex_unget_unsave(lex, c);
541
86
            goto out;
542
86
        }
543
544
19.9k
        do
545
33.8k
            c = lex_get_save(lex, error);
546
33.8k
        while (l_isdigit(c));
547
19.9k
    }
548
549
854k
    lex_unget_unsave(lex, c);
550
551
854k
    if (jsonp_strtod(&lex->saved_text, &doubleval)) {
552
6
        error_set(error, lex, json_error_numeric_overflow, "real number overflow");
553
6
        goto out;
554
6
    }
555
556
854k
    lex->token = TOKEN_REAL;
557
854k
    lex->value.real = doubleval;
558
854k
    return 0;
559
560
160
out:
561
160
    return -1;
562
854k
}
563
564
5.61M
static int lex_scan(lex_t *lex, json_error_t *error) {
565
5.61M
    int c;
566
567
5.61M
    strbuffer_clear(&lex->saved_text);
568
569
5.61M
    if (lex->token == TOKEN_STRING)
570
14.4k
        lex_free_string(lex);
571
572
5.61M
    do
573
5.61M
        c = lex_get(lex, error);
574
5.61M
    while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
575
576
5.61M
    if (c == STREAM_STATE_EOF) {
577
1.24k
        lex->token = TOKEN_EOF;
578
1.24k
        goto out;
579
1.24k
    }
580
581
5.61M
    if (c == STREAM_STATE_ERROR) {
582
141
        lex->token = TOKEN_INVALID;
583
141
        goto out;
584
141
    }
585
586
5.61M
    lex_save(lex, c);
587
588
5.61M
    if (c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
589
3.28M
        lex->token = c;
590
591
2.33M
    else if (c == '"')
592
15.2k
        lex_scan_string(lex, error);
593
594
2.31M
    else if (l_isdigit(c) || c == '-') {
595
2.30M
        if (lex_scan_number(lex, c, error))
596
160
            goto out;
597
2.30M
    }
598
599
8.40k
    else if (l_isalpha(c)) {
600
        /* eat up the whole identifier for clearer error messages */
601
8.25k
        const char *saved_text;
602
603
8.25k
        do
604
105k
            c = lex_get_save(lex, error);
605
105k
        while (l_isalpha(c));
606
8.25k
        lex_unget_unsave(lex, c);
607
608
8.25k
        saved_text = strbuffer_value(&lex->saved_text);
609
610
8.25k
        if (strcmp(saved_text, "true") == 0)
611
684
            lex->token = TOKEN_TRUE;
612
7.57k
        else if (strcmp(saved_text, "false") == 0)
613
6.42k
            lex->token = TOKEN_FALSE;
614
1.14k
        else if (strcmp(saved_text, "null") == 0)
615
974
            lex->token = TOKEN_NULL;
616
168
        else
617
168
            lex->token = TOKEN_INVALID;
618
8.25k
    }
619
620
145
    else {
621
        /* save the rest of the input UTF-8 sequence to get an error
622
           message of valid UTF-8 */
623
145
        lex_save_cached(lex);
624
145
        lex->token = TOKEN_INVALID;
625
145
    }
626
627
5.61M
out:
628
5.61M
    return lex->token;
629
5.61M
}
630
631
12.9k
static char *lex_steal_string(lex_t *lex, size_t *out_len) {
632
12.9k
    char *result = NULL;
633
12.9k
    if (lex->token == TOKEN_STRING) {
634
12.9k
        result = lex->value.string.val;
635
12.9k
        *out_len = lex->value.string.len;
636
12.9k
        lex->value.string.val = NULL;
637
12.9k
        lex->value.string.len = 0;
638
12.9k
    }
639
12.9k
    return result;
640
12.9k
}
641
642
7.18k
static int lex_init(lex_t *lex, get_func get, size_t flags, void *data) {
643
7.18k
    stream_init(&lex->stream, get, data);
644
7.18k
    if (strbuffer_init(&lex->saved_text))
645
0
        return -1;
646
647
7.18k
    lex->flags = flags;
648
7.18k
    lex->token = TOKEN_INVALID;
649
7.18k
    return 0;
650
7.18k
}
651
652
7.18k
static void lex_close(lex_t *lex) {
653
7.18k
    if (lex->token == TOKEN_STRING)
654
411
        lex_free_string(lex);
655
7.18k
    strbuffer_close(&lex->saved_text);
656
7.18k
}
657
658
/*** parser ***/
659
660
static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);
661
662
4.94k
static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) {
663
4.94k
    json_t *object = json_object();
664
4.94k
    if (!object)
665
0
        return NULL;
666
667
4.94k
    lex_scan(lex, error);
668
4.94k
    if (lex->token == '}')
669
1.25k
        return object;
670
671
13.0k
    while (1) {
672
13.0k
        char *key;
673
13.0k
        size_t len;
674
13.0k
        json_t *value;
675
676
13.0k
        if (lex->token != TOKEN_STRING) {
677
49
            error_set(error, lex, json_error_invalid_syntax, "string or '}' expected");
678
49
            goto error;
679
49
        }
680
681
12.9k
        key = lex_steal_string(lex, &len);
682
12.9k
        if (!key)
683
0
            return NULL;
684
12.9k
        if (memchr(key, '\0', len)) {
685
1
            jsonp_free(key);
686
1
            error_set(error, lex, json_error_null_byte_in_key,
687
1
                      "NUL byte in object key not supported");
688
1
            goto error;
689
1
        }
690
691
12.9k
        if (flags & JSON_REJECT_DUPLICATES) {
692
3.05k
            if (json_object_getn(object, key, len)) {
693
1
                jsonp_free(key);
694
1
                error_set(error, lex, json_error_duplicate_key, "duplicate object key");
695
1
                goto error;
696
1
            }
697
3.05k
        }
698
699
12.9k
        lex_scan(lex, error);
700
12.9k
        if (lex->token != ':') {
701
43
            jsonp_free(key);
702
43
            error_set(error, lex, json_error_invalid_syntax, "':' expected");
703
43
            goto error;
704
43
        }
705
706
12.9k
        lex_scan(lex, error);
707
12.9k
        value = parse_value(lex, flags, error);
708
12.9k
        if (!value) {
709
647
            jsonp_free(key);
710
647
            goto error;
711
647
        }
712
713
12.2k
        if (json_object_setn_new_nocheck(object, key, len, value)) {
714
0
            jsonp_free(key);
715
0
            goto error;
716
0
        }
717
718
12.2k
        jsonp_free(key);
719
720
12.2k
        lex_scan(lex, error);
721
12.2k
        if (lex->token != ',')
722
2.94k
            break;
723
724
9.35k
        lex_scan(lex, error);
725
9.35k
    }
726
727
2.94k
    if (lex->token != '}') {
728
72
        error_set(error, lex, json_error_invalid_syntax, "'}' expected");
729
72
        goto error;
730
72
    }
731
732
2.87k
    return object;
733
734
813
error:
735
813
    json_decref(object);
736
813
    return NULL;
737
2.94k
}
738
739
494k
static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) {
740
494k
    json_t *array = json_array();
741
494k
    if (!array)
742
0
        return NULL;
743
744
494k
    lex_scan(lex, error);
745
494k
    if (lex->token == ']')
746
3.15k
        return array;
747
748
2.79M
    while (lex->token) {
749
2.79M
        json_t *elem = parse_value(lex, flags, error);
750
2.79M
        if (!elem)
751
40.9k
            goto error;
752
753
2.75M
        if (json_array_append_new(array, elem)) {
754
0
            goto error;
755
0
        }
756
757
2.75M
        lex_scan(lex, error);
758
2.75M
        if (lex->token != ',')
759
450k
            break;
760
761
2.30M
        lex_scan(lex, error);
762
2.30M
    }
763
764
450k
    if (lex->token != ']') {
765
188
        error_set(error, lex, json_error_invalid_syntax, "']' expected");
766
188
        goto error;
767
188
    }
768
769
450k
    return array;
770
771
41.1k
error:
772
41.1k
    json_decref(array);
773
41.1k
    return NULL;
774
450k
}
775
776
2.81M
static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) {
777
2.81M
    json_t *json;
778
779
2.81M
    lex->depth++;
780
2.81M
    if (lex->depth > JSON_PARSER_MAX_DEPTH) {
781
1
        error_set(error, lex, json_error_stack_overflow, "maximum parsing depth reached");
782
1
        return NULL;
783
1
    }
784
785
2.81M
    switch (lex->token) {
786
1.83k
        case TOKEN_STRING: {
787
1.83k
            const char *value = lex->value.string.val;
788
1.83k
            size_t len = lex->value.string.len;
789
790
1.83k
            if (!(flags & JSON_ALLOW_NUL)) {
791
1.01k
                if (memchr(value, '\0', len)) {
792
1
                    error_set(error, lex, json_error_null_character,
793
1
                              "\\u0000 is not allowed without JSON_ALLOW_NUL");
794
1
                    return NULL;
795
1
                }
796
1.01k
            }
797
798
1.83k
            json = jsonp_stringn_nocheck_own(value, len);
799
1.83k
            lex->value.string.val = NULL;
800
1.83k
            lex->value.string.len = 0;
801
1.83k
            break;
802
1.83k
        }
803
804
1.45M
        case TOKEN_INTEGER: {
805
1.45M
            json = json_integer(lex->value.integer);
806
1.45M
            break;
807
1.83k
        }
808
809
854k
        case TOKEN_REAL: {
810
854k
            json = json_real(lex->value.real);
811
854k
            break;
812
1.83k
        }
813
814
682
        case TOKEN_TRUE:
815
682
            json = json_true();
816
682
            break;
817
818
6.42k
        case TOKEN_FALSE:
819
6.42k
            json = json_false();
820
6.42k
            break;
821
822
971
        case TOKEN_NULL:
823
971
            json = json_null();
824
971
            break;
825
826
4.94k
        case '{':
827
4.94k
            json = parse_object(lex, flags, error);
828
4.94k
            break;
829
830
494k
        case '[':
831
494k
            json = parse_array(lex, flags, error);
832
494k
            break;
833
834
723
        case TOKEN_INVALID:
835
723
            error_set(error, lex, json_error_invalid_syntax, "invalid token");
836
723
            return NULL;
837
838
44
        default:
839
44
            error_set(error, lex, json_error_invalid_syntax, "unexpected token");
840
44
            return NULL;
841
2.81M
    }
842
843
2.81M
    if (!json)
844
41.9k
        return NULL;
845
846
2.77M
    lex->depth--;
847
2.77M
    return json;
848
2.81M
}
849
850
7.18k
static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) {
851
7.18k
    json_t *result;
852
853
7.18k
    lex->depth = 0;
854
855
7.18k
    lex_scan(lex, error);
856
7.18k
    if (!(flags & JSON_DECODE_ANY)) {
857
1.36k
        if (lex->token != '[' && lex->token != '{') {
858
311
            error_set(error, lex, json_error_invalid_syntax, "'[' or '{' expected");
859
311
            return NULL;
860
311
        }
861
1.36k
    }
862
863
6.87k
    result = parse_value(lex, flags, error);
864
6.87k
    if (!result)
865
1.12k
        return NULL;
866
867
5.74k
    if (!(flags & JSON_DISABLE_EOF_CHECK)) {
868
1.01k
        lex_scan(lex, error);
869
1.01k
        if (lex->token != TOKEN_EOF) {
870
73
            error_set(error, lex, json_error_end_of_input_expected,
871
73
                      "end of file expected");
872
73
            json_decref(result);
873
73
            return NULL;
874
73
        }
875
1.01k
    }
876
877
5.67k
    if (error) {
878
        /* Save the position even though there was no error */
879
5.67k
        error->position = (int)lex->stream.position;
880
5.67k
    }
881
882
5.67k
    return result;
883
5.74k
}
884
885
typedef struct {
886
    const char *data;
887
    size_t pos;
888
} string_data_t;
889
890
0
static int string_get(void *data) {
891
0
    char c;
892
0
    string_data_t *stream = (string_data_t *)data;
893
0
    c = stream->data[stream->pos];
894
0
    if (c == '\0')
895
0
        return EOF;
896
0
    else {
897
0
        stream->pos++;
898
0
        return (unsigned char)c;
899
0
    }
900
0
}
901
902
0
json_t *json_loads(const char *string, size_t flags, json_error_t *error) {
903
0
    lex_t lex;
904
0
    json_t *result;
905
0
    string_data_t stream_data;
906
907
0
    jsonp_error_init(error, "<string>");
908
909
0
    if (string == NULL) {
910
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
911
0
        return NULL;
912
0
    }
913
914
0
    stream_data.data = string;
915
0
    stream_data.pos = 0;
916
917
0
    if (lex_init(&lex, string_get, flags, (void *)&stream_data))
918
0
        return NULL;
919
920
0
    result = parse_json(&lex, flags, error);
921
922
0
    lex_close(&lex);
923
0
    return result;
924
0
}
925
926
typedef struct {
927
    const char *data;
928
    size_t len;
929
    size_t pos;
930
} buffer_data_t;
931
932
21.6M
static int buffer_get(void *data) {
933
21.6M
    char c;
934
21.6M
    buffer_data_t *stream = data;
935
21.6M
    if (stream->pos >= stream->len)
936
4.87k
        return EOF;
937
938
21.6M
    c = stream->data[stream->pos];
939
21.6M
    stream->pos++;
940
21.6M
    return (unsigned char)c;
941
21.6M
}
942
943
7.18k
json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) {
944
7.18k
    lex_t lex;
945
7.18k
    json_t *result;
946
7.18k
    buffer_data_t stream_data;
947
948
7.18k
    jsonp_error_init(error, "<buffer>");
949
950
7.18k
    if (buffer == NULL) {
951
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
952
0
        return NULL;
953
0
    }
954
955
7.18k
    stream_data.data = buffer;
956
7.18k
    stream_data.pos = 0;
957
7.18k
    stream_data.len = buflen;
958
959
7.18k
    if (lex_init(&lex, buffer_get, flags, (void *)&stream_data))
960
0
        return NULL;
961
962
7.18k
    result = parse_json(&lex, flags, error);
963
964
7.18k
    lex_close(&lex);
965
7.18k
    return result;
966
7.18k
}
967
968
0
json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) {
969
0
    lex_t lex;
970
0
    const char *source;
971
0
    json_t *result;
972
973
0
    if (input == stdin)
974
0
        source = "<stdin>";
975
0
    else
976
0
        source = "<stream>";
977
978
0
    jsonp_error_init(error, source);
979
980
0
    if (input == NULL) {
981
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
982
0
        return NULL;
983
0
    }
984
985
0
    if (lex_init(&lex, (get_func)fgetc, flags, input))
986
0
        return NULL;
987
988
0
    result = parse_json(&lex, flags, error);
989
990
0
    lex_close(&lex);
991
0
    return result;
992
0
}
993
994
0
static int fd_get_func(int *fd) {
995
0
#ifdef HAVE_UNISTD_H
996
0
    uint8_t c;
997
0
    if (read(*fd, &c, 1) == 1)
998
0
        return c;
999
0
#endif
1000
0
    return EOF;
1001
0
}
1002
1003
0
json_t *json_loadfd(int input, size_t flags, json_error_t *error) {
1004
0
    lex_t lex;
1005
0
    const char *source;
1006
0
    json_t *result;
1007
1008
0
#ifdef HAVE_UNISTD_H
1009
0
    if (input == STDIN_FILENO)
1010
0
        source = "<stdin>";
1011
0
    else
1012
0
#endif
1013
0
        source = "<stream>";
1014
1015
0
    jsonp_error_init(error, source);
1016
1017
0
    if (input < 0) {
1018
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
1019
0
        return NULL;
1020
0
    }
1021
1022
0
    if (lex_init(&lex, (get_func)fd_get_func, flags, &input))
1023
0
        return NULL;
1024
1025
0
    result = parse_json(&lex, flags, error);
1026
1027
0
    lex_close(&lex);
1028
0
    return result;
1029
0
}
1030
1031
0
json_t *json_load_file(const char *path, size_t flags, json_error_t *error) {
1032
0
    json_t *result;
1033
0
    FILE *fp;
1034
1035
0
    jsonp_error_init(error, path);
1036
1037
0
    if (path == NULL) {
1038
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
1039
0
        return NULL;
1040
0
    }
1041
1042
0
    fp = fopen(path, "rb");
1043
0
    if (!fp) {
1044
0
        error_set(error, NULL, json_error_cannot_open_file, "unable to open %s: %s", path,
1045
0
                  strerror(errno));
1046
0
        return NULL;
1047
0
    }
1048
1049
0
    result = json_loadf(fp, flags, error);
1050
1051
0
    fclose(fp);
1052
0
    return result;
1053
0
}
1054
1055
0
#define MAX_BUF_LEN 1024
1056
1057
typedef struct {
1058
    char data[MAX_BUF_LEN];
1059
    size_t len;
1060
    size_t pos;
1061
    json_load_callback_t callback;
1062
    void *arg;
1063
} callback_data_t;
1064
1065
0
static int callback_get(void *data) {
1066
0
    char c;
1067
0
    callback_data_t *stream = data;
1068
1069
0
    if (stream->pos >= stream->len) {
1070
0
        stream->pos = 0;
1071
0
        stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg);
1072
0
        if (stream->len == 0 || stream->len == (size_t)-1)
1073
0
            return EOF;
1074
0
    }
1075
1076
0
    c = stream->data[stream->pos];
1077
0
    stream->pos++;
1078
0
    return (unsigned char)c;
1079
0
}
1080
1081
json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags,
1082
0
                           json_error_t *error) {
1083
0
    lex_t lex;
1084
0
    json_t *result;
1085
1086
0
    callback_data_t stream_data;
1087
1088
0
    memset(&stream_data, 0, sizeof(stream_data));
1089
0
    stream_data.callback = callback;
1090
0
    stream_data.arg = arg;
1091
1092
0
    jsonp_error_init(error, "<callback>");
1093
1094
0
    if (callback == NULL) {
1095
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
1096
0
        return NULL;
1097
0
    }
1098
1099
0
    if (lex_init(&lex, (get_func)callback_get, flags, &stream_data))
1100
0
        return NULL;
1101
1102
0
    result = parse_json(&lex, flags, error);
1103
1104
0
    lex_close(&lex);
1105
0
    return result;
1106
0
}