Coverage Report

Created: 2025-10-10 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jansson/src/load.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
3
 *
4
 * Jansson is free software; you can redistribute it and/or modify
5
 * it under the terms of the MIT license. See LICENSE for details.
6
 */
7
8
#ifndef _GNU_SOURCE
9
#define _GNU_SOURCE
10
#endif
11
12
#include "jansson_private.h"
13
14
#include <assert.h>
15
#include <errno.h>
16
#include <limits.h>
17
#include <stdio.h>
18
#include <stdlib.h>
19
#include <string.h>
20
#ifdef HAVE_UNISTD_H
21
#include <unistd.h>
22
#endif
23
24
#include "jansson.h"
25
#include "strbuffer.h"
26
#include "utf.h"
27
28
23.3M
#define STREAM_STATE_OK    0
29
61.8M
#define STREAM_STATE_EOF   -1
30
40.3M
#define STREAM_STATE_ERROR -2
31
32
2.40M
#define TOKEN_INVALID -1
33
2.13k
#define TOKEN_EOF     0
34
5.77M
#define TOKEN_STRING  256
35
2.84M
#define TOKEN_INTEGER 257
36
1.92M
#define TOKEN_REAL    258
37
1.06k
#define TOKEN_TRUE    259
38
11.8k
#define TOKEN_FALSE   260
39
1.68k
#define TOKEN_NULL    261
40
41
/* Locale independent versions of isxxx() functions */
42
233k
#define l_isupper(c) ('A' <= (c) && (c) <= 'Z')
43
127k
#define l_islower(c) ('a' <= (c) && (c) <= 'z')
44
113k
#define l_isalpha(c) (l_isupper(c) || l_islower(c))
45
10.2M
#define l_isdigit(c) ('0' <= (c) && (c) <= '9')
46
#define l_isxdigit(c)                                                                    \
47
25.9k
    (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
48
49
/* Read one byte from stream, convert to unsigned char, then int, and
50
   return. return EOF on end of file. This corresponds to the
51
   behaviour of fgetc(). */
52
typedef int (*get_func)(void *data);
53
54
typedef struct {
55
    get_func get;
56
    void *data;
57
    char buffer[5];
58
    size_t buffer_pos;
59
    int state;
60
    int line;
61
    int column, last_column;
62
    size_t position;
63
} stream_t;
64
65
typedef struct {
66
    stream_t stream;
67
    strbuffer_t saved_text;
68
    size_t flags;
69
    size_t depth;
70
    int token;
71
    union {
72
        struct {
73
            char *val;
74
            size_t len;
75
        } string;
76
        json_int_t integer;
77
        double real;
78
    } value;
79
} lex_t;
80
81
192
#define stream_to_lex(stream) container_of(stream, lex_t, stream)
82
83
/*** error reporting ***/
84
85
static void error_set(json_error_t *error, const lex_t *lex, enum json_error_code code,
86
2.16k
                      const char *msg, ...) {
87
2.16k
    va_list ap;
88
2.16k
    char msg_text[JSON_ERROR_TEXT_LENGTH];
89
2.16k
    char msg_with_context[JSON_ERROR_TEXT_LENGTH];
90
91
2.16k
    int line = -1, col = -1;
92
2.16k
    size_t pos = 0;
93
2.16k
    const char *result = msg_text;
94
95
2.16k
    if (!error)
96
0
        return;
97
98
2.16k
    va_start(ap, msg);
99
2.16k
    vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);
100
2.16k
    msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
101
2.16k
    va_end(ap);
102
103
2.16k
    if (lex) {
104
2.16k
        const char *saved_text = strbuffer_value(&lex->saved_text);
105
106
2.16k
        line = lex->stream.line;
107
2.16k
        col = lex->stream.column;
108
2.16k
        pos = lex->stream.position;
109
110
2.16k
        if (saved_text && saved_text[0]) {
111
1.58k
            if (lex->saved_text.length <= 20) {
112
1.39k
                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, "%s near '%s'",
113
1.39k
                         msg_text, saved_text);
114
1.39k
                msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
115
1.39k
                result = msg_with_context;
116
1.39k
            }
117
1.58k
        } else {
118
585
            if (code == json_error_invalid_syntax) {
119
                /* More specific error code for premature end of file. */
120
446
                code = json_error_premature_end_of_input;
121
446
            }
122
585
            if (lex->stream.state == STREAM_STATE_ERROR) {
123
                /* No context for UTF-8 decoding errors */
124
274
                result = msg_text;
125
311
            } else {
126
311
                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH, "%s near end of file",
127
311
                         msg_text);
128
311
                msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
129
311
                result = msg_with_context;
130
311
            }
131
585
        }
132
2.16k
    }
133
134
2.16k
    jsonp_error_set(error, line, col, pos, code, "%s", result);
135
2.16k
}
136
137
/*** lexical analyzer ***/
138
139
7.43k
static void stream_init(stream_t *stream, get_func get, void *data) {
140
7.43k
    stream->get = get;
141
7.43k
    stream->data = data;
142
7.43k
    stream->buffer[0] = '\0';
143
7.43k
    stream->buffer_pos = 0;
144
145
7.43k
    stream->state = STREAM_STATE_OK;
146
7.43k
    stream->line = 1;
147
7.43k
    stream->column = 0;
148
7.43k
    stream->position = 0;
149
7.43k
}
150
151
23.3M
static int stream_get(stream_t *stream, json_error_t *error) {
152
23.3M
    int c;
153
154
23.3M
    if (stream->state != STREAM_STATE_OK)
155
544
        return stream->state;
156
157
23.3M
    if (!stream->buffer[stream->buffer_pos]) {
158
20.9M
        c = stream->get(stream->data);
159
20.9M
        if (c == EOF) {
160
4.87k
            stream->state = STREAM_STATE_EOF;
161
4.87k
            return STREAM_STATE_EOF;
162
4.87k
        }
163
164
20.9M
        stream->buffer[0] = c;
165
20.9M
        stream->buffer_pos = 0;
166
167
20.9M
        if (0x80 <= c && c <= 0xFF) {
168
            /* multi-byte UTF-8 sequence */
169
3.48k
            size_t i, count;
170
171
3.48k
            count = utf8_check_first(c);
172
3.48k
            if (!count)
173
50
                goto out;
174
175
3.48k
            assert(count >= 2);
176
177
9.69k
            for (i = 1; i < count; i++)
178
6.25k
                stream->buffer[i] = stream->get(stream->data);
179
180
3.43k
            if (!utf8_check_full(stream->buffer, count, NULL))
181
142
                goto out;
182
183
3.29k
            stream->buffer[count] = '\0';
184
3.29k
        } else
185
20.9M
            stream->buffer[1] = '\0';
186
20.9M
    }
187
188
23.3M
    c = stream->buffer[stream->buffer_pos++];
189
190
23.3M
    stream->position++;
191
23.3M
    if (c == '\n') {
192
639
        stream->line++;
193
639
        stream->last_column = stream->column;
194
639
        stream->column = 0;
195
23.3M
    } else if (utf8_check_first(c)) {
196
        /* track the Unicode character column, so increment only if
197
           this is the first character of a UTF-8 sequence */
198
23.3M
        stream->column++;
199
23.3M
    }
200
201
23.3M
    return c;
202
203
192
out:
204
192
    stream->state = STREAM_STATE_ERROR;
205
192
    error_set(error, stream_to_lex(stream), json_error_invalid_utf8,
206
192
              "unable to decode byte 0x%x", c);
207
192
    return STREAM_STATE_ERROR;
208
23.3M
}
209
210
2.39M
static void stream_unget(stream_t *stream, int c) {
211
2.39M
    if (c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)
212
7
        return;
213
214
2.39M
    stream->position--;
215
2.39M
    if (c == '\n') {
216
219
        stream->line--;
217
219
        stream->column = stream->last_column;
218
2.39M
    } else if (utf8_check_first(c))
219
2.39M
        stream->column--;
220
221
2.39M
    assert(stream->buffer_pos > 0);
222
2.39M
    stream->buffer_pos--;
223
2.39M
    assert(stream->buffer[stream->buffer_pos] == c);
224
2.39M
}
225
226
6.63M
static int lex_get(lex_t *lex, json_error_t *error) {
227
6.63M
    return stream_get(&lex->stream, error);
228
6.63M
}
229
230
23.3M
static void lex_save(lex_t *lex, int c) { strbuffer_append_byte(&lex->saved_text, c); }
231
232
16.7M
static int lex_get_save(lex_t *lex, json_error_t *error) {
233
16.7M
    int c = stream_get(&lex->stream, error);
234
16.7M
    if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)
235
16.7M
        lex_save(lex, c);
236
16.7M
    return c;
237
16.7M
}
238
239
22
static void lex_unget(lex_t *lex, int c) { stream_unget(&lex->stream, c); }
240
241
2.39M
static void lex_unget_unsave(lex_t *lex, int c) {
242
2.39M
    if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) {
243
/* Since we treat warnings as errors, when assertions are turned
244
 * off the "d" variable would be set but never used. Which is
245
 * treated as an error by GCC.
246
 */
247
2.39M
#ifndef NDEBUG
248
2.39M
        char d;
249
2.39M
#endif
250
2.39M
        stream_unget(&lex->stream, c);
251
2.39M
#ifndef NDEBUG
252
2.39M
        d =
253
2.39M
#endif
254
2.39M
            strbuffer_pop(&lex->saved_text);
255
2.39M
        assert(c == d);
256
2.39M
    }
257
2.39M
}
258
259
159
static void lex_save_cached(lex_t *lex) {
260
364
    while (lex->stream.buffer[lex->stream.buffer_pos] != '\0') {
261
205
        lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
262
205
        lex->stream.buffer_pos++;
263
205
        lex->stream.position++;
264
205
    }
265
159
}
266
267
11.0k
static void lex_free_string(lex_t *lex) {
268
11.0k
    jsonp_free(lex->value.string.val);
269
11.0k
    lex->value.string.val = NULL;
270
11.0k
    lex->value.string.len = 0;
271
11.0k
}
272
273
/* assumes that str points to 'u' plus at least 4 valid hex digits */
274
5.65k
static int32_t decode_unicode_escape(const char *str) {
275
5.65k
    int i;
276
5.65k
    int32_t value = 0;
277
278
5.65k
    assert(str[0] == 'u');
279
280
28.2k
    for (i = 1; i <= 4; i++) {
281
22.6k
        char c = str[i];
282
22.6k
        value <<= 4;
283
22.6k
        if (l_isdigit(c))
284
7.91k
            value += c - '0';
285
14.6k
        else if (l_islower(c))
286
7.79k
            value += c - 'a' + 10;
287
6.88k
        else if (l_isupper(c))
288
6.88k
            value += c - 'A' + 10;
289
0
        else
290
0
            return -1;
291
22.6k
    }
292
293
5.65k
    return value;
294
5.65k
}
295
296
11.0k
static void lex_scan_string(lex_t *lex, json_error_t *error) {
297
11.0k
    int c;
298
11.0k
    const char *p;
299
11.0k
    char *t;
300
11.0k
    int i;
301
302
11.0k
    lex->value.string.val = NULL;
303
11.0k
    lex->token = TOKEN_INVALID;
304
305
11.0k
    c = lex_get_save(lex, error);
306
307
13.1M
    while (c != '"') {
308
13.0M
        if (c == STREAM_STATE_ERROR)
309
29
            goto out;
310
311
13.0M
        else if (c == STREAM_STATE_EOF) {
312
162
            error_set(error, lex, json_error_premature_end_of_input,
313
162
                      "premature end of input");
314
162
            goto out;
315
162
        }
316
317
13.0M
        else if (0 <= c && c <= 0x1F) {
318
            /* control character */
319
27
            lex_unget_unsave(lex, c);
320
27
            if (c == '\n')
321
1
                error_set(error, lex, json_error_invalid_syntax, "unexpected newline");
322
26
            else
323
26
                error_set(error, lex, json_error_invalid_syntax, "control character 0x%x",
324
26
                          c);
325
27
            goto out;
326
27
        }
327
328
13.0M
        else if (c == '\\') {
329
14.6k
            c = lex_get_save(lex, error);
330
14.6k
            if (c == 'u') {
331
6.52k
                c = lex_get_save(lex, error);
332
32.3k
                for (i = 0; i < 4; i++) {
333
25.9k
                    if (!l_isxdigit(c)) {
334
80
                        error_set(error, lex, json_error_invalid_syntax,
335
80
                                  "invalid escape");
336
80
                        goto out;
337
80
                    }
338
25.8k
                    c = lex_get_save(lex, error);
339
25.8k
                }
340
8.16k
            } else if (c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' ||
341
2.27k
                       c == 'n' || c == 'r' || c == 't')
342
8.10k
                c = lex_get_save(lex, error);
343
64
            else {
344
64
                error_set(error, lex, json_error_invalid_syntax, "invalid escape");
345
64
                goto out;
346
64
            }
347
14.6k
        } else
348
13.0M
            c = lex_get_save(lex, error);
349
13.0M
    }
350
351
    /* the actual value is at most of the same length as the source
352
       string, because:
353
         - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
354
         - a single \uXXXX escape (length 6) is converted to at most 3 bytes
355
         - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
356
           are converted to 4 bytes
357
    */
358
10.7k
    t = jsonp_malloc(lex->saved_text.length + 1);
359
10.7k
    if (!t) {
360
        /* this is not very nice, since TOKEN_INVALID is returned */
361
0
        goto out;
362
0
    }
363
10.7k
    lex->value.string.val = t;
364
365
    /* + 1 to skip the " */
366
10.7k
    p = strbuffer_value(&lex->saved_text) + 1;
367
368
10.9M
    while (*p != '"') {
369
10.9M
        if (*p == '\\') {
370
9.56k
            p++;
371
9.56k
            if (*p == 'u') {
372
4.45k
                size_t length;
373
4.45k
                int32_t value;
374
375
4.45k
                value = decode_unicode_escape(p);
376
4.45k
                if (value < 0) {
377
0
                    error_set(error, lex, json_error_invalid_syntax,
378
0
                              "invalid Unicode escape '%.6s'", p - 1);
379
0
                    goto out;
380
0
                }
381
4.45k
                p += 5;
382
383
4.45k
                if (0xD800 <= value && value <= 0xDBFF) {
384
                    /* surrogate pair */
385
1.22k
                    if (*p == '\\' && *(p + 1) == 'u') {
386
1.19k
                        int32_t value2 = decode_unicode_escape(++p);
387
1.19k
                        if (value2 < 0) {
388
0
                            error_set(error, lex, json_error_invalid_syntax,
389
0
                                      "invalid Unicode escape '%.6s'", p - 1);
390
0
                            goto out;
391
0
                        }
392
1.19k
                        p += 5;
393
394
1.19k
                        if (0xDC00 <= value2 && value2 <= 0xDFFF) {
395
                            /* valid second surrogate */
396
1.15k
                            value =
397
1.15k
                                ((value - 0xD800) << 10) + (value2 - 0xDC00) + 0x10000;
398
1.15k
                        } else {
399
                            /* invalid second surrogate */
400
40
                            error_set(error, lex, json_error_invalid_syntax,
401
40
                                      "invalid Unicode '\\u%04X\\u%04X'", value, value2);
402
40
                            goto out;
403
40
                        }
404
1.19k
                    } else {
405
                        /* no second surrogate */
406
27
                        error_set(error, lex, json_error_invalid_syntax,
407
27
                                  "invalid Unicode '\\u%04X'", value);
408
27
                        goto out;
409
27
                    }
410
3.22k
                } else if (0xDC00 <= value && value <= 0xDFFF) {
411
15
                    error_set(error, lex, json_error_invalid_syntax,
412
15
                              "invalid Unicode '\\u%04X'", value);
413
15
                    goto out;
414
15
                }
415
416
4.36k
                if (utf8_encode(value, t, &length))
417
4.36k
                    assert(0);
418
4.36k
                t += length;
419
5.11k
            } else {
420
5.11k
                switch (*p) {
421
339
                    case '"':
422
2.16k
                    case '\\':
423
2.38k
                    case '/':
424
2.38k
                        *t = *p;
425
2.38k
                        break;
426
453
                    case 'b':
427
453
                        *t = '\b';
428
453
                        break;
429
664
                    case 'f':
430
664
                        *t = '\f';
431
664
                        break;
432
646
                    case 'n':
433
646
                        *t = '\n';
434
646
                        break;
435
395
                    case 'r':
436
395
                        *t = '\r';
437
395
                        break;
438
572
                    case 't':
439
572
                        *t = '\t';
440
572
                        break;
441
0
                    default:
442
0
                        assert(0);
443
5.11k
                }
444
5.11k
                t++;
445
5.11k
                p++;
446
5.11k
            }
447
9.56k
        } else
448
10.9M
            *(t++) = *(p++);
449
10.9M
    }
450
10.6k
    *t = '\0';
451
10.6k
    lex->value.string.len = t - lex->value.string.val;
452
10.6k
    lex->token = TOKEN_STRING;
453
10.6k
    return;
454
455
444
out:
456
444
    lex_free_string(lex);
457
444
}
458
459
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
460
#if JSON_INTEGER_IS_LONG_LONG
461
#ifdef _MSC_VER /* Microsoft Visual Studio */
462
#define json_strtoint _strtoi64
463
#else
464
1.42M
#define json_strtoint strtoll
465
#endif
466
#else
467
#define json_strtoint strtol
468
#endif
469
#endif
470
471
2.38M
static int lex_scan_number(lex_t *lex, int c, json_error_t *error) {
472
2.38M
    const char *saved_text;
473
2.38M
    char *end;
474
2.38M
    double doubleval;
475
476
2.38M
    lex->token = TOKEN_INVALID;
477
478
2.38M
    if (c == '-')
479
633
        c = lex_get_save(lex, error);
480
481
2.38M
    if (c == '0') {
482
1.37M
        c = lex_get_save(lex, error);
483
1.37M
        if (l_isdigit(c)) {
484
4
            lex_unget_unsave(lex, c);
485
4
            goto out;
486
4
        }
487
1.37M
    } else if (l_isdigit(c)) {
488
1.01M
        do
489
1.11M
            c = lex_get_save(lex, error);
490
1.11M
        while (l_isdigit(c));
491
1.01M
    } else {
492
40
        lex_unget_unsave(lex, c);
493
40
        goto out;
494
40
    }
495
496
2.38M
    if (!(lex->flags & JSON_DECODE_INT_AS_REAL) && c != '.' && c != 'E' && c != 'e') {
497
1.42M
        json_int_t intval;
498
499
1.42M
        lex_unget_unsave(lex, c);
500
501
1.42M
        saved_text = strbuffer_value(&lex->saved_text);
502
503
1.42M
        errno = 0;
504
1.42M
        intval = json_strtoint(saved_text, &end, 10);
505
1.42M
        if (errno == ERANGE) {
506
3
            if (intval < 0)
507
1
                error_set(error, lex, json_error_numeric_overflow,
508
1
                          "too big negative integer");
509
2
            else
510
2
                error_set(error, lex, json_error_numeric_overflow, "too big integer");
511
3
            goto out;
512
3
        }
513
514
1.42M
        assert(end == saved_text + lex->saved_text.length);
515
516
1.42M
        lex->token = TOKEN_INTEGER;
517
1.42M
        lex->value.integer = intval;
518
1.42M
        return 0;
519
1.42M
    }
520
521
962k
    if (c == '.') {
522
895k
        c = lex_get(lex, error);
523
895k
        if (!l_isdigit(c)) {
524
22
            lex_unget(lex, c);
525
22
            goto out;
526
22
        }
527
895k
        lex_save(lex, c);
528
529
895k
        do
530
917k
            c = lex_get_save(lex, error);
531
917k
        while (l_isdigit(c));
532
895k
    }
533
534
962k
    if (c == 'E' || c == 'e') {
535
19.9k
        c = lex_get_save(lex, error);
536
19.9k
        if (c == '+' || c == '-')
537
7.81k
            c = lex_get_save(lex, error);
538
539
19.9k
        if (!l_isdigit(c)) {
540
83
            lex_unget_unsave(lex, c);
541
83
            goto out;
542
83
        }
543
544
19.8k
        do
545
33.7k
            c = lex_get_save(lex, error);
546
33.7k
        while (l_isdigit(c));
547
19.8k
    }
548
549
961k
    lex_unget_unsave(lex, c);
550
551
961k
    if (jsonp_strtod(&lex->saved_text, &doubleval)) {
552
8
        error_set(error, lex, json_error_numeric_overflow, "real number overflow");
553
8
        goto out;
554
8
    }
555
556
961k
    lex->token = TOKEN_REAL;
557
961k
    lex->value.real = doubleval;
558
961k
    return 0;
559
560
160
out:
561
160
    return -1;
562
961k
}
563
564
5.73M
static int lex_scan(lex_t *lex, json_error_t *error) {
565
5.73M
    int c;
566
567
5.73M
    strbuffer_clear(&lex->saved_text);
568
569
5.73M
    if (lex->token == TOKEN_STRING)
570
10.2k
        lex_free_string(lex);
571
572
5.73M
    do
573
5.73M
        c = lex_get(lex, error);
574
5.73M
    while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
575
576
5.73M
    if (c == STREAM_STATE_EOF) {
577
1.18k
        lex->token = TOKEN_EOF;
578
1.18k
        goto out;
579
1.18k
    }
580
581
5.73M
    if (c == STREAM_STATE_ERROR) {
582
139
        lex->token = TOKEN_INVALID;
583
139
        goto out;
584
139
    }
585
586
5.73M
    lex_save(lex, c);
587
588
5.73M
    if (c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
589
3.33M
        lex->token = c;
590
591
2.40M
    else if (c == '"')
592
11.0k
        lex_scan_string(lex, error);
593
594
2.39M
    else if (l_isdigit(c) || c == '-') {
595
2.38M
        if (lex_scan_number(lex, c, error))
596
160
            goto out;
597
2.38M
    }
598
599
7.63k
    else if (l_isalpha(c)) {
600
        /* eat up the whole identifier for clearer error messages */
601
7.47k
        const char *saved_text;
602
603
7.47k
        do
604
105k
            c = lex_get_save(lex, error);
605
105k
        while (l_isalpha(c));
606
7.47k
        lex_unget_unsave(lex, c);
607
608
7.47k
        saved_text = strbuffer_value(&lex->saved_text);
609
610
7.47k
        if (strcmp(saved_text, "true") == 0)
611
535
            lex->token = TOKEN_TRUE;
612
6.93k
        else if (strcmp(saved_text, "false") == 0)
613
5.91k
            lex->token = TOKEN_FALSE;
614
1.02k
        else if (strcmp(saved_text, "null") == 0)
615
844
            lex->token = TOKEN_NULL;
616
180
        else
617
180
            lex->token = TOKEN_INVALID;
618
7.47k
    }
619
620
159
    else {
621
        /* save the rest of the input UTF-8 sequence to get an error
622
           message of valid UTF-8 */
623
159
        lex_save_cached(lex);
624
159
        lex->token = TOKEN_INVALID;
625
159
    }
626
627
5.73M
out:
628
5.73M
    return lex->token;
629
5.73M
}
630
631
8.87k
static char *lex_steal_string(lex_t *lex, size_t *out_len) {
632
8.87k
    char *result = NULL;
633
8.87k
    if (lex->token == TOKEN_STRING) {
634
8.87k
        result = lex->value.string.val;
635
8.87k
        *out_len = lex->value.string.len;
636
8.87k
        lex->value.string.val = NULL;
637
8.87k
        lex->value.string.len = 0;
638
8.87k
    }
639
8.87k
    return result;
640
8.87k
}
641
642
7.43k
static int lex_init(lex_t *lex, get_func get, size_t flags, void *data) {
643
7.43k
    stream_init(&lex->stream, get, data);
644
7.43k
    if (strbuffer_init(&lex->saved_text))
645
0
        return -1;
646
647
7.43k
    lex->flags = flags;
648
7.43k
    lex->token = TOKEN_INVALID;
649
7.43k
    return 0;
650
7.43k
}
651
652
7.43k
static void lex_close(lex_t *lex) {
653
7.43k
    if (lex->token == TOKEN_STRING)
654
416
        lex_free_string(lex);
655
7.43k
    strbuffer_close(&lex->saved_text);
656
7.43k
}
657
658
/*** parser ***/
659
660
static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);
661
662
4.11k
static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) {
663
4.11k
    json_t *object = json_object();
664
4.11k
    if (!object)
665
0
        return NULL;
666
667
4.11k
    lex_scan(lex, error);
668
4.11k
    if (lex->token == '}')
669
1.03k
        return object;
670
671
8.92k
    while (1) {
672
8.92k
        char *key;
673
8.92k
        size_t len;
674
8.92k
        json_t *value;
675
676
8.92k
        if (lex->token != TOKEN_STRING) {
677
45
            error_set(error, lex, json_error_invalid_syntax, "string or '}' expected");
678
45
            goto error;
679
45
        }
680
681
8.87k
        key = lex_steal_string(lex, &len);
682
8.87k
        if (!key)
683
0
            return NULL;
684
8.87k
        if (memchr(key, '\0', len)) {
685
1
            jsonp_free(key);
686
1
            error_set(error, lex, json_error_null_byte_in_key,
687
1
                      "NUL byte in object key not supported");
688
1
            goto error;
689
1
        }
690
691
8.87k
        if (flags & JSON_REJECT_DUPLICATES) {
692
2.37k
            if (json_object_getn(object, key, len)) {
693
1
                jsonp_free(key);
694
1
                error_set(error, lex, json_error_duplicate_key, "duplicate object key");
695
1
                goto error;
696
1
            }
697
2.37k
        }
698
699
8.87k
        lex_scan(lex, error);
700
8.87k
        if (lex->token != ':') {
701
44
            jsonp_free(key);
702
44
            error_set(error, lex, json_error_invalid_syntax, "':' expected");
703
44
            goto error;
704
44
        }
705
706
8.82k
        lex_scan(lex, error);
707
8.82k
        value = parse_value(lex, flags, error);
708
8.82k
        if (!value) {
709
647
            jsonp_free(key);
710
647
            goto error;
711
647
        }
712
713
8.18k
        if (json_object_setn_new_nocheck(object, key, len, value)) {
714
0
            jsonp_free(key);
715
0
            goto error;
716
0
        }
717
718
8.18k
        jsonp_free(key);
719
720
8.18k
        lex_scan(lex, error);
721
8.18k
        if (lex->token != ',')
722
2.34k
            break;
723
724
5.84k
        lex_scan(lex, error);
725
5.84k
    }
726
727
2.34k
    if (lex->token != '}') {
728
79
        error_set(error, lex, json_error_invalid_syntax, "'}' expected");
729
79
        goto error;
730
79
    }
731
732
2.26k
    return object;
733
734
817
error:
735
817
    json_decref(object);
736
817
    return NULL;
737
2.34k
}
738
739
484k
static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) {
740
484k
    json_t *array = json_array();
741
484k
    if (!array)
742
0
        return NULL;
743
744
484k
    lex_scan(lex, error);
745
484k
    if (lex->token == ']')
746
2.92k
        return array;
747
748
2.86M
    while (lex->token) {
749
2.86M
        json_t *elem = parse_value(lex, flags, error);
750
2.86M
        if (!elem)
751
49.0k
            goto error;
752
753
2.82M
        if (json_array_append_new(array, elem)) {
754
0
            goto error;
755
0
        }
756
757
2.82M
        lex_scan(lex, error);
758
2.82M
        if (lex->token != ',')
759
432k
            break;
760
761
2.38M
        lex_scan(lex, error);
762
2.38M
    }
763
764
432k
    if (lex->token != ']') {
765
192
        error_set(error, lex, json_error_invalid_syntax, "']' expected");
766
192
        goto error;
767
192
    }
768
769
432k
    return array;
770
771
49.2k
error:
772
49.2k
    json_decref(array);
773
49.2k
    return NULL;
774
432k
}
775
776
2.88M
static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) {
777
2.88M
    json_t *json;
778
779
2.88M
    lex->depth++;
780
2.88M
    if (lex->depth > JSON_PARSER_MAX_DEPTH) {
781
2
        error_set(error, lex, json_error_stack_overflow, "maximum parsing depth reached");
782
2
        return NULL;
783
2
    }
784
785
2.88M
    switch (lex->token) {
786
1.73k
        case TOKEN_STRING: {
787
1.73k
            const char *value = lex->value.string.val;
788
1.73k
            size_t len = lex->value.string.len;
789
790
1.73k
            if (!(flags & JSON_ALLOW_NUL)) {
791
829
                if (memchr(value, '\0', len)) {
792
3
                    error_set(error, lex, json_error_null_character,
793
3
                              "\\u0000 is not allowed without JSON_ALLOW_NUL");
794
3
                    return NULL;
795
3
                }
796
829
            }
797
798
1.73k
            json = jsonp_stringn_nocheck_own(value, len);
799
1.73k
            lex->value.string.val = NULL;
800
1.73k
            lex->value.string.len = 0;
801
1.73k
            break;
802
1.73k
        }
803
804
1.42M
        case TOKEN_INTEGER: {
805
1.42M
            json = json_integer(lex->value.integer);
806
1.42M
            break;
807
1.73k
        }
808
809
961k
        case TOKEN_REAL: {
810
961k
            json = json_real(lex->value.real);
811
961k
            break;
812
1.73k
        }
813
814
533
        case TOKEN_TRUE:
815
533
            json = json_true();
816
533
            break;
817
818
5.91k
        case TOKEN_FALSE:
819
5.91k
            json = json_false();
820
5.91k
            break;
821
822
841
        case TOKEN_NULL:
823
841
            json = json_null();
824
841
            break;
825
826
4.11k
        case '{':
827
4.11k
            json = parse_object(lex, flags, error);
828
4.11k
            break;
829
830
484k
        case '[':
831
484k
            json = parse_array(lex, flags, error);
832
484k
            break;
833
834
721
        case TOKEN_INVALID:
835
721
            error_set(error, lex, json_error_invalid_syntax, "invalid token");
836
721
            return NULL;
837
838
35
        default:
839
35
            error_set(error, lex, json_error_invalid_syntax, "unexpected token");
840
35
            return NULL;
841
2.88M
    }
842
843
2.88M
    if (!json)
844
50.0k
        return NULL;
845
846
2.83M
    lex->depth--;
847
2.83M
    return json;
848
2.88M
}
849
850
7.43k
static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) {
851
7.43k
    json_t *result;
852
853
7.43k
    lex->depth = 0;
854
855
7.43k
    lex_scan(lex, error);
856
7.43k
    if (!(flags & JSON_DECODE_ANY)) {
857
1.34k
        if (lex->token != '[' && lex->token != '{') {
858
345
            error_set(error, lex, json_error_invalid_syntax, "'[' or '{' expected");
859
345
            return NULL;
860
345
        }
861
1.34k
    }
862
863
7.08k
    result = parse_value(lex, flags, error);
864
7.08k
    if (!result)
865
1.12k
        return NULL;
866
867
5.96k
    if (!(flags & JSON_DISABLE_EOF_CHECK)) {
868
956
        lex_scan(lex, error);
869
956
        if (lex->token != TOKEN_EOF) {
870
82
            error_set(error, lex, json_error_end_of_input_expected,
871
82
                      "end of file expected");
872
82
            json_decref(result);
873
82
            return NULL;
874
82
        }
875
956
    }
876
877
5.88k
    if (error) {
878
        /* Save the position even though there was no error */
879
5.88k
        error->position = (int)lex->stream.position;
880
5.88k
    }
881
882
5.88k
    return result;
883
5.96k
}
884
885
typedef struct {
886
    const char *data;
887
    size_t pos;
888
} string_data_t;
889
890
0
static int string_get(void *data) {
891
0
    char c;
892
0
    string_data_t *stream = (string_data_t *)data;
893
0
    c = stream->data[stream->pos];
894
0
    if (c == '\0')
895
0
        return EOF;
896
0
    else {
897
0
        stream->pos++;
898
0
        return (unsigned char)c;
899
0
    }
900
0
}
901
902
0
json_t *json_loads(const char *string, size_t flags, json_error_t *error) {
903
0
    lex_t lex;
904
0
    json_t *result;
905
0
    string_data_t stream_data;
906
907
0
    jsonp_error_init(error, "<string>");
908
909
0
    if (string == NULL) {
910
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
911
0
        return NULL;
912
0
    }
913
914
0
    stream_data.data = string;
915
0
    stream_data.pos = 0;
916
917
0
    if (lex_init(&lex, string_get, flags, (void *)&stream_data))
918
0
        return NULL;
919
920
0
    result = parse_json(&lex, flags, error);
921
922
0
    lex_close(&lex);
923
0
    return result;
924
0
}
925
926
typedef struct {
927
    const char *data;
928
    size_t len;
929
    size_t pos;
930
} buffer_data_t;
931
932
20.9M
static int buffer_get(void *data) {
933
20.9M
    char c;
934
20.9M
    buffer_data_t *stream = data;
935
20.9M
    if (stream->pos >= stream->len)
936
5.05k
        return EOF;
937
938
20.9M
    c = stream->data[stream->pos];
939
20.9M
    stream->pos++;
940
20.9M
    return (unsigned char)c;
941
20.9M
}
942
943
7.43k
json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) {
944
7.43k
    lex_t lex;
945
7.43k
    json_t *result;
946
7.43k
    buffer_data_t stream_data;
947
948
7.43k
    jsonp_error_init(error, "<buffer>");
949
950
7.43k
    if (buffer == NULL) {
951
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
952
0
        return NULL;
953
0
    }
954
955
7.43k
    stream_data.data = buffer;
956
7.43k
    stream_data.pos = 0;
957
7.43k
    stream_data.len = buflen;
958
959
7.43k
    if (lex_init(&lex, buffer_get, flags, (void *)&stream_data))
960
0
        return NULL;
961
962
7.43k
    result = parse_json(&lex, flags, error);
963
964
7.43k
    lex_close(&lex);
965
7.43k
    return result;
966
7.43k
}
967
968
0
json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) {
969
0
    lex_t lex;
970
0
    const char *source;
971
0
    json_t *result;
972
973
0
    if (input == stdin)
974
0
        source = "<stdin>";
975
0
    else
976
0
        source = "<stream>";
977
978
0
    jsonp_error_init(error, source);
979
980
0
    if (input == NULL) {
981
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
982
0
        return NULL;
983
0
    }
984
985
0
    if (lex_init(&lex, (get_func)fgetc, flags, input))
986
0
        return NULL;
987
988
0
    result = parse_json(&lex, flags, error);
989
990
0
    lex_close(&lex);
991
0
    return result;
992
0
}
993
994
0
static int fd_get_func(int *fd) {
995
0
#ifdef HAVE_UNISTD_H
996
0
    uint8_t c;
997
0
    if (read(*fd, &c, 1) == 1)
998
0
        return c;
999
0
#endif
1000
0
    return EOF;
1001
0
}
1002
1003
0
json_t *json_loadfd(int input, size_t flags, json_error_t *error) {
1004
0
    lex_t lex;
1005
0
    const char *source;
1006
0
    json_t *result;
1007
1008
0
#ifdef HAVE_UNISTD_H
1009
0
    if (input == STDIN_FILENO)
1010
0
        source = "<stdin>";
1011
0
    else
1012
0
#endif
1013
0
        source = "<stream>";
1014
1015
0
    jsonp_error_init(error, source);
1016
1017
0
    if (input < 0) {
1018
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
1019
0
        return NULL;
1020
0
    }
1021
1022
0
    if (lex_init(&lex, (get_func)fd_get_func, flags, &input))
1023
0
        return NULL;
1024
1025
0
    result = parse_json(&lex, flags, error);
1026
1027
0
    lex_close(&lex);
1028
0
    return result;
1029
0
}
1030
1031
0
json_t *json_load_file(const char *path, size_t flags, json_error_t *error) {
1032
0
    json_t *result;
1033
0
    FILE *fp;
1034
1035
0
    jsonp_error_init(error, path);
1036
1037
0
    if (path == NULL) {
1038
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
1039
0
        return NULL;
1040
0
    }
1041
1042
0
    fp = fopen(path, "rb");
1043
0
    if (!fp) {
1044
0
        error_set(error, NULL, json_error_cannot_open_file, "unable to open %s: %s", path,
1045
0
                  strerror(errno));
1046
0
        return NULL;
1047
0
    }
1048
1049
0
    result = json_loadf(fp, flags, error);
1050
1051
0
    fclose(fp);
1052
0
    return result;
1053
0
}
1054
1055
0
#define MAX_BUF_LEN 1024
1056
1057
typedef struct {
1058
    char data[MAX_BUF_LEN];
1059
    size_t len;
1060
    size_t pos;
1061
    json_load_callback_t callback;
1062
    void *arg;
1063
} callback_data_t;
1064
1065
0
static int callback_get(void *data) {
1066
0
    char c;
1067
0
    callback_data_t *stream = data;
1068
1069
0
    if (stream->pos >= stream->len) {
1070
0
        stream->pos = 0;
1071
0
        stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg);
1072
0
        if (stream->len == 0 || stream->len == (size_t)-1)
1073
0
            return EOF;
1074
0
    }
1075
1076
0
    c = stream->data[stream->pos];
1077
0
    stream->pos++;
1078
0
    return (unsigned char)c;
1079
0
}
1080
1081
json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags,
1082
0
                           json_error_t *error) {
1083
0
    lex_t lex;
1084
0
    json_t *result;
1085
1086
0
    callback_data_t stream_data;
1087
1088
0
    memset(&stream_data, 0, sizeof(stream_data));
1089
0
    stream_data.callback = callback;
1090
0
    stream_data.arg = arg;
1091
1092
0
    jsonp_error_init(error, "<callback>");
1093
1094
0
    if (callback == NULL) {
1095
0
        error_set(error, NULL, json_error_invalid_argument, "wrong arguments");
1096
0
        return NULL;
1097
0
    }
1098
1099
0
    if (lex_init(&lex, (get_func)callback_get, flags, &stream_data))
1100
0
        return NULL;
1101
1102
0
    result = parse_json(&lex, flags, error);
1103
1104
0
    lex_close(&lex);
1105
0
    return result;
1106
0
}