Coverage Report

Created: 2025-07-01 06:58

/src/tarantool/third_party/lua-cjson/lua_cjson.c
Line
Count
Source (jump to first uncovered line)
1
/* Lua CJSON - JSON support for Lua
2
 *
3
 * Copyright (c) 2010-2012  Mark Pulford <mark@kyne.com.au>
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining
6
 * a copy of this software and associated documentation files (the
7
 * "Software"), to deal in the Software without restriction, including
8
 * without limitation the rights to use, copy, modify, merge, publish,
9
 * distribute, sublicense, and/or sell copies of the Software, and to
10
 * permit persons to whom the Software is furnished to do so, subject to
11
 * the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be
14
 * included in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24
25
/* Caveats:
26
 * - JSON "null" values are represented as lightuserdata since Lua
27
 *   tables cannot contain "nil". Compare with cjson.null.
28
 * - Invalid UTF-8 characters are not detected and will be passed
29
 *   untouched. If required, UTF-8 error checking should be done
30
 *   outside this library.
31
 * - Javascript comments are not part of the JSON spec, and are not
32
 *   currently supported.
33
 *
34
 * Note: Decoding is slower than encoding. Lua spends significant
35
 *       time (30%) managing tables when parsing JSON since it is
36
 *       difficult to know object/array sizes ahead of time.
37
 */
38
39
#include "trivia/util.h"

#include <assert.h>
#include <errno.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <lua.h>
#include <lauxlib.h>

#include "strbuf.h"

#include "lua/utils.h"
#include "lua/serializer.h"
#include "mp_extension_types.h" /* MP_DECIMAL, MP_UUID */
#include "diag.h"
#include "tt_static.h"
#include "core/datetime.h"
#include "cord_buf.h"
#include "tt_uuid.h" /* tt_uuid_to_string(), UUID_STR_LEN */
58
59
/* Token kinds produced by the JSON tokenizer. */
typedef enum {
    T_OBJ_BEGIN,    /* '{' */
    T_OBJ_END,      /* '}' */
    T_ARR_BEGIN,    /* '[' */
    T_ARR_END,      /* ']' */
    T_STRING,
    T_UINT,
    T_INT,
    T_NUMBER,
    T_BOOLEAN,
    T_NULL,
    T_COLON,        /* ':' */
    T_COMMA,        /* ',' */
    T_END,          /* '\0' terminator of the input */
    T_WHITESPACE,   /* ' ', '\t', '\r' */
    T_LINEFEED,     /* '\n' */
    T_ERROR,        /* Character that cannot start any token */
    T_UNKNOWN       /* Character that needs further processing */
} json_token_type_t;

/*
 * Human readable token names used in parse error messages.
 * Indexed by json_token_type_t and terminated by a NULL entry.
 */
static const char *json_token_type_name[] = {
    [T_OBJ_BEGIN]   = "'{'",
    [T_OBJ_END]     = "'}'",
    [T_ARR_BEGIN]   = "'['",
    [T_ARR_END]     = "']'",
    [T_STRING]      = "string",
    [T_UINT]        = "unsigned int",
    [T_INT]         = "int",
    [T_NUMBER]      = "number",
    [T_BOOLEAN]     = "boolean",
    [T_NULL]        = "null",
    [T_COLON]       = "colon",
    [T_COMMA]       = "comma",
    [T_END]         = "end",
    [T_WHITESPACE]  = "whitespace",
    [T_LINEFEED]    = "line feed",
    [T_ERROR]       = "error",
    [T_UNKNOWN]     = "unknown symbol",
    [T_UNKNOWN + 1] = NULL
};
99
100
static struct luaL_serializer *luaL_json_default;
101
102
static json_token_type_t ch2token[256];
103
static char escape2char[256];  /* Decoding */
104
105
typedef struct {
106
    const char *data;
107
    const char *ptr;
108
    strbuf_t *tmp;    /* Temporary storage for strings */
109
    struct luaL_serializer *cfg;
110
    int current_depth;
111
    int line_count;
112
    const char *cur_line_ptr;
113
} json_parse_t;
114
115
typedef struct {
116
    json_token_type_t type;
117
    int column_index;
118
    union {
119
        const char *string;
120
        double number;
121
        int boolean;
122
    long long ival;
123
    } value;
124
    int string_len;
125
} json_token_t;
126
127
/*
 * Disabled code: this block is compiled out by "#if 0". It refers to
 * 'encode_buf', which is not a file-scope object in the visible part of
 * this file, so it would not build if re-enabled as is.
 */
#if 0
128
static int json_destroy_config(lua_State *l)
129
{
130
    struct luaL_serializer *cfg;
131
132
    cfg = lua_touserdata(l, 1);
133
    if (cfg)
134
    strbuf_free(&encode_buf);
135
    cfg = NULL;
136
137
    return 0;
138
}
139
#endif
140
141
static void json_create_tokens()
142
0
{
143
0
    int i;
144
145
    /* Decoding init */
146
147
    /* Tag all characters as an error */
148
0
    for (i = 0; i < 256; i++)
149
0
    ch2token[i] = T_ERROR;
150
151
    /* Set tokens that require no further processing */
152
0
    ch2token['{'] = T_OBJ_BEGIN;
153
0
    ch2token['}'] = T_OBJ_END;
154
0
    ch2token['['] = T_ARR_BEGIN;
155
0
    ch2token[']'] = T_ARR_END;
156
0
    ch2token[','] = T_COMMA;
157
0
    ch2token[':'] = T_COLON;
158
0
    ch2token['\0'] = T_END;
159
0
    ch2token[' '] = T_WHITESPACE;
160
0
    ch2token['\t'] = T_WHITESPACE;
161
0
    ch2token['\n'] = T_LINEFEED;
162
0
    ch2token['\r'] = T_WHITESPACE;
163
164
    /* Update characters that require further processing */
165
0
    ch2token['f'] = T_UNKNOWN;     /* false? */
166
0
    ch2token['i'] = T_UNKNOWN;     /* inf, ininity? */
167
0
    ch2token['I'] = T_UNKNOWN;
168
0
    ch2token['n'] = T_UNKNOWN;     /* null, nan? */
169
0
    ch2token['N'] = T_UNKNOWN;
170
0
    ch2token['t'] = T_UNKNOWN;     /* true? */
171
0
    ch2token['"'] = T_UNKNOWN;     /* string? */
172
0
    ch2token['+'] = T_UNKNOWN;     /* number? */
173
0
    ch2token['-'] = T_UNKNOWN;
174
0
    for (i = 0; i < 10; i++)
175
0
    ch2token['0' + i] = T_UNKNOWN;
176
177
    /* Lookup table for parsing escape characters */
178
0
    for (i = 0; i < 256; i++)
179
0
    escape2char[i] = 0;          /* String error */
180
0
    escape2char['"'] = '"';
181
0
    escape2char['\\'] = '\\';
182
0
    escape2char['/'] = '/';
183
0
    escape2char['b'] = '\b';
184
0
    escape2char['t'] = '\t';
185
0
    escape2char['n'] = '\n';
186
0
    escape2char['f'] = '\f';
187
0
    escape2char['r'] = '\r';
188
0
    escape2char['u'] = 'u';          /* Unicode parsing required */
189
0
}
190
191
/* ===== ENCODING ===== */
192
193
/* json_append_string args:
194
 * - lua_State
195
 * - JSON strbuf
196
 * - String (Lua stack index)
197
 *
198
 * Returns nothing. Doesn't remove string from Lua stack */
199
static void json_append_string(struct luaL_serializer *cfg, strbuf_t *json,
200
                   const char *str, size_t len)
201
0
{
202
0
    (void) cfg;
203
0
    const char *escstr;
204
0
    size_t i;
205
206
    /* Worst case is len * 6 (all unicode escapes).
207
     * This buffer is reused constantly for small strings
208
     * If there are any excess pages, they won't be hit anyway.
209
     * This gains ~5% speedup. */
210
0
    strbuf_ensure_empty_length(json, len * 6 + 2);
211
212
0
    strbuf_append_char_unsafe(json, '\"');
213
0
    for (i = 0; i < len; i++) {
214
0
        escstr = json_escape_char(str[i]);
215
0
        if (escstr)
216
0
            strbuf_append_string(json, escstr);
217
0
        else
218
0
            strbuf_append_char_unsafe(json, str[i]);
219
0
    }
220
0
    strbuf_append_char_unsafe(json, '\"');
221
0
}
222
223
static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
224
                             int current_depth, strbuf_t *json);
225
226
/* json_append_array args:
227
 * - lua_State
228
 * - JSON strbuf
229
 * - Size of passwd Lua array (top of stack) */
230
static void json_append_array(lua_State *l, struct luaL_serializer *cfg,
231
                  int current_depth, strbuf_t *json,
232
                  int array_length)
233
0
{
234
0
    int comma, i;
235
236
0
    strbuf_append_char(json, '[');
237
238
0
    comma = 0;
239
0
    for (i = 1; i <= array_length; i++) {
240
0
        if (comma)
241
0
            strbuf_append_char(json, ',');
242
0
        else
243
0
            comma = 1;
244
245
0
        lua_rawgeti(l, -1, i);
246
0
        json_append_data(l, cfg, current_depth, json);
247
0
        lua_pop(l, 1);
248
0
    }
249
250
0
    strbuf_append_char(json, ']');
251
0
}
252
253
/* Append an unsigned 64-bit integer in decimal notation to 'json'. */
static void json_append_uint(struct luaL_serializer *cfg, strbuf_t *json,
                             uint64_t num)
{
    enum { INT_BUFSIZE = 22 };
    (void) cfg;

    /* Reserve room first, then format straight into the buffer tail. */
    strbuf_ensure_empty_length(json, INT_BUFSIZE);
    char *out = strbuf_empty_ptr(json);
    int written = snprintf(out, INT_BUFSIZE, "%llu",
                           (unsigned long long) num);
    strbuf_extend_length(json, written);
}
263
264
/* Append a signed 64-bit integer in decimal notation to 'json'. */
static void json_append_int(struct luaL_serializer *cfg, strbuf_t *json,
                            int64_t num)
{
    enum { INT_BUFSIZE = 22 };
    (void) cfg;

    /* Reserve room first, then format straight into the buffer tail. */
    strbuf_ensure_empty_length(json, INT_BUFSIZE);
    char *out = strbuf_empty_ptr(json);
    int written = snprintf(out, INT_BUFSIZE, "%lld", (long long) num);
    strbuf_extend_length(json, written);
}
274
275
/* Append the JSON literal "null" to 'json'. */
static void json_append_nil(struct luaL_serializer *cfg, strbuf_t *json)
{
    static const char null_literal[] = "null";

    (void) cfg;
    strbuf_append_mem(json, null_literal, sizeof(null_literal) - 1);
}
280
281
static void json_append_number(struct luaL_serializer *cfg, strbuf_t *json,
282
                   lua_Number num)
283
0
{
284
0
    if (isnan(num)) {
285
0
    strbuf_append_mem(json, "nan", 3);
286
0
    return;
287
0
    }
288
289
0
    int len;
290
0
    strbuf_ensure_empty_length(json, FPCONV_G_FMT_BUFSIZE);
291
0
    len = fpconv_g_fmt(strbuf_empty_ptr(json), num, cfg->encode_number_precision);
292
0
    strbuf_extend_length(json, len);
293
0
}
294
295
/*
 * Encode the Lua table on top of the stack as a JSON object.
 * Fields are enumerated with luaL_next_field(); fields whose value is nil
 * are skipped. Integer keys are written as quoted decimal strings, string
 * keys are escaped by json_append_string(); any other key type raises a
 * Lua error.
 */
static void json_append_object(lua_State *l, struct luaL_serializer *cfg,
                               int current_depth, strbuf_t *json)
{
    int need_separator = 0;

    /* Object */
    strbuf_append_char(json, '{');

    /* const table */
    lua_pushnil(l);
    lua_pushnil(l);
    /* const table, key_idx(nil), key(nil) */

    while (luaL_next_field(l, cfg) != 0) {
        /* const table, (table ?), key_idx, key, value */
        if (lua_isnil(l, -1)) {
            /* Skip nil values */
            lua_pop(l, 1);
            /* const table, (table ?), key_idx, key */
            continue;
        }

        if (need_separator)
            strbuf_append_char(json, ',');
        need_separator = 1;

        /* Emit the key followed by a colon. */
        struct luaL_field key;
        luaL_checkfield(l, cfg, -2, &key);
        if (key.type == MP_STR) {
            json_append_string(cfg, json, key.sval.data, key.sval.len);
            strbuf_append_char(json, ':');
        } else if (key.type == MP_UINT || key.type == MP_INT) {
            strbuf_append_char(json, '"');
            if (key.type == MP_UINT)
                json_append_uint(cfg, json, key.ival);
            else
                json_append_int(cfg, json, key.ival);
            strbuf_append_mem(json, "\":", 2);
        } else {
            luaL_error(l, "table key must be a number or string");
        }

        /* const table, (table ?), key_idx, key, value */
        json_append_data(l, cfg, current_depth, json);
        lua_pop(l, 1);
        /* const table, (table ?), key_idx, key */
    }
    /* const table */

    strbuf_append_char(json, '}');
}
353
354
/* Serialise Lua data into JSON string. */
355
static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
356
                             int current_depth, strbuf_t *json)
357
0
{
358
0
    struct luaL_field field;
359
0
    luaL_checkfield(l, cfg, -1, &field);
360
0
    switch (field.type) {
361
0
    case MP_UINT:
362
0
        return json_append_uint(cfg, json, field.ival);
363
0
    case MP_STR:
364
0
    case MP_BIN:
365
0
        return json_append_string(cfg, json, field.sval.data, field.sval.len);
366
0
    case MP_INT:
367
0
        return json_append_int(cfg, json, field.ival);
368
0
    case MP_FLOAT:
369
0
        return json_append_number(cfg, json, field.fval);
370
0
    case MP_DOUBLE:
371
0
        return json_append_number(cfg, json, field.dval);
372
0
    case MP_BOOL:
373
0
    if (field.bval)
374
0
        strbuf_append_mem(json, "true", 4);
375
0
    else
376
0
        strbuf_append_mem(json, "false", 5);
377
0
    break;
378
0
    case MP_NIL:
379
0
    json_append_nil(cfg, json);
380
0
    break;
381
0
    case MP_MAP:
382
0
    if (current_depth >= cfg->encode_max_depth) {
383
0
        if (! cfg->encode_deep_as_nil)
384
0
            luaL_error(l, "Too high nest level");
385
0
        return json_append_nil(cfg, json); /* Limit nested maps */
386
0
    }
387
0
    json_append_object(l, cfg, current_depth + 1, json);
388
0
    return;
389
0
    case MP_ARRAY:
390
    /* Array */
391
0
    if (current_depth >= cfg->encode_max_depth) {
392
0
        if (! cfg->encode_deep_as_nil)
393
0
            luaL_error(l, "Too high nest level");
394
0
        return json_append_nil(cfg, json); /* Limit nested arrays */
395
0
    }
396
0
    json_append_array(l, cfg, current_depth + 1, json, field.size);
397
0
    return;
398
0
    case MP_EXT:
399
0
        switch (field.ext_type) {
400
0
        case MP_DECIMAL:
401
0
        {
402
0
            const char *str = decimal_str(field.decval);
403
0
            return json_append_string(cfg, json, str, strlen(str));
404
0
        }
405
0
        case MP_UUID:
406
0
            return json_append_string(cfg, json, tt_uuid_str(field.uuidval),
407
0
                                      UUID_STR_LEN);
408
0
        case MP_ERROR:
409
0
        {
410
0
            const char *str = field.errorval->errmsg;
411
0
            return json_append_string(cfg, json, str, strlen(str));
412
0
        }
413
0
        case MP_DATETIME:
414
0
        {
415
0
            char buf[DT_TO_STRING_BUFSIZE];
416
0
            size_t sz = datetime_to_string(field.dateval, buf, sizeof(buf));
417
0
            return json_append_string(cfg, json, buf, sz);
418
0
        }
419
0
        case MP_INTERVAL:
420
0
        {
421
0
            char buf[DT_IVAL_TO_STRING_BUFSIZE];
422
0
            size_t sz = interval_to_string(field.interval, buf, sizeof(buf));
423
0
            return json_append_string(cfg, json, buf, sz);
424
0
        }
425
0
        default:
426
0
            assert(false);
427
0
        }
428
0
    }
429
0
}
430
431
0
/*
 * Lua entry point: encode(value [, options]).
 * Encodes the value into a JSON string and returns it as the single result.
 * When an options argument is given, the options are applied to a per-call
 * copy of the serializer configuration.
 */
static int json_encode(lua_State *l)
{
    int nargs = lua_gettop(l);
    luaL_argcheck(l, nargs == 2 || nargs == 1, 1,
                  "expected 1 or 2 arguments");

    /* Reuse existing buffer. */
    strbuf_t encode_buf;
    struct ibuf *ibuf = cord_ibuf_take();
    strbuf_create(&encode_buf, STRBUF_DEFAULT_SIZE, ibuf);
    struct luaL_serializer *cfg = luaL_checkserializer(l);
    struct luaL_serializer *active_cfg = cfg;

    if (lua_gettop(l) == 2) {
        /*
         * A per-call local version of serializer instance options is
         * used if a user passes custom options to :encode() within a
         * separate argument. In this case it is required to avoid
         * modifying options of the instance. Life span of the copy is
         * restricted by the scope of :encode() so it is ok to push it
         * onto the Lua stack to be collected by the garbage collector.
         */
        active_cfg = luaL_newserializer_config(l);
        lua_insert(l, -3);
        luaL_serializer_copy_options(active_cfg, cfg);
        luaL_serializer_parse_options(l, active_cfg);
        lua_pop(l, 1);
    }
    json_append_data(l, active_cfg, 0, &encode_buf);

    char *json = strbuf_string(&encode_buf, NULL);
    lua_pushlstring(l, json, strbuf_length(&encode_buf));
    /*
     * Even if an exception is raised above, it is fine to skip the buffer
     * destruction. The strbuf object destructor does not free anything, and
     * the cord_ibuf object is freed automatically on a next yield.
     */
    strbuf_destroy(&encode_buf);
    cord_ibuf_put(ibuf);
    return 1;
}
472
473
/* ===== DECODING ===== */
474
475
static void json_process_value(lua_State *l, json_parse_t *json,
476
                               json_token_t *token);
477
478
/*
 * Convert one ASCII hexadecimal digit (either case) to its value.
 * Returns 0..15, or -1 when 'hex' is not a hexadecimal digit.
 */
static int hexdigit2int(char hex)
{
    if (hex >= '0' && hex <= '9')
        return hex - '0';

    /* Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'. */
    char lower = hex | 0x20;
    return (lower >= 'a' && lower <= 'f') ? 10 + lower - 'a' : -1;
}
490
491
/*
 * Decode four hexadecimal digits starting at 'hex'.
 * Returns the value (0..0xFFFF), or -1 if any of the four characters is
 * not a hexadecimal digit. Scanning stops at the first invalid character,
 * which includes the NUL terminator.
 */
static int decode_hex4(const char *hex)
{
    int value = 0;

    for (int pos = 0; pos < 4; pos++) {
        int nibble = hexdigit2int(hex[pos]);
        if (nibble < 0)
            return -1;
        /* Most significant digit comes first. */
        value = (value << 4) | nibble;
    }

    return value;
}
511
512
/* Converts a Unicode codepoint to UTF-8.
 *
 * Writes up to 4 bytes to *utf8 (no NUL terminator is added).
 * Returns the UTF-8 sequence length (1..4), or 0 when the codepoint
 * cannot be encoded: negative values and values above U+10FFFF are
 * rejected and nothing is written for them. */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
    /* Outside of the Unicode range */
    if (codepoint < 0 || codepoint > 0x10FFFF)
        return 0;

    /* 0xxxxxxx */
    if (codepoint <= 0x7F) {
        utf8[0] = (char)codepoint;
        return 1;
    }

    /* 110xxxxx 10xxxxxx */
    if (codepoint <= 0x7FF) {
        utf8[0] = (char)((codepoint >> 6) | 0xC0);
        utf8[1] = (char)((codepoint & 0x3F) | 0x80);
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if (codepoint <= 0xFFFF) {
        utf8[0] = (char)((codepoint >> 12) | 0xE0);
        utf8[1] = (char)(((codepoint >> 6) & 0x3F) | 0x80);
        utf8[2] = (char)((codepoint & 0x3F) | 0x80);
        return 3;
    }

    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    utf8[0] = (char)((codepoint >> 18) | 0xF0);
    utf8[1] = (char)(((codepoint >> 12) & 0x3F) | 0x80);
    utf8[2] = (char)(((codepoint >> 6) & 0x3F) | 0x80);
    utf8[3] = (char)((codepoint & 0x3F) | 0x80);
    return 4;
}
548
549
550
/* Called when index pointing to beginning of UTF-16 code escape: \uXXXX
551
 * \u is guaranteed to exist, but the remaining hex characters may be
552
 * missing.
553
 * Translate to UTF-8 and append to temporary token string.
554
 * Must advance index to the next character to be processed.
555
 * Returns: 0   success
556
 *          -1  error
557
 */
558
static int json_append_unicode_escape(json_parse_t *json)
559
0
{
560
0
    char utf8[4];       /* Surrogate pairs require 4 UTF-8 bytes */
561
0
    int codepoint;
562
0
    int surrogate_low;
563
0
    int len;
564
0
    int escape_len = 6;
565
566
    /* Fetch UTF-16 code unit */
567
0
    codepoint = decode_hex4(json->ptr + 2);
568
0
    if (codepoint < 0)
569
0
        return -1;
570
571
    /* UTF-16 surrogate pairs take the following 2 byte form:
572
     *      11011 x yyyyyyyyyy
573
     * When x = 0: y is the high 10 bits of the codepoint
574
     *      x = 1: y is the low 10 bits of the codepoint
575
     *
576
     * Check for a surrogate pair (high or low) */
577
0
    if ((codepoint & 0xF800) == 0xD800) {
578
        /* Error if the 1st surrogate is not high */
579
0
        if (codepoint & 0x400)
580
0
            return -1;
581
582
        /* Ensure the next code is a unicode escape */
583
0
        if (*(json->ptr + escape_len) != '\\' ||
584
0
            *(json->ptr + escape_len + 1) != 'u') {
585
0
            return -1;
586
0
        }
587
588
        /* Fetch the next codepoint */
589
0
        surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
590
0
        if (surrogate_low < 0)
591
0
            return -1;
592
593
        /* Error if the 2nd code is not a low surrogate */
594
0
        if ((surrogate_low & 0xFC00) != 0xDC00)
595
0
            return -1;
596
597
        /* Calculate Unicode codepoint */
598
0
        codepoint = (codepoint & 0x3FF) << 10;
599
0
        surrogate_low &= 0x3FF;
600
0
        codepoint = (codepoint | surrogate_low) + 0x10000;
601
0
        escape_len = 12;
602
0
    }
603
604
    /* Convert codepoint to UTF-8 */
605
0
    len = codepoint_to_utf8(utf8, codepoint);
606
0
    if (!len)
607
0
        return -1;
608
609
    /* Append bytes and advance parse index */
610
0
    strbuf_append_mem_unsafe(json->tmp, utf8, len);
611
0
    json->ptr += escape_len;
612
613
0
    return 0;
614
0
}
615
616
/*
 * Turn 'token' into a T_ERROR token located at the current parse position.
 * 'errtype' is stored by pointer (not copied) as the error description.
 */
static void json_set_token_error(json_token_t *token, json_parse_t *json,
                                 const char *errtype)
{
    token->value.string = errtype;
    token->column_index = json->ptr - json->cur_line_ptr;
    token->type = T_ERROR;
}
623
624
/*
 * Read a double-quoted JSON string starting at json->ptr.
 * On success the token becomes T_STRING, its value points into json->tmp
 * and json->ptr is moved past the closing quote. On failure the token
 * becomes T_ERROR and json->ptr is left at the offending character.
 */
static void json_next_string_token(json_parse_t *json, json_token_t *token)
{
    /* Caller must ensure a string is next */
    assert(*json->ptr == '"');

    /* Skip " */
    json->ptr++;

    /* json->tmp is the temporary strbuf used to accumulate the
     * decoded string value.
     * json->tmp is sized to handle JSON containing only a string value.
     */
    strbuf_reset(json->tmp);

    for (;;) {
        char ch = *json->ptr;

        if (ch == '"')
            break;

        if (ch == '\0') {
            /* Premature end of the string */
            json_set_token_error(token, json, "unexpected end of string");
            return;
        }

        /* Handle escapes */
        if (ch == '\\') {
            /* Translate the character that follows the backslash */
            ch = escape2char[(unsigned char)json->ptr[1]];
            if (ch == 'u') {
                if (json_append_unicode_escape(json) != 0) {
                    json_set_token_error(token, json,
                                         "invalid unicode escape code");
                    return;
                }
                /* The escape has been appended and skipped already. */
                continue;
            }
            if (ch == '\0') {
                json_set_token_error(token, json, "invalid escape code");
                return;
            }

            /* Skip '\' */
            json->ptr++;
        }

        /* Append normal character or translated single character
         * Unicode escapes are handled above */
        strbuf_append_char_unsafe(json->tmp, ch);
        json->ptr++;
    }
    json->ptr++;    /* Eat final quote (") */

    strbuf_ensure_null(json->tmp);

    token->type = T_STRING;
    token->value.string = strbuf_string(json->tmp, &token->string_len);
}
682
683
/* JSON numbers should take the following form:
684
 *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
685
 *
686
 * json_next_number_token() uses strtod() which allows other forms:
687
 * - numbers starting with '+'
688
 * - NaN, -NaN, infinity, -infinity
689
 * - hexadecimal numbers
690
 * - numbers with leading zeros
691
 *
692
 * json_is_invalid_number() detects "numbers" which may pass strtod()'s
693
 * error checking, but should not be allowed with strict JSON.
694
 *
695
 * json_is_invalid_number() may pass numbers which cause strtod()
696
 * to generate an error.
697
 */
698
/*
 * Returns 1 when the text at json->ptr must not be parsed as a number,
 * 0 when it starts like a valid JSON number.
 */
static int json_is_invalid_number(json_parse_t *json)
{
    const char *cursor = json->ptr;

    /* Reject numbers starting with + */
    if (*cursor == '+')
        return 1;

    /* Skip minus sign if it exists */
    if (*cursor == '-')
        cursor++;

    /* A number has to start with a digit */
    if (*cursor != '0')
        return (*cursor < '0' || *cursor > '9') ? 1 : 0;

    /* Reject numbers starting with 0x, or leading zeros */
    int next = cursor[1];
    if ((next | 0x20) == 'x')           /* Hex */
        return 1;
    if ('0' <= next && next <= '9')     /* Leading zero */
        return 1;

    return 0;
}
725
726
/*
 * Read a number starting at json->ptr.
 *
 * The text is first parsed as a signed 64-bit integer (T_INT). A result of
 * LLONG_MAX is re-parsed as an unsigned 64-bit integer (T_UINT). If the
 * integer is followed by '.', 'e' or 'E', or if it does not fit into the
 * 64-bit range (strtoll()/strtoull() report ERANGE), the text is parsed as
 * a double instead (T_NUMBER), so out-of-range integers are no longer
 * silently clamped to LLONG_MIN / ULLONG_MAX.
 *
 * On success json->ptr is moved past the number; if nothing could be
 * parsed the token becomes T_ERROR.
 */
static void json_next_number_token(json_parse_t *json, json_token_t *token)
{
    char *endptr;

    token->type = T_INT;
    errno = 0;
    token->value.ival = strtoll(json->ptr, &endptr, 10);
    if (token->value.ival == LLONG_MAX) {
        token->type = T_UINT;
        /* Forget a possible ERANGE from strtoll(): the value may still
         * fit into an unsigned 64-bit integer. */
        errno = 0;
        token->value.ival = strtoull(json->ptr, &endptr, 10);
    }
    int out_of_range = (errno == ERANGE);

    if (out_of_range ||
        *endptr == '.' || *endptr == 'e' || *endptr == 'E') {
        token->type = T_NUMBER;
        token->value.number = fpconv_strtod(json->ptr, &endptr);
    }

    if (json->ptr == endptr)
        json_set_token_error(token, json, "invalid number");
    else
        json->ptr = endptr;     /* Skip the processed number */
}
747
748
/* Fills in the token struct.
749
 * T_STRING will return a pointer to the json_parse_t temporary string
750
 * T_ERROR will leave the json->ptr pointer at the error.
751
 */
752
static void json_next_token(json_parse_t *json, json_token_t *token)
753
0
{
754
0
    int ch;
755
756
    /* Eat whitespace. */
757
0
    while (1) {
758
0
        ch = (unsigned char)*(json->ptr);
759
0
        token->type = ch2token[ch];
760
0
        if (token->type == T_LINEFEED) {
761
0
            json->line_count++;
762
0
            json->cur_line_ptr = json->ptr + 1;
763
0
        } else if (token->type != T_WHITESPACE) {
764
0
            break;
765
0
        }
766
0
        json->ptr++;
767
0
    }
768
769
    /* Store location of new token. Required when throwing errors
770
     * for unexpected tokens (syntax errors). */
771
0
    token->column_index = json->ptr - json->cur_line_ptr;
772
773
    /* Don't advance the pointer for an error or the end */
774
0
    if (token->type == T_ERROR) {
775
0
        json_set_token_error(token, json, "invalid token");
776
0
        return;
777
0
    }
778
779
0
    if (token->type == T_END) {
780
0
        return;
781
0
    }
782
783
    /* Found a known single character token, advance index and return */
784
0
    if (token->type != T_UNKNOWN) {
785
0
        json->ptr++;
786
0
        return;
787
0
    }
788
789
    /* Process characters which triggered T_UNKNOWN
790
     *
791
     * Must use strncmp() to match the front of the JSON string.
792
     * JSON identifier must be lowercase.
793
     * When strict_numbers if disabled, either case is allowed for
794
     * Infinity/NaN (since we are no longer following the spec..) */
795
0
    if (ch == '"') {
796
0
        json_next_string_token(json, token);
797
0
        return;
798
0
    } else if (!json_is_invalid_number(json)) {
799
0
        json_next_number_token(json, token);
800
0
        return;
801
0
    } else if (!strncmp(json->ptr, "true", 4)) {
802
0
        token->type = T_BOOLEAN;
803
0
        token->value.boolean = 1;
804
0
        json->ptr += 4;
805
0
        return;
806
0
    } else if (!strncmp(json->ptr, "false", 5)) {
807
0
        token->type = T_BOOLEAN;
808
0
        token->value.boolean = 0;
809
0
        json->ptr += 5;
810
0
        return;
811
0
    } else if (!strncmp(json->ptr, "null", 4)) {
812
0
        token->type = T_NULL;
813
0
        json->ptr += 4;
814
0
        return;
815
0
    } else if (json->cfg->decode_invalid_numbers) {
816
        /*
817
         * RFC4627: Numeric values that cannot be represented as sequences of
818
         * digits (such as Infinity and NaN) are not permitted.
819
         */
820
0
        if (!strncmp(json->ptr, "inf", 3)) {
821
0
            token->type = T_NUMBER;
822
0
            token->value.number = INFINITY;
823
0
            json->ptr += 3;
824
0
            return;
825
0
        } else if (!strncmp(json->ptr, "-inf", 4)) {
826
0
            token->type = T_NUMBER;
827
0
            token->value.number = -INFINITY;
828
0
            json->ptr += 4;
829
0
            return;
830
0
        } else if (!strncmp(json->ptr, "nan", 3) ||
831
0
                   !strncmp(json->ptr, "-nan", 3)) {
832
0
            token->type = T_NUMBER;
833
0
            token->value.number = NAN;
834
0
            json->ptr += (*json->ptr == '-' ? 4 : 3);
835
0
            return;
836
0
        }
837
0
    }
838
839
    /* Token starts with t/f/n but isn't recognised above. */
840
0
    json_set_token_error(token, json, "invalid token");
841
0
}
842
843
/* Sizes (in characters) of the pieces of a parse error context string. */
enum err_context_length {
    /* The " >> " marker placed in front of the wrong token. */
    ERR_CONTEXT_ARROW_LENGTH = 4,
    /* Maximum number of characters shown before the marker. */
    ERR_CONTEXT_MAX_LENGTH_BEFORE = 8,
    /* Maximum number of characters shown after the marker. */
    ERR_CONTEXT_MAX_LENGTH_AFTER = 8,
    /* Longest possible context, not counting the terminating NUL. */
    ERR_CONTEXT_MAX_LENGTH = ERR_CONTEXT_ARROW_LENGTH +
                             ERR_CONTEXT_MAX_LENGTH_BEFORE +
                             ERR_CONTEXT_MAX_LENGTH_AFTER,
};
850
851
/**
852
 * Copy characters near wrong token with the position @a
853
 * column_index to a static string buffer @a err_context and lay
854
 * out arrow " >> " before this token.
855
 *
856
 * @param context      String static buffer to fill.
857
 * @param json         Structure with pointers to parsing string.
858
 * @param column_index Position of wrong token in the current
859
 *                     line.
860
 */
861
static void fill_err_context(char *err_context, json_parse_t *json,
862
                             int column_index)
863
0
{
864
0
    assert(column_index >= 0);
865
0
    int length_before = column_index < ERR_CONTEXT_MAX_LENGTH_BEFORE ?
866
0
                        column_index : ERR_CONTEXT_MAX_LENGTH_BEFORE;
867
0
    const char *src = json->cur_line_ptr + column_index - length_before;
868
    /* Fill error context before the arrow. */
869
0
    memcpy(err_context, src, length_before);
870
0
    err_context += length_before;
871
0
    src += length_before;
872
873
    /* Make the arrow. */
874
0
    *(err_context++) = ' ';
875
0
    memset(err_context, '>', ERR_CONTEXT_ARROW_LENGTH - 2);
876
0
    err_context += ERR_CONTEXT_ARROW_LENGTH - 2;
877
0
    *(err_context++) = ' ';
878
879
    /* Fill error context after the arrow. */
880
0
    const char *end = err_context + ERR_CONTEXT_MAX_LENGTH_AFTER;
881
0
    for (; err_context < end && *src != '\0' && *src != '\n'; ++src,
882
0
         ++err_context)
883
0
        *err_context = *src;
884
0
    *err_context = '\0';
885
0
}
886
887
/* This function does not return.
888
 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
889
 * The only supported exception is the temporary parser string
890
 * json->tmp struct.
891
 * json and token should exist on the stack somewhere.
892
 * luaL_error() will long_jmp and release the stack */
893
static void json_throw_parse_error(lua_State *l, json_parse_t *json,
894
                                   const char *exp, json_token_t *token)
895
0
{
896
0
    const char *found;
897
0
    struct ibuf *ibuf = json->tmp->ibuf;
898
0
    strbuf_destroy(json->tmp);
899
0
    cord_ibuf_put(ibuf);
900
901
0
    if (token->type == T_ERROR)
902
0
        found = token->value.string;
903
0
    else
904
0
        found = json_token_type_name[token->type];
905
906
0
    char err_context[ERR_CONTEXT_MAX_LENGTH + 1];
907
0
    fill_err_context(err_context, json, token->column_index);
908
909
    /* Note: token->column_index is 0 based, display starting from 1 */
910
0
    luaL_error(l, "Expected %s but found %s on line %d at character %d here "
911
0
               "'%s'", exp, found, json->line_count, token->column_index + 1,
912
0
               err_context);
913
0
}
914
915
/* Leave one level of nesting (counterpart of json_decode_descend()). */
static inline void json_decode_ascend(json_parse_t *json)
{
    json->current_depth -= 1;
}
919
920
/* Enter one more nesting level while decoding.
 *
 * Increments json->current_depth and returns normally when the new
 * depth does not exceed json->cfg->decode_max_depth and
 * lua_checkstack() can provide `slots` extra Lua stack slots.
 * Otherwise json->tmp is destroyed, its ibuf is put back and a Lua
 * error is raised via luaL_error().
 *
 * l     - Lua state used for the stack check and for raising the error.
 * json  - parser state; current_depth is modified.
 * slots - number of Lua stack slots the caller is about to use. */
static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
{
    json->current_depth++;

    if (json->current_depth <= json->cfg->decode_max_depth &&
        lua_checkstack(l, slots)) {
        return;
    }

    /* Build the context snippet before the temporary buffer is released. */
    char err_context[ERR_CONTEXT_MAX_LENGTH + 1];
    fill_err_context(err_context, json, json->ptr - json->cur_line_ptr - 1);

    struct ibuf *ibuf = json->tmp->ibuf;
    strbuf_destroy(json->tmp);
    cord_ibuf_put(ibuf);

    /* A pointer difference has type ptrdiff_t, while the "%d" conversion
     * consumes an int from the variadic arguments, so the value is
     * narrowed explicitly to keep the argument type matching the format. */
    luaL_error(l, "Found too many nested data structures (%d) on line %d at cha"
               "racter %d here '%s'", json->current_depth, json->line_count,
               (int)(json->ptr - json->cur_line_ptr), err_context);
}
939
940
/* Handle the object context.
 *
 * Creates a new Lua table on the stack and fills it with the
 * key/value pairs read from the token stream until T_OBJ_END is seen.
 * Any unexpected token is reported through json_throw_parse_error(). */
static void json_parse_object_context(lua_State *l, json_parse_t *json)
{
    json_token_t tok;

    /* 3 slots required:
     * .., table, key, value */
    json_decode_descend(l, json, 3);

    lua_newtable(l);
    if (json->cfg->decode_save_metatables)
        luaL_setmaphint(l, -1);

    json_next_token(json, &tok);

    /* Empty object: nothing to fill in. */
    if (tok.type == T_OBJ_END) {
        json_decode_ascend(json);
        return;
    }

    for (;;) {
        /* Key: must be a string; push it. */
        if (tok.type != T_STRING)
            json_throw_parse_error(l, json, "object key string", &tok);
        lua_pushlstring(l, tok.value.string, tok.string_len);

        /* Separator between key and value. */
        json_next_token(json, &tok);
        if (tok.type != T_COLON)
            json_throw_parse_error(l, json, "colon", &tok);

        /* Value: fetch it and store table[key] = value. */
        json_next_token(json, &tok);
        json_process_value(l, json, &tok);
        lua_rawset(l, -3);

        /* Either the object ends here or a comma introduces another pair. */
        json_next_token(json, &tok);
        if (tok.type == T_OBJ_END)
            break;
        if (tok.type != T_COMMA)
            json_throw_parse_error(l, json, "comma or '}'", &tok);

        json_next_token(json, &tok);
    }

    json_decode_ascend(json);
}
991
992
/* Handle the array context */
993
static void json_parse_array_context(lua_State *l, json_parse_t *json)
994
0
{
995
0
    json_token_t token;
996
0
    int i;
997
998
    /* 2 slots required:
999
     * .., table, value */
1000
0
    json_decode_descend(l, json, 2);
1001
1002
0
    lua_newtable(l);
1003
0
    if (json->cfg->decode_save_metatables)
1004
0
        luaL_setarrayhint(l, -1);
1005
1006
0
    json_next_token(json, &token);
1007
1008
    /* Handle empty arrays */
1009
0
    if (token.type == T_ARR_END) {
1010
0
        json_decode_ascend(json);
1011
0
        return;
1012
0
    }
1013
1014
0
    for (i = 1; ; i++) {
1015
0
        json_process_value(l, json, &token);
1016
0
        lua_rawseti(l, -2, i);            /* arr[i] = value */
1017
1018
0
        json_next_token(json, &token);
1019
1020
0
        if (token.type == T_ARR_END) {
1021
0
            json_decode_ascend(json);
1022
0
            return;
1023
0
        }
1024
1025
0
        if (token.type != T_COMMA)
1026
0
            json_throw_parse_error(l, json, "comma or ']'", &token);
1027
1028
0
        json_next_token(json, &token);
1029
0
    }
1030
0
}
1031
1032
/* Handle the "value" context */
1033
static void json_process_value(lua_State *l, json_parse_t *json,
1034
                               json_token_t *token)
1035
0
{
1036
0
    switch (token->type) {
1037
0
    case T_STRING:
1038
0
        lua_pushlstring(l, token->value.string, token->string_len);
1039
0
        break;;
1040
0
    case T_UINT:
1041
0
        luaL_pushuint64(l, token->value.ival);
1042
0
        break;;
1043
0
    case T_INT:
1044
0
        luaL_pushint64(l, token->value.ival);
1045
0
        break;;
1046
0
    case T_NUMBER:
1047
0
        luaL_checkfinite(l, json->cfg, token->value.number);
1048
0
        lua_pushnumber(l, token->value.number);
1049
0
        break;;
1050
0
    case T_BOOLEAN:
1051
0
        lua_pushboolean(l, token->value.boolean);
1052
0
        break;;
1053
0
    case T_OBJ_BEGIN:
1054
0
        json_parse_object_context(l, json);
1055
0
        break;;
1056
0
    case T_ARR_BEGIN:
1057
0
        json_parse_array_context(l, json);
1058
0
        break;;
1059
0
    case T_NULL:
1060
0
    luaL_pushnull(l);
1061
0
        break;;
1062
0
    default:
1063
0
        json_throw_parse_error(l, json, "value", token);
1064
0
    }
1065
0
}
1066
1067
/* Lua entry point for decoding.
 *
 * Accepts 1 or 2 Lua arguments. With 2 arguments the extra one is
 * parsed as per-call serializer options. The string taken from the top
 * of the stack is parsed as a single JSON value followed by the end of
 * input. Returns 1 (the decoded value is left on the Lua stack). */
static int json_decode(lua_State *l)
{
    json_parse_t json;

    luaL_argcheck(l, lua_gettop(l) == 1 || lua_gettop(l) == 2, 1,
                  "expected 1 or 2 arguments");

    struct luaL_serializer *cfg = luaL_checkserializer(l);

    json.cfg = cfg;
    if (lua_gettop(l) == 2) {
        /*
         * user_cfg is a per-call local copy of the serializer
         * instance options. It is used when custom options are
         * passed to :decode() as a separate argument, so that the
         * options of the instance itself are not modified.
         * The life span of user_cfg is limited to this :decode()
         * call, so it is ok to push it onto the Lua stack and let
         * the garbage collector reclaim it.
         */
        struct luaL_serializer *user_cfg = luaL_newserializer_config(l);
        lua_insert(l, -3);
        luaL_serializer_copy_options(user_cfg, cfg);
        luaL_serializer_parse_options(l, user_cfg);
        lua_pop(l, 1);
        json.cfg = user_cfg;
    }

    size_t json_len;
    json.data = luaL_checklstring(l, -1, &json_len);
    json.ptr = json.data;
    json.cur_line_ptr = json.data;
    json.line_count = 1;
    json.current_depth = 0;

    /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
     *
     * CJSON can support any simple data type, hence only the first
     * character is guaranteed to be ASCII (at worst: '"'). This is
     * still enough to detect whether the wrong encoding is in use. */
    if (json_len >= 2 && (json.data[0] == '\0' || json.data[1] == '\0'))
        luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");

    /* Make the temporary buffer able to hold the entire string.
     * No further length checks are needed then, since a decoded
     * string must be smaller than the entire json string. */
    strbuf_t scratch;
    json.tmp = &scratch;
    struct ibuf *scratch_ibuf = cord_ibuf_take();
    strbuf_create(&scratch, json_len, scratch_ibuf);

    json_token_t token;
    json_next_token(&json, &token);
    json_process_value(l, &json, &token);

    /* Nothing but the end of input may follow the value. */
    json_next_token(&json, &token);
    if (token.type != T_END)
        json_throw_parse_error(l, &json, "the end", &token);

    strbuf_destroy(&scratch);
    cord_ibuf_put(scratch_ibuf);

    return 1;
}
1133
1134
/* ===== INITIALISATION ===== */
1135
1136
static int
1137
json_new(lua_State *L);
1138
1139
static const luaL_Reg jsonlib[] = {
1140
    { "encode", json_encode },
1141
    { "decode", json_decode },
1142
    { "new",    json_new },
1143
    { NULL, NULL}
1144
};
1145
1146
static int
1147
json_new(lua_State *L)
1148
0
{
1149
0
    luaL_newserializer(L, NULL, jsonlib);
1150
0
    return 1;
1151
0
}
1152
1153
int
1154
luaopen_json(lua_State *L)
1155
0
{
1156
0
    json_create_tokens();
1157
0
    luaL_json_default = luaL_newserializer(L, "json", jsonlib);
1158
0
    luaL_pushnull(L);
1159
0
    lua_setfield(L, -2, "null"); /* compatibility with cjson */
1160
0
    return 1;
1161
0
}
1162
1163
/* vi:ai et sw=4 ts=4:
1164
 */