Coverage Report

Created: 2024-02-16 06:12

/src/open62541/deps/cj5.c
Line
Count
Source (jump to first uncovered line)
1
// MIT License
2
//
3
// Copyright (c) 2020 Sepehr Taghdisian
4
// Copyright (c) 2022 Julius Pfrommer
5
//
6
// Permission is hereby granted, free of charge, to any person obtaining a copy
7
// of this software and associated documentation files (the "Software"), to deal
8
// in the Software without restriction, including without limitation the rights
9
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
// copies of the Software, and to permit persons to whom the Software is
11
// furnished to do so, subject to the following conditions:
12
//
13
// The above copyright notice and this permission notice shall be included in all
14
// copies or substantial portions of the Software.
15
//
16
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
// SOFTWARE.
23
24
#include "cj5.h"
25
#include "parse_num.h"
26
27
#include <math.h>
28
#include <float.h>
29
#include <string.h>
30
31
#if defined(_MSC_VER)
32
# define CJ5_INLINE __inline
33
#else
34
# define CJ5_INLINE inline
35
#endif
36
37
/* vs2008 does not have INFINITY and NAN defined */
38
#ifndef INFINITY
39
# define INFINITY ((double)(DBL_MAX+DBL_MAX))
40
#endif
41
#ifndef NAN
42
# define NAN ((double)(INFINITY-INFINITY))
43
#endif
44
45
#if defined(_MSC_VER)
46
# pragma warning(disable: 4056)
47
# pragma warning(disable: 4756)
48
#endif
49
50
/* Max nesting depth of objects and arrays */
51
3.07M
#define CJ5_MAX_NESTING 32
52
53
// Pack four characters into one 32-bit value: _a in the lowest byte, _d in
// the highest byte.
#define CJ5__FOURCC(_a, _b, _c, _d)                         \
54
    (((uint32_t)(_a) | ((uint32_t)(_b) << 8) |              \
55
      ((uint32_t)(_c) << 16) | ((uint32_t)(_d) << 24)))
56
57
// Packed four-character prefixes of the literals "null", "true" and "false".
// Only the first four characters of "false" fit; cj5__parse_primitive checks
// the trailing 'e' separately.
static const uint32_t CJ5__NULL_FOURCC  = CJ5__FOURCC('n', 'u', 'l', 'l');
58
static const uint32_t CJ5__TRUE_FOURCC  = CJ5__FOURCC('t', 'r', 'u', 'e');
59
static const uint32_t CJ5__FALSE_FOURCC = CJ5__FOURCC('f', 'a', 'l', 's');
60
61
// Internal state of a single cj5_parse run.
typedef struct {
62
    unsigned int pos;          // Index into json5 of the character being parsed
63
    unsigned int line_start;   // Value of pos at the most recent newline
64
    unsigned int line;         // Number of newlines passed so far
65
    cj5_error_code error;      // Current error state (CJ5_ERROR_NONE if none)
66
67
    const char *json5;         // Input buffer
68
    unsigned int len;          // Length of the input buffer
69
70
    unsigned int curr_tok_idx; // Stored as parent_id in newly created tokens
71
72
    cj5_token *tokens;         // Output token array
73
    unsigned int token_count;  // Tokens requested so far; keeps counting
                               // past max_tokens
74
    unsigned int max_tokens;   // Capacity of the tokens array
75
76
    bool stop_early;           // Copied from options->stop_early in cj5_parse:
                               // finish once the first root element is parsed
77
} cj5__parser;
78
79
// Returns true if ch lies in the inclusive range [from, to]. Both the offset
// of ch and the width of the range are reduced to uint8_t, so a single
// unsigned comparison covers the lower and the upper bound.
static CJ5_INLINE bool
cj5__isrange(char ch, char from, char to) {
    const uint8_t offset = (uint8_t)(ch - from);
    const uint8_t width = (uint8_t)(to - from);
    return offset <= width;
}
83
84
97.3M
// Character class tests built on cj5__isrange: 'A'-'Z', 'a'-'z' and '0'-'9'.
#define cj5__isupperchar(ch) cj5__isrange(ch, 'A', 'Z')
85
100M
#define cj5__islowerchar(ch) cj5__isrange(ch, 'a', 'z')
86
177M
#define cj5__isnum(ch)       cj5__isrange(ch, '0', '9')
87
88
static cj5_token *
89
40.3M
cj5__alloc_token(cj5__parser *parser) {
90
40.3M
    cj5_token* token = NULL;
91
40.3M
    if(parser->token_count < parser->max_tokens) {
92
20.2M
        token = &parser->tokens[parser->token_count];
93
20.2M
        memset(token, 0x0, sizeof(cj5_token));
94
20.2M
    } else {
95
20.0M
        parser->error = CJ5_ERROR_OVERFLOW;
96
20.0M
    }
97
98
    // Always increase the index. So we know eventually how many token would be
99
    // required (if there are not enough).
100
40.3M
    parser->token_count++;
101
40.3M
    return token;
102
40.3M
}
103
104
static void
105
11.1M
cj5__parse_string(cj5__parser *parser) {
106
11.1M
    const char *json5 = parser->json5;
107
11.1M
    unsigned int len = parser->len;
108
11.1M
    unsigned int start = parser->pos;
109
11.1M
    char str_open = json5[start];
110
111
11.1M
    parser->pos++;
112
150M
    for(; parser->pos < len; parser->pos++) {
113
150M
        char c = json5[parser->pos];
114
115
        // End of string
116
150M
        if(str_open == c) {
117
11.1M
            cj5_token *token = cj5__alloc_token(parser);
118
11.1M
            if(token) {
119
5.69M
                token->type = CJ5_TOKEN_STRING;
120
5.69M
                token->start = start + 1;
121
5.69M
                token->end = parser->pos - 1;
122
5.69M
                token->size = token->end - token->start + 1;
123
5.69M
                token->parent_id = parser->curr_tok_idx;
124
5.69M
            } 
125
11.1M
            return;
126
11.1M
        }
127
128
        // Unescaped newlines are forbidden
129
139M
        if(c == '\n') {
130
5
            parser->error = CJ5_ERROR_INVALID;
131
5
            return;
132
5
        }
133
134
        // Escape char
135
139M
        if(c == '\\') {
136
15.5M
            if(parser->pos + 1 >= len) {
137
13
                parser->error = CJ5_ERROR_INCOMPLETE;
138
13
                return;
139
13
            }
140
15.5M
            parser->pos++;
141
15.5M
            switch(json5[parser->pos]) {
142
110k
            case '\"':
143
111k
            case '/':
144
114k
            case '\\':
145
120k
            case 'b':
146
176k
            case 'f':
147
182k
            case 'r':
148
183k
            case 'n':
149
192k
            case 't':
150
192k
                break;
151
15.3M
            case 'u': // The next four characters are an utf8 code
152
15.3M
                parser->pos++;
153
15.3M
                if(parser->pos + 4 >= len) {
154
13
                    parser->error = CJ5_ERROR_INVALID;
155
13
                    return;
156
13
                }
157
76.6M
                for(unsigned int i = 0; i < 4; i++) {
158
                    // If it isn't a hex character we have an error
159
61.3M
                    if(!(json5[parser->pos] >= 48 && json5[parser->pos] <= 57) && /* 0-9 */
160
61.3M
                       !(json5[parser->pos] >= 65 && json5[parser->pos] <= 70) && /* A-F */
161
61.3M
                       !(json5[parser->pos] >= 97 && json5[parser->pos] <= 102))  /* a-f */
162
68
                        {
163
68
                            parser->error = CJ5_ERROR_INVALID;
164
68
                            return;
165
68
                        }
166
61.3M
                    parser->pos++;
167
61.3M
                }
168
15.3M
                parser->pos--;
169
15.3M
                break;
170
574
            case '\n': // Escape break line
171
574
                parser->line++;
172
574
                parser->line_start = parser->pos;
173
574
                break;
174
3
            default:
175
3
                parser->error = CJ5_ERROR_INVALID;
176
3
                return;
177
15.5M
            }
178
15.5M
        }
179
139M
    }
180
181
    // The file has ended before the string terminates
182
214
    parser->error = CJ5_ERROR_INCOMPLETE;
183
214
}
184
185
// parser->pos is advanced a last time in the next iteration of the main
186
// parse-loop. So we leave parse-primitive in a state where parser->pos points to
187
// the last character of the primitive value (or the quote-character of the
188
// string).
189
static void
190
29.6M
cj5__parse_primitive(cj5__parser* parser) {
191
29.6M
    const char* json5 = parser->json5;
192
29.6M
    unsigned int len = parser->len;
193
29.6M
    unsigned int start = parser->pos;
194
195
    // String value
196
29.6M
    if(json5[start] == '\"' ||
197
29.6M
       json5[start] == '\'') {
198
4.57M
        cj5__parse_string(parser);
199
4.57M
        return;
200
4.57M
    }
201
202
    // Fast comparison of bool, and null.
203
    // We have to use memcpy here or we can get unaligned accesses
204
25.0M
    uint32_t fourcc = 0;
205
25.0M
    if(start + 4 < len)
206
25.0M
        memcpy(&fourcc, &json5[start], 4);
207
    
208
25.0M
    cj5_token_type type;
209
25.0M
    if(fourcc == CJ5__NULL_FOURCC) {
210
342k
        type = CJ5_TOKEN_NULL;
211
342k
        parser->pos += 3;
212
24.7M
    } else if(fourcc == CJ5__TRUE_FOURCC) {
213
546
        type = CJ5_TOKEN_BOOL;
214
546
        parser->pos += 3;
215
24.7M
    } else if(fourcc == CJ5__FALSE_FOURCC) {
216
        // "false" has five characters
217
37.3k
        type = CJ5_TOKEN_BOOL;
218
37.3k
        if(start + 4 >= len || json5[start+4] != 'e') {
219
20
            parser->error = CJ5_ERROR_INVALID;
220
20
            return;
221
20
        }
222
37.3k
        parser->pos += 4;
223
24.6M
    } else {
224
        // Numbers are checked for basic compatibility.
225
        // But they are fully parsed only in the cj5_get_XXX functions.
226
24.6M
        type = CJ5_TOKEN_NUMBER;
227
66.3M
        for(; parser->pos < len; parser->pos++) {
228
66.3M
            if(!cj5__isnum(json5[parser->pos]) &&
229
66.3M
               !(json5[parser->pos] == '.') &&
230
66.3M
               !cj5__islowerchar(json5[parser->pos]) && 
231
66.3M
               !cj5__isupperchar(json5[parser->pos]) &&
232
66.3M
               !(json5[parser->pos] == '+') && !(json5[parser->pos] == '-')) {
233
24.6M
                break;
234
24.6M
            }
235
66.3M
        }
236
24.6M
        parser->pos--; // Point to the last character that is still inside the
237
                       // primitive value
238
24.6M
    }
239
240
25.0M
    cj5_token *token = cj5__alloc_token(parser);
241
25.0M
    if(token) {
242
12.4M
        token->type = type;
243
12.4M
        token->start = start;
244
12.4M
        token->end = parser->pos;
245
12.4M
        token->size = parser->pos - start + 1;
246
12.4M
        token->parent_id = parser->curr_tok_idx;
247
12.4M
    }
248
25.0M
}
249
250
static void
251
7.63M
cj5__parse_key(cj5__parser* parser) {
252
7.63M
    const char* json5 = parser->json5;
253
7.63M
    unsigned int start = parser->pos;
254
7.63M
    cj5_token* token;
255
256
    // Key is a a normal string
257
7.63M
    if(json5[start] == '\"' || json5[start] == '\'') {
258
6.57M
        cj5__parse_string(parser);
259
6.57M
        return;
260
6.57M
    }
261
262
    // An unquoted key. Must start with a-ZA-Z_$. Can contain numbers later on.
263
1.06M
    unsigned int len = parser->len;
264
4.36M
    for(; parser->pos < len; parser->pos++) {
265
4.36M
        if(cj5__islowerchar(json5[parser->pos]) ||
266
4.36M
           cj5__isupperchar(json5[parser->pos]) ||
267
4.36M
           json5[parser->pos] == '_' || json5[parser->pos] == '$')
268
2.50M
            continue;
269
1.86M
        if(cj5__isnum(json5[parser->pos]) && parser->pos != start)
270
795k
            continue;
271
1.06M
        break;
272
1.86M
    }
273
274
    // An empty key is not allowed
275
1.06M
    if(parser->pos <= start) {
276
127
        parser->error = CJ5_ERROR_INVALID;
277
127
        return;
278
127
    }
279
280
    // Move pos to the last character within the unquoted key
281
1.06M
    parser->pos--;
282
283
1.06M
    token = cj5__alloc_token(parser);
284
1.06M
    if(token) {
285
483k
        token->type = CJ5_TOKEN_STRING;
286
483k
        token->start = start;
287
483k
        token->end = parser->pos;
288
483k
        token->size = parser->pos - start + 1;
289
483k
        token->parent_id = parser->curr_tok_idx;
290
483k
    }
291
1.06M
}
292
293
static void
294
52.6k
cj5__skip_comment(cj5__parser* parser) {
295
52.6k
    const char* json5 = parser->json5;
296
297
    // Single-line comment
298
52.6k
    if(json5[parser->pos] == '#') {
299
51.0k
    skip_line:
300
12.3M
        while(parser->pos < parser->len) {
301
12.3M
            if(json5[parser->pos] == '\n') {
302
50.9k
                parser->pos--; // Reparse the newline in the main parse loop
303
50.9k
                return;
304
50.9k
            }
305
12.2M
            parser->pos++;
306
12.2M
        }
307
51
        return;
308
51.0k
    }
309
310
    // Comment begins with '/' but not enough space for another character
311
3.75k
    if(parser->pos + 1 >= parser->len) {
312
30
        parser->error = CJ5_ERROR_INVALID;
313
30
        return;
314
30
    }
315
3.72k
    parser->pos++;
316
317
    // Comment begins with '//' -> single-line comment
318
3.72k
    if(json5[parser->pos] == '/')
319
2.16k
        goto skip_line;
320
321
    // Multi-line comments begin with '/*' and end with '*/'
322
1.56k
    if(json5[parser->pos] == '*') {
323
1.52k
        parser->pos++;
324
1.62M
        for(; parser->pos + 1 < parser->len; parser->pos++) {
325
1.62M
            if(json5[parser->pos] == '*' && json5[parser->pos + 1] == '/') {
326
1.40k
                parser->pos++;
327
1.40k
                return;
328
1.40k
            }
329
            // Remember we passed a newline
330
1.62M
            if(json5[parser->pos] == '\n') {
331
367
                parser->line++;
332
367
                parser->line_start = parser->pos;
333
367
            }
334
1.62M
        }
335
1.52k
    }
336
337
    // Unknown comment type or the multi-line comment is not terminated
338
161
    parser->error = CJ5_ERROR_INCOMPLETE;
339
161
}
340
341
cj5_result
342
cj5_parse(const char *json5, unsigned int len,
343
          cj5_token *tokens, unsigned int max_tokens,
344
14.6k
          cj5_options *options) {
345
14.6k
    cj5_result r;
346
14.6k
    cj5__parser parser;
347
14.6k
    memset(&parser, 0x0, sizeof(parser));
348
14.6k
    parser.curr_tok_idx = 0;
349
14.6k
    parser.json5 = json5;
350
14.6k
    parser.len = len;
351
14.6k
    parser.tokens = tokens;
352
14.6k
    parser.max_tokens = max_tokens;
353
354
14.6k
    if(options)
355
0
        parser.stop_early = options->stop_early;
356
357
14.6k
    unsigned short depth = 0; // Nesting depth zero means "outside the root object"
358
14.6k
    char nesting[CJ5_MAX_NESTING]; // Contains either '\0', '{' or '[' for the
359
                                   // type of nesting at each depth. '\0'
360
                                   // indicates we are out of the root object.
361
14.6k
    char next[CJ5_MAX_NESTING];    // Next content to parse: 'k' (key), ':', 'v'
362
                                   // (value) or ',' (comma).
363
14.6k
    next[0] = 'v';  // The root is a "value" (object, array or primitive). If we
364
                    // detect a colon after the first value then everything is
365
                    // wrapped into a "virtual root object" and the parsing is
366
                    // restarted.
367
14.6k
    nesting[0] = 0; // Becomes '{' if there is a virtual root object
368
369
14.6k
    cj5_token *token = NULL; // The current token
370
371
19.7k
 start_parsing:
372
81.0M
    for(; parser.pos < len; parser.pos++) {
373
80.9M
        char c = json5[parser.pos];
374
80.9M
        switch(c) {
375
104k
        case '\n': // Skip newline
376
104k
            parser.line++;
377
104k
            parser.line_start = parser.pos;
378
104k
            break;
379
380
4.06k
        case '\r': // Skip whitespace
381
13.0k
        case '\t':
382
14.7k
        case ' ':
383
14.7k
            break;
384
385
48.8k
        case '#': // Skip comment
386
52.6k
        case '/':
387
52.6k
            cj5__skip_comment(&parser);
388
52.6k
            if(parser.error != CJ5_ERROR_NONE &&
389
52.6k
               parser.error != CJ5_ERROR_OVERFLOW)
390
191
                goto finish;
391
52.4k
            break;
392
393
2.98M
        case '{': // Open an object or array
394
3.07M
        case '[':
395
            // Check the nesting depth
396
3.07M
            if(depth + 1 >= CJ5_MAX_NESTING) {
397
4
                parser.error = CJ5_ERROR_INVALID;
398
4
                goto finish;
399
4
            }
400
401
            // Correct next?
402
3.07M
            if(next[depth] != 'v') {
403
27
                parser.error = CJ5_ERROR_INVALID;
404
27
                goto finish;
405
27
            }
406
407
3.07M
            depth++; // Increase the nesting depth
408
3.07M
            nesting[depth] = c; // Set the nesting type
409
3.07M
            next[depth] = (c == '{') ? 'k' : 'v'; // next is either a key or a value
410
411
            // Create a token for the object or array
412
3.07M
            token = cj5__alloc_token(&parser);
413
3.07M
            if(token) {
414
1.59M
                token->parent_id = parser.curr_tok_idx;
415
1.59M
                token->type = (c == '{') ? CJ5_TOKEN_OBJECT : CJ5_TOKEN_ARRAY;
416
1.59M
                token->start = parser.pos;
417
1.59M
                token->size = 0;
418
1.59M
                parser.curr_tok_idx = parser.token_count - 1; // The new curr_tok_idx
419
                                                              // is for this token
420
1.59M
            }
421
3.07M
            break;
422
423
2.98M
        case '}': // Close an object or array
424
3.07M
        case ']':
425
            // Check the nesting depth. Note that a "virtual root object" at
426
            // depth zero must not be closed.
427
3.07M
            if(depth == 0) {
428
35
                parser.error = CJ5_ERROR_INVALID;
429
35
                goto finish;
430
35
            }
431
432
            // Check and adjust the nesting. Note that ']' - '[' == 2 and '}' -
433
            // '{' == 2. Arrays can always be closed. Objects can only close
434
            // when a key or a comma is expected.
435
3.07M
            if(c - nesting[depth] != 2 ||
436
3.07M
               (c == '}' && next[depth] != 'k' && next[depth] != ',')) {
437
9
                parser.error = CJ5_ERROR_INVALID;
438
9
                goto finish;
439
9
            }
440
441
3.07M
            if(token) {
442
                // Finalize the current token
443
1.59M
                token->end = parser.pos;
444
445
                // Move to the parent and increase the parent size. Omit this
446
                // when we leave the root (parent the same as the current
447
                // token).
448
1.59M
                if(parser.curr_tok_idx != token->parent_id) {
449
1.58M
                    parser.curr_tok_idx = token->parent_id;
450
1.58M
                    token = &tokens[token->parent_id];
451
1.58M
                    token->size++;
452
1.58M
                }
453
1.59M
            }
454
455
            // Step one level up
456
3.07M
            depth--;
457
3.07M
            next[depth] = (depth == 0) ? 0 : ','; // zero if we step out the root
458
                                                  // object. then we do not look for
459
                                                  // another element.
460
461
            // The first element was successfully parsed. Stop early or try to
462
            // parse the full input string?
463
3.07M
            if(depth == 0 && parser.stop_early)
464
0
                goto finish;
465
466
3.07M
            break;
467
468
7.64M
        case ':': // Colon (between key and value)
469
7.64M
            if(next[depth] != ':') {
470
4.62k
                parser.error = CJ5_ERROR_INVALID;
471
4.62k
                goto finish;
472
4.62k
            }
473
7.63M
            next[depth] = 'v';
474
7.63M
            break;
475
476
29.7M
        case ',': // Comma
477
29.7M
            if(next[depth] != ',') {
478
31
                parser.error = CJ5_ERROR_INVALID;
479
31
                goto finish;
480
31
            }
481
29.7M
            next[depth] = (nesting[depth] == '{') ? 'k' : 'v';
482
29.7M
            break;
483
484
37.2M
        default: // Value or key
485
37.2M
            if(next[depth] == 'v') {
486
29.6M
                cj5__parse_primitive(&parser); // Parse primitive value
487
29.6M
                if(nesting[depth] != 0) {
488
                    // Parent is object or array
489
29.6M
                    if(token)
490
26.7M
                        token->size++;
491
29.6M
                    next[depth] = ',';
492
29.6M
                } else {
493
                    // The current value was the root element. Don't look for
494
                    // any next element.
495
5.09k
                    next[depth] = 0;
496
497
                    // The first element was successfully parsed. Stop early or try to
498
                    // parse the full input string?
499
5.09k
                    if(parser.stop_early)
500
0
                        goto finish;
501
5.09k
                }
502
29.6M
            } else if(next[depth] == 'k') {
503
7.63M
                cj5__parse_key(&parser);
504
7.63M
                if(token)
505
4.43M
                    token->size++; // Keys count towards the length
506
7.63M
                next[depth] = ':';
507
7.63M
            } else {
508
274
                parser.error = CJ5_ERROR_INVALID;
509
274
            }
510
511
37.2M
            if(parser.error && parser.error != CJ5_ERROR_OVERFLOW)
512
737
                goto finish;
513
514
37.2M
            break;
515
80.9M
        }
516
80.9M
    }
517
518
    // Are we back to the initial nesting depth?
519
14.1k
    if(depth != 0) {
520
59
        parser.error = CJ5_ERROR_INCOMPLETE;
521
59
        goto finish;
522
59
    }
523
524
    // Close the virtual root object if there is one
525
14.0k
    if(nesting[0] == '{' && parser.error != CJ5_ERROR_OVERFLOW) {
526
        // Check the we end after a complete key-value pair (or dangling comma)
527
4.59k
        if(next[0] != 'k' && next[0] != ',')
528
87
            parser.error = CJ5_ERROR_INVALID;
529
4.59k
        tokens[0].end = parser.pos - 1;
530
4.59k
    }
531
532
19.7k
 finish:
533
    // If parsing failed at the initial nesting depth, create a virtual root object
534
    // and restart parsing.
535
19.7k
    if(parser.error != CJ5_ERROR_NONE &&
536
19.7k
       parser.error != CJ5_ERROR_OVERFLOW &&
537
19.7k
       depth == 0 && nesting[0] != '{') {
538
5.10k
        parser.token_count = 0;
539
5.10k
        token = cj5__alloc_token(&parser);
540
5.10k
        if(token) {
541
5.10k
            token->parent_id = 0;
542
5.10k
            token->type = CJ5_TOKEN_OBJECT;
543
5.10k
            token->start = 0;
544
5.10k
            token->size = 0;
545
546
5.10k
            nesting[0] = '{';
547
5.10k
            next[0] = 'k';
548
549
5.10k
            parser.curr_tok_idx = 0;
550
5.10k
            parser.pos = 0;
551
5.10k
            parser.error = CJ5_ERROR_NONE;
552
5.10k
            goto start_parsing;
553
5.10k
        }
554
5.10k
    }
555
556
14.6k
    memset(&r, 0x0, sizeof(r));
557
14.6k
    r.error = parser.error;
558
14.6k
    r.error_line = parser.line;
559
14.6k
    r.error_col = parser.pos - parser.line_start;
560
14.6k
    r.num_tokens = parser.token_count; // How many tokens (would) have been
561
                                       // consumed by the parser?
562
563
    // Not a single token was parsed -> return an error
564
14.6k
    if(r.num_tokens == 0)
565
27
        r.error = CJ5_ERROR_INCOMPLETE;
566
567
    // Set the tokens and original string only if successfully parsed
568
14.6k
    if(r.error == CJ5_ERROR_NONE) {
569
13.1k
        r.tokens = tokens;
570
13.1k
        r.json5 = json5;
571
13.1k
    }
572
573
14.6k
    return r;
574
19.7k
}
575
576
cj5_error_code
577
0
cj5_get_bool(const cj5_result *r, unsigned int tok_index, bool *out) {
578
0
    const cj5_token *token = &r->tokens[tok_index];
579
0
    if(token->type != CJ5_TOKEN_BOOL)
580
0
        return CJ5_ERROR_INVALID;
581
0
    *out = (r->json5[token->start] == 't');
582
0
    return CJ5_ERROR_NONE;
583
0
}
584
585
cj5_error_code
586
0
cj5_get_float(const cj5_result *r, unsigned int tok_index, double *out) {
587
0
    const cj5_token *token = &r->tokens[tok_index];
588
0
    if(token->type != CJ5_TOKEN_NUMBER)
589
0
        return CJ5_ERROR_INVALID;
590
591
0
    const char *tokstr = &r->json5[token->start];
592
0
    size_t toksize = token->end - token->start + 1;
593
0
    if(toksize == 0)
594
0
        return CJ5_ERROR_INVALID;
595
596
    // Skip prefixed +/-
597
0
    bool neg = false;
598
0
    if(tokstr[0] == '+' || tokstr[0] == '-') {
599
0
        neg = (tokstr[0] == '-');
600
0
        tokstr++;
601
0
        toksize--;
602
0
    }
603
604
    // Detect prefixed inf/nan
605
0
    if(strncmp(tokstr, "Infinity", toksize) == 0) {
606
0
        *out = neg ? -INFINITY : INFINITY;
607
0
        return CJ5_ERROR_NONE;
608
0
    } else if(strncmp(tokstr, "NaN", toksize) == 0) {
609
0
        *out = NAN;
610
0
        return CJ5_ERROR_NONE;
611
0
    }
612
613
    // reset the +/- detection and parse
614
0
    tokstr = &r->json5[token->start];
615
0
    toksize = token->end - token->start + 1;
616
0
    size_t parsed = parseDouble(tokstr, toksize, out);
617
618
    // There must only be whitespace between the end of the parsed number and
619
    // the end of the token
620
0
    for(size_t i = parsed; i < toksize; i++) {
621
0
        if(tokstr[i] != ' ' && tokstr[i] -'\t' >= 5)
622
0
            return CJ5_ERROR_INVALID;
623
0
    }
624
625
0
    return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID;
626
0
}
627
628
cj5_error_code
629
cj5_get_int(const cj5_result *r, unsigned int tok_index,
630
0
            int64_t *out) {
631
0
    const cj5_token *token = &r->tokens[tok_index];
632
0
    if(token->type != CJ5_TOKEN_NUMBER)
633
0
        return CJ5_ERROR_INVALID;
634
0
    size_t parsed = parseInt64(&r->json5[token->start], token->size, out);
635
0
    return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID;
636
0
}
637
638
cj5_error_code
639
cj5_get_uint(const cj5_result *r, unsigned int tok_index,
640
0
             uint64_t *out) {
641
0
    const cj5_token *token = &r->tokens[tok_index];
642
0
    if(token->type != CJ5_TOKEN_NUMBER)
643
0
        return CJ5_ERROR_INVALID;
644
0
    size_t parsed = parseUInt64(&r->json5[token->start], token->size, out);
645
0
    return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID;
646
0
}
647
648
// Constant for combining a surrogate pair in cj5_get_str, which computes
// (lead << 10) + trail + SURROGATE_OFFSET. In uint32_t arithmetic this
// subtracts the bases 0xD800 (shifted by 10) and 0xDC00 and adds 0x10000.
static const uint32_t SURROGATE_OFFSET = 0x10000u - (0xD800u << 10) - 0xDC00;
649
650
static cj5_error_code
651
10.3M
parse_codepoint(const char *pos, uint32_t *out_utf) {
652
10.3M
    uint32_t utf = 0;
653
51.6M
    for(unsigned int i = 0; i < 4; i++) {
654
41.3M
        char byte = pos[i];
655
41.3M
        if(cj5__isnum(byte)) {
656
32.0M
            byte = (char)(byte - '0');
657
32.0M
        } else if(cj5__isrange(byte, 'a', 'f')) {
658
9.25M
            byte = (char)(byte - ('a' - 10));
659
9.25M
        } else if(cj5__isrange(byte, 'A', 'F')) {
660
1.62k
            byte = (char)(byte - ('A' - 10));
661
1.62k
        } else {
662
16
            return CJ5_ERROR_INVALID;
663
16
        }
664
41.3M
        utf = (utf << 4) | ((uint8_t)byte & 0xF);
665
41.3M
    }
666
10.3M
    *out_utf = utf;
667
10.3M
    return CJ5_ERROR_NONE;
668
10.3M
}
669
670
cj5_error_code
671
cj5_get_str(const cj5_result *r, unsigned int tok_index,
672
95.5k
            char *buf, unsigned int *buflen) {
673
95.5k
    const cj5_token *token = &r->tokens[tok_index];
674
95.5k
    if(token->type != CJ5_TOKEN_STRING)
675
0
        return CJ5_ERROR_INVALID;
676
677
95.5k
    const char *pos = &r->json5[token->start];
678
95.5k
    const char *end = &r->json5[token->end + 1];
679
95.5k
    unsigned int outpos = 0;
680
20.3M
    for(; pos < end; pos++) {
681
20.2M
        uint8_t c = (uint8_t)*pos;
682
683
        // Process an escape character
684
20.2M
        if(c == '\\') {
685
10.5M
            if(pos + 1 >= end)
686
0
                return CJ5_ERROR_INCOMPLETE;
687
10.5M
            pos++;
688
10.5M
            c = (uint8_t)*pos;
689
10.5M
            switch(c) {
690
110k
            case '\"': buf[outpos++] = '\"'; break;
691
885
            case '\\': buf[outpos++] = '\\'; break;
692
203
            case '\n': buf[outpos++] = '\n'; break; // escape newline
693
194
            case '/':  buf[outpos++] = '/';  break;
694
5.80k
            case 'b':  buf[outpos++] = '\b'; break;
695
55.4k
            case 'f':  buf[outpos++] = '\f'; break;
696
4.01k
            case 'r':  buf[outpos++] = '\r'; break;
697
689
            case 'n':  buf[outpos++] = '\n'; break;
698
4.03k
            case 't':  buf[outpos++] = '\t'; break;
699
10.3M
            case 'u': {
700
                // Parse the unicode code point
701
10.3M
                if(pos + 4 >= end)
702
0
                    return CJ5_ERROR_INCOMPLETE;
703
10.3M
                pos++;
704
10.3M
                uint32_t utf;
705
10.3M
                cj5_error_code err = parse_codepoint(pos, &utf);
706
10.3M
                if(err != CJ5_ERROR_NONE)
707
0
                    return err;
708
10.3M
                pos += 3;
709
710
10.3M
                if(0xD800 <= utf && utf <= 0xDBFF) {
711
                    // Parse a surrogate pair
712
472
                    if(pos + 6 >= end)
713
16
                        return CJ5_ERROR_INVALID;
714
456
                    if(pos[1] != '\\' && pos[3] != 'u')
715
20
                        return CJ5_ERROR_INVALID;
716
436
                    pos += 3;
717
436
                    uint32_t trail;
718
436
                    err = parse_codepoint(pos, &trail);
719
436
                    if(err != CJ5_ERROR_NONE)
720
16
                        return err;
721
420
                    pos += 3;
722
420
                    utf = (utf << 10) + trail + SURROGATE_OFFSET;
723
10.3M
                } else if(0xDC00 <= utf && utf <= 0xDFFF) {
724
                    // Invalid Unicode '\\u%04X'
725
8
                    return CJ5_ERROR_INVALID;
726
8
                }
727
                
728
                // Write the utf8 bytes of the code point
729
10.3M
                if(utf <= 0x7F) { // Plain ASCII
730
10.3M
                    buf[outpos++] = (char)utf;
731
10.3M
                } else if(utf <= 0x07FF) { // 2-byte unicode
732
199
                    buf[outpos++] = (char)(((utf >> 6) & 0x1F) | 0xC0);
733
199
                    buf[outpos++] = (char)(((utf >> 0) & 0x3F) | 0x80);
734
838
                } else if(utf <= 0xFFFF) { // 3-byte unicode
735
419
                    buf[outpos++] = (char)(((utf >> 12) & 0x0F) | 0xE0);
736
419
                    buf[outpos++] = (char)(((utf >>  6) & 0x3F) | 0x80);
737
419
                    buf[outpos++] = (char)(((utf >>  0) & 0x3F) | 0x80);
738
419
                } else if(utf <= 0x10FFFF) { // 4-byte unicode
739
411
                    buf[outpos++] = (char)(((utf >> 18) & 0x07) | 0xF0);
740
411
                    buf[outpos++] = (char)(((utf >> 12) & 0x3F) | 0x80);
741
411
                    buf[outpos++] = (char)(((utf >>  6) & 0x3F) | 0x80);
742
411
                    buf[outpos++] = (char)(((utf >>  0) & 0x3F) | 0x80);
743
411
                } else {
744
8
                    return CJ5_ERROR_INVALID; // Not a utf8 string
745
8
                }
746
10.3M
                break;
747
10.3M
            }
748
10.3M
            default:
749
0
                return CJ5_ERROR_INVALID;
750
10.5M
            }
751
10.5M
            continue;
752
10.5M
        }
753
754
        // Unprintable ascii characters must be escaped. JSON5 allows nested
755
        // quotes if the quote character is not the same as the surrounding
756
        // quote character, e.g. 'this is my "quote"'. This logic is in the
757
        // token parsing code and not in this "string extraction" method.
758
9.71M
        if(c < ' '   || c == 127)
759
21
            return CJ5_ERROR_INVALID;
760
761
        // Ascii character or utf8 byte
762
9.71M
        buf[outpos++] = (char)c;
763
9.71M
    }
764
765
    // Terminate with \0
766
95.4k
    buf[outpos] = 0;
767
768
    // Set the output length
769
95.4k
    if(buflen)
770
95.4k
        *buflen = outpos;
771
95.4k
    return CJ5_ERROR_NONE;
772
95.5k
}
773
774
void
775
0
cj5_skip(const cj5_result *r, unsigned int *tok_index) {
776
0
    unsigned int idx = *tok_index;
777
0
    unsigned int end = r->tokens[idx].end;
778
0
    do { idx++; } while(idx < r->num_tokens &&
779
0
                        r->tokens[idx].start < end);
780
0
    *tok_index = idx;
781
0
}
782
783
cj5_error_code
784
cj5_find(const cj5_result *r, unsigned int *tok_index,
785
0
         const char *key) {
786
    // It has to be an object
787
0
    unsigned int idx = *tok_index;
788
0
    if(r->tokens[idx].type != CJ5_TOKEN_OBJECT)
789
0
        return CJ5_ERROR_INVALID;
790
0
    unsigned int size = r->tokens[idx].size;
791
792
    // Skip to the first key
793
0
    idx++;
794
795
    // Size is number of keys + number of values
796
0
    for(unsigned int i = 0; i < size; i += 2) {
797
        // Key has to be a string
798
0
        if(r->tokens[idx].type != CJ5_TOKEN_STRING)
799
0
            return CJ5_ERROR_INVALID;
800
801
        // Return the index to the value if the key matches
802
0
        const char *keystart = &r->json5[r->tokens[idx].start];
803
0
        size_t keysize = r->tokens[idx].end - r->tokens[idx].start + 1;
804
0
        if(strncmp(key, keystart, keysize) == 0) {
805
0
            *tok_index = idx + 1;
806
0
            return CJ5_ERROR_NONE;
807
0
        }
808
809
        // Skip over the value
810
0
        idx++;
811
0
        cj5_skip(r, &idx);
812
0
    }
813
0
    return CJ5_ERROR_NOTFOUND;
814
0
}