Coverage Report

Created: 2026-05-16 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/open62541_15/deps/cj5.c
Line
Count
Source
1
// MIT License
2
//
3
// Copyright (c) 2020 Sepehr Taghdisian
4
// Copyright (c) 2022, 2024 Julius Pfrommer
5
//
6
// Permission is hereby granted, free of charge, to any person obtaining a copy
7
// of this software and associated documentation files (the "Software"), to deal
8
// in the Software without restriction, including without limitation the rights
9
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
// copies of the Software, and to permit persons to whom the Software is
11
// furnished to do so, subject to the following conditions:
12
//
13
// The above copyright notice and this permission notice shall be included in all
14
// copies or substantial portions of the Software.
15
//
16
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
// SOFTWARE.
23
24
#include "cj5.h"
25
#include "parse_num.h"
26
#include "utf8.h"
27
28
#include <math.h>
29
#include <float.h>
30
#include <string.h>
31
32
#if defined(_MSC_VER)
33
# define CJ5_INLINE __inline
34
#else
35
# define CJ5_INLINE inline
36
#endif
37
38
/* vs2008 does not have INFINITY and NAN defined */
39
#ifndef INFINITY
40
# define INFINITY ((double)(DBL_MAX+DBL_MAX))
41
#endif
42
#ifndef NAN
43
# define NAN ((double)(INFINITY-INFINITY))
44
#endif
45
46
#if defined(_MSC_VER)
47
# pragma warning(disable: 4056)
48
# pragma warning(disable: 4756)
49
#endif
50
51
/* Max nesting depth of objects and arrays */
52
7.02M
#define CJ5_MAX_NESTING 32
53
54
/* Pack four characters into one 32-bit tag. The first character occupies the
 * lowest eight bits, the fourth character the highest eight bits. */
#define CJ5__FOURCC(c0, c1, c2, c3)                         \
    ( (uint32_t)(c0)        |                               \
     ((uint32_t)(c1) << 8)  |                               \
     ((uint32_t)(c2) << 16) |                               \
     ((uint32_t)(c3) << 24))

/* Tags of the keyword literals. Only the first four characters of "false" fit
 * into a tag; the fifth character has to be compared separately. */
static const uint32_t CJ5__NULL_FOURCC  = CJ5__FOURCC('n', 'u', 'l', 'l');
static const uint32_t CJ5__TRUE_FOURCC  = CJ5__FOURCC('t', 'r', 'u', 'e');
static const uint32_t CJ5__FALSE_FOURCC = CJ5__FOURCC('f', 'a', 'l', 's');
61
62
typedef struct {
63
    unsigned int pos;
64
    cj5_error_code error;
65
66
    const char *json5;
67
    unsigned int len;
68
69
    unsigned int curr_tok_idx;
70
71
    cj5_token *tokens;
72
    unsigned int token_count;
73
    unsigned int max_tokens;
74
75
    bool stop_early;
76
} cj5__parser;
77
78
/* Returns true if ch lies within the inclusive range [from, to]. Both
 * distances are reduced to 8 bit, so a character below `from` wraps around to
 * a large offset and a single comparison is sufficient. */
static CJ5_INLINE bool
cj5__isrange(char ch, char from, char to) {
    uint8_t offset = (uint8_t)(ch - from);
    uint8_t span = (uint8_t)(to - from);
    return offset <= span;
}

/* ASCII character classes */
#define cj5__isupperchar(ch) cj5__isrange(ch, 'A', 'Z')
#define cj5__islowerchar(ch) cj5__isrange(ch, 'a', 'z')
#define cj5__isnum(ch)       cj5__isrange(ch, '0', '9')
86
87
static cj5_token *
88
172M
cj5__alloc_token(cj5__parser *parser) {
89
172M
    cj5_token* token = NULL;
90
172M
    if(parser->token_count < parser->max_tokens) {
91
83.7M
        token = &parser->tokens[parser->token_count];
92
83.7M
        memset(token, 0x0, sizeof(cj5_token));
93
88.2M
    } else {
94
88.2M
        parser->error = CJ5_ERROR_OVERFLOW;
95
88.2M
    }
96
97
    // Always increase the index. So we know eventually how many token would be
98
    // required (if there are not enough).
99
172M
    parser->token_count++;
100
172M
    return token;
101
172M
}
102
103
static void
104
14.7M
cj5__parse_string(cj5__parser *parser) {
105
14.7M
    const char *json5 = parser->json5;
106
14.7M
    unsigned int len = parser->len;
107
14.7M
    unsigned int start = parser->pos;
108
14.7M
    char str_open = json5[start];
109
110
14.7M
    parser->pos++;
111
492M
    for(; parser->pos < len; parser->pos++) {
112
492M
        char c = json5[parser->pos];
113
114
        // End of string
115
492M
        if(str_open == c) {
116
14.7M
            cj5_token *token = cj5__alloc_token(parser);
117
14.7M
            if(token) {
118
7.81M
                token->type = CJ5_TOKEN_STRING;
119
7.81M
                token->start = start + 1;
120
7.81M
                token->end = parser->pos - 1;
121
7.81M
                token->size = token->end - token->start + 1;
122
7.81M
                token->parent_id = parser->curr_tok_idx;
123
7.81M
            } 
124
14.7M
            return;
125
14.7M
        }
126
127
        // Unescaped newlines are forbidden
128
478M
        if(c == '\n') {
129
307
            parser->error = CJ5_ERROR_INVALID;
130
307
            return;
131
307
        }
132
133
        // Skip escape character
134
478M
        if(c == '\\') {
135
15.0M
            if(parser->pos + 1 >= len) {
136
30
                parser->error = CJ5_ERROR_INCOMPLETE;
137
30
                return;
138
30
            }
139
15.0M
            parser->pos++;
140
15.0M
        }
141
478M
    }
142
143
    // The file has ended before the string terminates
144
3.42k
    parser->error = CJ5_ERROR_INCOMPLETE;
145
3.42k
}
146
147
// parser->pos is advanced a last time in the next iteration of the main
148
// parse-loop. So we leave parse-primitive in a state where parse->pos points to
149
// the last character of the primitive value (or the quote-character of the
150
// string).
151
static void
152
145M
cj5__parse_primitive(cj5__parser* parser) {
153
145M
    const char* json5 = parser->json5;
154
145M
    unsigned int len = parser->len;
155
145M
    unsigned int start = parser->pos;
156
157
    // String value
158
145M
    if(json5[start] == '\"' ||
159
139M
       json5[start] == '\'') {
160
7.86M
        cj5__parse_string(parser);
161
7.86M
        return;
162
7.86M
    }
163
164
    // Fast comparison of bool, and null.
165
    // Make the comparison case-insensitive.
166
138M
    uint32_t fourcc = 0;
167
138M
    if(start + 3 < len) {
168
134M
        fourcc += (unsigned char)json5[start] | 32U;
169
134M
        fourcc += ((unsigned char)json5[start+1] | 32U) << 8;
170
134M
        fourcc += ((unsigned char)json5[start+2] | 32U) << 16;
171
134M
        fourcc += ((unsigned char)json5[start+3] | 32U) << 24;
172
134M
    }
173
    
174
138M
    cj5_token_type type;
175
138M
    if(fourcc == CJ5__NULL_FOURCC) {
176
3.75M
        type = CJ5_TOKEN_NULL;
177
3.75M
        parser->pos += 3;
178
134M
    } else if(fourcc == CJ5__TRUE_FOURCC) {
179
44.1k
        type = CJ5_TOKEN_BOOL;
180
44.1k
        parser->pos += 3;
181
134M
    } else if(fourcc == CJ5__FALSE_FOURCC) {
182
        // "false" has five characters
183
21.5k
        type = CJ5_TOKEN_BOOL;
184
21.5k
        if(start + 4 >= len || (json5[start+4] | 32) != 'e') {
185
182
            parser->error = CJ5_ERROR_INVALID;
186
182
            return;
187
182
        }
188
21.3k
        parser->pos += 4;
189
134M
    } else {
190
        // Numbers are checked for basic compatibility.
191
        // But they are fully parsed only in the cj5_get_XXX functions.
192
134M
        type = CJ5_TOKEN_NUMBER;
193
355M
        for(; parser->pos < len; parser->pos++) {
194
351M
            if(!cj5__isnum(json5[parser->pos]) &&
195
179M
               !(json5[parser->pos] == '.') &&
196
177M
               !cj5__islowerchar(json5[parser->pos]) && 
197
136M
               !cj5__isupperchar(json5[parser->pos]) &&
198
132M
               !(json5[parser->pos] == '+') && !(json5[parser->pos] == '-')) {
199
130M
                break;
200
130M
            }
201
351M
        }
202
134M
        parser->pos--; // Point to the last character that is still inside the
203
                       // primitive value
204
134M
    }
205
206
138M
    cj5_token *token = cj5__alloc_token(parser);
207
138M
    if(token) {
208
66.2M
        token->type = type;
209
66.2M
        token->start = start;
210
66.2M
        token->end = parser->pos;
211
66.2M
        token->size = parser->pos - start + 1;
212
66.2M
        token->parent_id = parser->curr_tok_idx;
213
66.2M
    }
214
138M
}
215
216
static void
217
19.0M
cj5__parse_key(cj5__parser* parser) {
218
19.0M
    const char* json5 = parser->json5;
219
19.0M
    unsigned int start = parser->pos;
220
19.0M
    cj5_token* token;
221
222
    // Key is a a normal string
223
19.0M
    if(json5[start] == '\"' || json5[start] == '\'') {
224
6.92M
        cj5__parse_string(parser);
225
6.92M
        return;
226
6.92M
    }
227
228
    // An unquoted key. Must start with a-ZA-Z_$. Can contain numbers later on.
229
12.1M
    unsigned int len = parser->len;
230
93.1M
    for(; parser->pos < len; parser->pos++) {
231
93.1M
        if(cj5__islowerchar(json5[parser->pos]) ||
232
53.1M
           cj5__isupperchar(json5[parser->pos]) ||
233
38.1M
           json5[parser->pos] == '_' || json5[parser->pos] == '$')
234
61.3M
            continue;
235
31.7M
        if(cj5__isnum(json5[parser->pos]) && parser->pos != start)
236
19.6M
            continue;
237
12.1M
        break;
238
31.7M
    }
239
240
    // An empty key is not allowed
241
12.1M
    if(parser->pos <= start) {
242
1.26k
        parser->error = CJ5_ERROR_INVALID;
243
1.26k
        return;
244
1.26k
    }
245
246
    // Move pos to the last character within the unquoted key
247
12.1M
    parser->pos--;
248
249
12.1M
    token = cj5__alloc_token(parser);
250
12.1M
    if(token) {
251
6.04M
        token->type = CJ5_TOKEN_STRING;
252
6.04M
        token->start = start;
253
6.04M
        token->end = parser->pos;
254
6.04M
        token->size = parser->pos - start + 1;
255
6.04M
        token->parent_id = parser->curr_tok_idx;
256
6.04M
    }
257
12.1M
}
258
259
static void
260
377k
cj5__skip_comment(cj5__parser* parser) {
261
377k
    const char* json5 = parser->json5;
262
263
    // Single-line comment
264
377k
    if(json5[parser->pos] == '#') {
265
365k
    skip_line:
266
69.1M
        while(parser->pos < parser->len) {
267
69.0M
            if(json5[parser->pos] == '\n') {
268
316k
                parser->pos--; // Reparse the newline in the main parse loop
269
316k
                return;
270
316k
            }
271
68.7M
            parser->pos++;
272
68.7M
        }
273
48.6k
        return;
274
365k
    }
275
276
    // Comment begins with '/' but not enough space for another character
277
173k
    if(parser->pos + 1 >= parser->len) {
278
184
        parser->error = CJ5_ERROR_INVALID;
279
184
        return;
280
184
    }
281
173k
    parser->pos++;
282
283
    // Comment begins with '//' -> single-line comment
284
173k
    if(json5[parser->pos] == '/')
285
161k
        goto skip_line;
286
287
    // Multi-line comments begin with '/*' and end with '*/'
288
12.0k
    if(json5[parser->pos] == '*') {
289
11.6k
        parser->pos++;
290
9.32M
        for(; parser->pos + 1 < parser->len; parser->pos++) {
291
9.32M
            if(json5[parser->pos] == '*' && json5[parser->pos + 1] == '/') {
292
11.1k
                parser->pos++;
293
11.1k
                return;
294
11.1k
            }
295
9.32M
        }
296
11.6k
    }
297
298
    // Unknown comment type or the multi-line comment is not terminated
299
862
    parser->error = CJ5_ERROR_INCOMPLETE;
300
862
}
301
302
cj5_result
303
cj5_parse(const char *json5, unsigned int len,
304
          cj5_token *tokens, unsigned int max_tokens,
305
5.28M
          cj5_options *options) {
306
5.28M
    cj5_result r;
307
5.28M
    cj5__parser parser;
308
5.28M
    memset(&parser, 0x0, sizeof(parser));
309
5.28M
    parser.curr_tok_idx = 0;
310
5.28M
    parser.json5 = json5;
311
5.28M
    parser.len = len;
312
5.28M
    parser.tokens = tokens;
313
5.28M
    parser.max_tokens = max_tokens;
314
315
5.28M
    if(options)
316
5.28M
        parser.stop_early = options->stop_early;
317
318
5.28M
    unsigned short depth = 0; // Nesting depth zero means "outside the root object"
319
5.28M
    char nesting[CJ5_MAX_NESTING]; // Contains either '\0', '{' or '[' for the
320
                                   // type of nesting at each depth. '\0'
321
                                   // indicates we are out of the root object.
322
5.28M
    char next[CJ5_MAX_NESTING];    // Next content to parse: 'k' (key), ':', 'v'
323
                                   // (value) or ',' (comma).
324
5.28M
    next[0] = 'v';  // The root is a "value" (object, array or primitive). If we
325
                    // detect a colon after the first value then everything is
326
                    // wrapped into a "virtual root object" and the parsing is
327
                    // restarted.
328
5.28M
    nesting[0] = 0; // Becomes '{' if there is a virtual root object
329
330
5.28M
    cj5_token *token = NULL; // The current token
331
332
5.30M
 start_parsing:
333
352M
    for(; parser.pos < len; parser.pos++) {
334
347M
        char c = json5[parser.pos];
335
347M
        switch(c) {
336
2.24M
        case '\n': // Skip newline and whitespace
337
3.55M
        case '\r':
338
3.94M
        case '\t':
339
4.47M
        case ' ':
340
4.47M
            break;
341
342
203k
        case '#': // Skip comment
343
377k
        case '/':
344
377k
            cj5__skip_comment(&parser);
345
377k
            if(parser.error != CJ5_ERROR_NONE &&
346
66.0k
               parser.error != CJ5_ERROR_OVERFLOW)
347
1.04k
                goto finish;
348
376k
            break;
349
350
6.80M
        case '{': // Open an object or array
351
7.02M
        case '[':
352
            // Check the nesting depth
353
7.02M
            if(depth + 1 >= CJ5_MAX_NESTING) {
354
23
                parser.error = CJ5_ERROR_INVALID;
355
23
                goto finish;
356
23
            }
357
358
            // Correct next?
359
7.02M
            if(next[depth] != 'v') {
360
262
                parser.error = CJ5_ERROR_INVALID;
361
262
                goto finish;
362
262
            }
363
364
7.02M
            depth++; // Increase the nesting depth
365
7.02M
            nesting[depth] = c; // Set the nesting type
366
7.02M
            next[depth] = (c == '{') ? 'k' : 'v'; // next is either a key or a value
367
368
            // Create a token for the object or array
369
7.02M
            token = cj5__alloc_token(&parser);
370
7.02M
            if(token) {
371
3.61M
                token->parent_id = parser.curr_tok_idx;
372
3.61M
                token->type = (c == '{') ? CJ5_TOKEN_OBJECT : CJ5_TOKEN_ARRAY;
373
3.61M
                token->start = parser.pos;
374
3.61M
                token->size = 0;
375
3.61M
                parser.curr_tok_idx = parser.token_count - 1; // The new curr_tok_idx
376
                                                              // is for this token
377
3.61M
            }
378
7.02M
            break;
379
380
6.79M
        case '}': // Close an object or array
381
7.00M
        case ']':
382
            // Check the nesting depth. Note that a "virtual root object" at
383
            // depth zero must not be closed.
384
7.00M
            if(depth == 0) {
385
153
                parser.error = CJ5_ERROR_INVALID;
386
153
                goto finish;
387
153
            }
388
389
            // Check and adjust the nesting. Note that ']' - '[' == 2 and '}' -
390
            // '{' == 2. Arrays can always be closed. Objects can only close
391
            // when a key or a comma is expected.
392
7.00M
            if(c - nesting[depth] != 2 ||
393
7.00M
               (c == '}' && next[depth] != 'k' && next[depth] != ',')) {
394
90
                parser.error = CJ5_ERROR_INVALID;
395
90
                goto finish;
396
90
            }
397
398
7.00M
            if(token) {
399
                // Finalize the current token
400
3.59M
                token->end = parser.pos;
401
402
                // Move to the parent and increase the parent size. Omit this
403
                // when we leave the root (parent the same as the current
404
                // token).
405
3.59M
                if(parser.curr_tok_idx != token->parent_id) {
406
3.55M
                    parser.curr_tok_idx = token->parent_id;
407
3.55M
                    token = &tokens[token->parent_id];
408
3.55M
                    token->size++;
409
3.55M
                }
410
3.59M
            }
411
412
            // Step one level up
413
7.00M
            depth--;
414
7.00M
            next[depth] = (depth == 0) ? 0 : ','; // zero if we step out the root
415
                                                  // object. then we do not look for
416
                                                  // another element.
417
418
            // The first element was successfully parsed. Stop early or try to
419
            // parse the full input string?
420
7.00M
            if(depth == 0 && parser.stop_early)
421
10.8k
                goto finish;
422
423
6.99M
            break;
424
425
19.0M
        case ':': // Colon (between key and value)
426
19.0M
            if(next[depth] != ':') {
427
20.5k
                parser.error = CJ5_ERROR_INVALID;
428
20.5k
                goto finish;
429
20.5k
            }
430
19.0M
            next[depth] = 'v';
431
19.0M
            break;
432
433
144M
        case ',': // Comma
434
144M
            if(next[depth] != ',') {
435
197
                parser.error = CJ5_ERROR_INVALID;
436
197
                goto finish;
437
197
            }
438
144M
            next[depth] = (nesting[depth] == '{') ? 'k' : 'v';
439
144M
            break;
440
441
165M
        default: // Value or key
442
165M
            if(next[depth] == 'v') {
443
145M
                cj5__parse_primitive(&parser); // Parse primitive value
444
145M
                if(nesting[depth] != 0) {
445
                    // Parent is object or array
446
140M
                    if(token)
447
132M
                        token->size++;
448
140M
                    next[depth] = ',';
449
140M
                } else {
450
                    // The current value was the root element. Don't look for
451
                    // any next element.
452
5.24M
                    next[depth] = 0;
453
454
                    // The first element was successfully parsed. Stop early or try to
455
                    // parse the full input string?
456
5.24M
                    if(parser.stop_early)
457
0
                        goto finish;
458
5.24M
                }
459
145M
            } else if(next[depth] == 'k') {
460
19.0M
                cj5__parse_key(&parser);
461
19.0M
                if(token)
462
15.1M
                    token->size++; // Keys count towards the length
463
19.0M
                next[depth] = ':';
464
19.0M
            } else {
465
3.62k
                parser.error = CJ5_ERROR_INVALID;
466
3.62k
            }
467
468
165M
            if(parser.error && parser.error != CJ5_ERROR_OVERFLOW)
469
8.83k
                goto finish;
470
471
165M
            break;
472
347M
        }
473
347M
    }
474
475
    // Are we back to the initial nesting depth?
476
5.26M
    if(depth != 0) {
477
1.13k
        parser.error = CJ5_ERROR_INCOMPLETE;
478
1.13k
        goto finish;
479
1.13k
    }
480
481
    // Close the virtual root object if there is one
482
5.26M
    if(nesting[0] == '{' && parser.error != CJ5_ERROR_OVERFLOW) {
483
        // Check the we end after a complete key-value pair (or dangling comma)
484
19.5k
        if(next[0] != 'k' && next[0] != ',')
485
525
            parser.error = CJ5_ERROR_INVALID;
486
19.5k
        tokens[0].end = parser.pos - 1;
487
19.5k
    }
488
489
5.30M
 finish:
490
    // If parsing failed at the initial nesting depth, create a virtual root object
491
    // and restart parsing.
492
5.30M
    if(parser.error != CJ5_ERROR_NONE &&
493
35.9k
       parser.error != CJ5_ERROR_OVERFLOW &&
494
32.7k
       depth == 0 && nesting[0] != '{') {
495
24.9k
        parser.token_count = 0;
496
24.9k
        token = cj5__alloc_token(&parser);
497
24.9k
        if(token) {
498
24.9k
            token->parent_id = 0;
499
24.9k
            token->type = CJ5_TOKEN_OBJECT;
500
24.9k
            token->start = 0;
501
24.9k
            token->size = 0;
502
503
24.9k
            nesting[0] = '{';
504
24.9k
            next[0] = 'k';
505
506
24.9k
            parser.curr_tok_idx = 0;
507
24.9k
            parser.pos = 0;
508
24.9k
            parser.error = CJ5_ERROR_NONE;
509
24.9k
            goto start_parsing;
510
24.9k
        }
511
24.9k
    }
512
513
5.28M
    memset(&r, 0x0, sizeof(r));
514
5.28M
    r.error = parser.error;
515
5.28M
    r.error_pos = parser.pos;
516
5.28M
    r.num_tokens = parser.token_count; // How many tokens (would) have been
517
                                       // consumed by the parser?
518
519
    // Not a single token was parsed -> return an error
520
5.28M
    if(r.num_tokens == 0)
521
344
        r.error = CJ5_ERROR_INCOMPLETE;
522
523
    // Set the tokens and original string only if successfully parsed
524
5.28M
    if(r.error == CJ5_ERROR_NONE) {
525
5.27M
        r.tokens = tokens;
526
5.27M
        r.json5 = json5;
527
5.27M
    }
528
529
5.28M
    return r;
530
5.30M
}
531
532
cj5_error_code
533
0
cj5_get_bool(const cj5_result *r, unsigned int tok_index, bool *out) {
534
0
    const cj5_token *token = &r->tokens[tok_index];
535
0
    if(token->type != CJ5_TOKEN_BOOL)
536
0
        return CJ5_ERROR_INVALID;
537
0
    *out = (r->json5[token->start] == 't');
538
0
    return CJ5_ERROR_NONE;
539
0
}
540
541
cj5_error_code
542
0
cj5_get_float(const cj5_result *r, unsigned int tok_index, double *out) {
543
0
    const cj5_token *token = &r->tokens[tok_index];
544
0
    if(token->type != CJ5_TOKEN_NUMBER)
545
0
        return CJ5_ERROR_INVALID;
546
547
0
    const char *tokstr = &r->json5[token->start];
548
0
    size_t toksize = token->end - token->start + 1;
549
0
    if(toksize == 0)
550
0
        return CJ5_ERROR_INVALID;
551
552
    // Skip prefixed +/-
553
0
    bool neg = false;
554
0
    if(tokstr[0] == '+' || tokstr[0] == '-') {
555
0
        neg = (tokstr[0] == '-');
556
0
        tokstr++;
557
0
        toksize--;
558
0
    }
559
560
    // Detect prefixed inf/nan
561
0
    if(strncmp(tokstr, "Infinity", toksize) == 0) {
562
0
        *out = neg ? -INFINITY : INFINITY;
563
0
        return CJ5_ERROR_NONE;
564
0
    } else if(strncmp(tokstr, "NaN", toksize) == 0) {
565
0
        *out = NAN;
566
0
        return CJ5_ERROR_NONE;
567
0
    }
568
569
    // reset the +/- detection and parse
570
0
    tokstr = &r->json5[token->start];
571
0
    toksize = token->end - token->start + 1;
572
0
    size_t parsed = parseDouble(tokstr, toksize, out);
573
574
    // There must only be whitespace between the end of the parsed number and
575
    // the end of the token
576
0
    for(size_t i = parsed; i < toksize; i++) {
577
0
        if(tokstr[i] != ' ' && tokstr[i] -'\t' >= 5)
578
0
            return CJ5_ERROR_INVALID;
579
0
    }
580
581
0
    return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID;
582
0
}
583
584
cj5_error_code
585
cj5_get_int(const cj5_result *r, unsigned int tok_index,
586
0
            int64_t *out) {
587
0
    const cj5_token *token = &r->tokens[tok_index];
588
0
    if(token->type != CJ5_TOKEN_NUMBER)
589
0
        return CJ5_ERROR_INVALID;
590
0
    size_t parsed = parseInt64(&r->json5[token->start], token->size, out);
591
0
    return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID;
592
0
}
593
594
cj5_error_code
595
cj5_get_uint(const cj5_result *r, unsigned int tok_index,
596
0
             uint64_t *out) {
597
0
    const cj5_token *token = &r->tokens[tok_index];
598
0
    if(token->type != CJ5_TOKEN_NUMBER)
599
0
        return CJ5_ERROR_INVALID;
600
0
    size_t parsed = parseUInt64(&r->json5[token->start], token->size, out);
601
0
    return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID;
602
0
}
603
604
static const uint32_t SURROGATE_OFFSET = 0x10000u - (0xD800u << 10) - 0xDC00;
605
606
static cj5_error_code
607
882k
parse_codepoint(const char *pos, uint32_t *out_utf) {
608
882k
    uint32_t utf = 0;
609
4.39M
    for(unsigned int i = 0; i < 4; i++) {
610
3.51M
        char byte = pos[i];
611
3.51M
        if(cj5__isnum(byte)) {
612
2.57M
            byte = (char)(byte - '0');
613
2.57M
        } else if(cj5__isrange(byte, 'a', 'f')) {
614
870k
            byte = (char)(byte - ('a' - 10));
615
870k
        } else if(cj5__isrange(byte, 'A', 'F')) {
616
62.2k
            byte = (char)(byte - ('A' - 10));
617
62.2k
        } else {
618
5.74k
            return CJ5_ERROR_INVALID;
619
5.74k
        }
620
3.51M
        utf = (utf << 4) | ((uint8_t)byte & 0xF);
621
3.51M
    }
622
876k
    *out_utf = utf;
623
876k
    return CJ5_ERROR_NONE;
624
882k
}
625
626
cj5_error_code
627
cj5_get_str(const cj5_result *r, unsigned int tok_index,
628
1.70M
            char *buf, unsigned int *buflen) {
629
1.70M
    const cj5_token *token = &r->tokens[tok_index];
630
1.70M
    if(token->type != CJ5_TOKEN_STRING)
631
0
        return CJ5_ERROR_INVALID;
632
633
1.70M
    const char *pos = &r->json5[token->start];
634
1.70M
    const char *end = &r->json5[token->end + 1];
635
1.70M
    unsigned int outpos = 0;
636
172M
    for(; pos < end; pos++) {
637
170M
        uint8_t c = (uint8_t)*pos;
638
        // Unprintable ascii characters must be escaped
639
170M
        if(c < ' ' || c == 127)
640
10.6k
            return CJ5_ERROR_INVALID;
641
642
        // Unescaped Ascii character or utf8 byte
643
170M
        if(c != '\\') {
644
161M
            buf[outpos++] = (char)c;
645
161M
            continue;
646
161M
        }
647
648
        // End of input before the escaped character
649
8.80M
        if(pos + 1 >= end)
650
9
            return CJ5_ERROR_INCOMPLETE;
651
652
        // Process escaped character
653
8.80M
        pos++;
654
8.80M
        c = (uint8_t)*pos;
655
8.80M
        switch(c) {
656
62.8k
        case 'b': buf[outpos++] = '\b'; break;
657
27.8k
        case 'f': buf[outpos++] = '\f'; break;
658
20.6k
        case 'r': buf[outpos++] = '\r'; break;
659
97.9k
        case 'n': buf[outpos++] = '\n'; break;
660
32.8k
        case 't': buf[outpos++] = '\t'; break;
661
7.69M
        default:  buf[outpos++] = (char)c; break;
662
872k
        case 'u': {
663
            // Parse a unicode code point
664
872k
            if(pos + 4 >= end)
665
609
                return CJ5_ERROR_INCOMPLETE;
666
871k
            pos++;
667
871k
            uint32_t utf;
668
871k
            cj5_error_code err = parse_codepoint(pos, &utf);
669
871k
            if(err != CJ5_ERROR_NONE)
670
5.43k
                return err;
671
866k
            pos += 3;
672
673
            // Parse a surrogate pair
674
866k
            if(0xd800 <= utf && utf <= 0xdfff) {
675
10.9k
                if(pos + 6 >= end)
676
86
                    return CJ5_ERROR_INVALID;
677
10.8k
                if(pos[1] != '\\' && pos[2] != 'u')
678
160
                    return CJ5_ERROR_INVALID;
679
10.6k
                pos += 3;
680
10.6k
                uint32_t utf2;
681
10.6k
                err = parse_codepoint(pos, &utf2);
682
10.6k
                if(err != CJ5_ERROR_NONE)
683
305
                    return err;
684
10.3k
                pos += 3;
685
                // High or low surrogate pair
686
10.3k
                utf = (utf <= 0xdbff) ?
687
6.92k
                    (utf << 10) + utf2 + SURROGATE_OFFSET :
688
10.3k
                    (utf2 << 10) + utf + SURROGATE_OFFSET;
689
10.3k
            }
690
691
            // Write the utf8 bytes of the code point
692
865k
            unsigned len = utf8_from_codepoint((unsigned char*)buf + outpos, utf);
693
865k
            if(len == 0)
694
326
                return CJ5_ERROR_INVALID; // Not a utf8 string
695
865k
            outpos += len;
696
865k
            break;
697
865k
        }
698
8.80M
        }
699
8.80M
    }
700
701
    // Terminate with \0
702
1.68M
    buf[outpos] = 0;
703
704
    // Set the output length
705
1.68M
    if(buflen)
706
1.68M
        *buflen = outpos;
707
1.68M
    return CJ5_ERROR_NONE;
708
1.70M
}
709
710
void
711
9.88k
cj5_skip(const cj5_result *r, unsigned int *tok_index) {
712
9.88k
    unsigned int idx = *tok_index;
713
9.88k
    unsigned int end = r->tokens[idx].end;
714
9.94k
    do { idx++; } while(idx < r->num_tokens &&
715
8.21k
                        r->tokens[idx].start < end);
716
9.88k
    *tok_index = idx;
717
9.88k
}
718
719
cj5_error_code
720
cj5_find(const cj5_result *r, unsigned int *tok_index,
721
0
         const char *key) {
722
    // It has to be an object
723
0
    unsigned int idx = *tok_index;
724
0
    if(r->tokens[idx].type != CJ5_TOKEN_OBJECT)
725
0
        return CJ5_ERROR_INVALID;
726
0
    unsigned int size = r->tokens[idx].size;
727
728
    // Skip to the first key
729
0
    idx++;
730
731
    // Size is number of keys + number of values
732
0
    for(unsigned int i = 0; i < size; i += 2) {
733
        // Key has to be a string
734
0
        if(r->tokens[idx].type != CJ5_TOKEN_STRING)
735
0
            return CJ5_ERROR_INVALID;
736
737
        // Return the index to the value if the key matches
738
0
        const char *keystart = &r->json5[r->tokens[idx].start];
739
0
        size_t keysize = r->tokens[idx].end - r->tokens[idx].start + 1;
740
0
        if(strncmp(key, keystart, keysize) == 0) {
741
0
            *tok_index = idx + 1;
742
0
            return CJ5_ERROR_NONE;
743
0
        }
744
745
        // Skip over the value
746
0
        idx++;
747
0
        cj5_skip(r, &idx);
748
0
    }
749
0
    return CJ5_ERROR_NOTFOUND;
750
0
}