/src/open62541/deps/cj5.c
Line | Count | Source (jump to first uncovered line) |
1 | | // MIT License |
2 | | // |
3 | | // Copyright (c) 2020 Sepehr Taghdisian |
4 | | // Copyright (c) 2022 Julius Pfrommer |
5 | | // |
6 | | // Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | // of this software and associated documentation files (the "Software"), to deal |
8 | | // in the Software without restriction, including without limitation the rights |
9 | | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | // copies of the Software, and to permit persons to whom the Software is |
11 | | // furnished to do so, subject to the following conditions: |
12 | | // |
13 | | // The above copyright notice and this permission notice shall be included in all |
14 | | // copies or substantial portions of the Software. |
15 | | // |
16 | | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | // SOFTWARE. |
23 | | |
24 | | #include "cj5.h" |
25 | | #include "parse_num.h" |
26 | | |
27 | | #include <math.h> |
28 | | #include <float.h> |
29 | | #include <string.h> |
30 | | |
31 | | #if defined(_MSC_VER) |
32 | | # define CJ5_INLINE __inline |
33 | | #else |
34 | | # define CJ5_INLINE inline |
35 | | #endif |
36 | | |
37 | | /* vs2008 does not have INFINITY and NAN defined */ |
38 | | #ifndef INFINITY |
39 | | # define INFINITY ((double)(DBL_MAX+DBL_MAX)) |
40 | | #endif |
41 | | #ifndef NAN |
42 | | # define NAN ((double)(INFINITY-INFINITY)) |
43 | | #endif |
44 | | |
45 | | #if defined(_MSC_VER) |
46 | | # pragma warning(disable: 4056) |
47 | | # pragma warning(disable: 4756) |
48 | | #endif |
49 | | |
50 | | /* Max nesting depth of objects and arrays */ |
51 | 0 | #define CJ5_MAX_NESTING 32 |
52 | | |
53 | | #define CJ5__FOURCC(_a, _b, _c, _d) \ |
54 | | (((uint32_t)(_a) | ((uint32_t)(_b) << 8) | \ |
55 | | ((uint32_t)(_c) << 16) | ((uint32_t)(_d) << 24))) |
56 | | |
57 | | static const uint32_t CJ5__NULL_FOURCC = CJ5__FOURCC('n', 'u', 'l', 'l'); |
58 | | static const uint32_t CJ5__TRUE_FOURCC = CJ5__FOURCC('t', 'r', 'u', 'e'); |
59 | | static const uint32_t CJ5__FALSE_FOURCC = CJ5__FOURCC('f', 'a', 'l', 's'); |
60 | | |
61 | | typedef struct { /* Internal tokenizer state shared by the cj5__parse_* helpers */ |
62 | | unsigned int pos; /* Current read offset into json5 */ |
63 | | unsigned int line_start; /* Offset of the most recent newline that was passed */ |
64 | | unsigned int line; /* Number of newlines passed so far */ |
65 | | cj5_error_code error; /* Error code set by the parsing routines */ |
66 | | |
67 | | const char *json5; /* Input text (indexed with pos, bounded by len) */ |
68 | | unsigned int len; /* Length of the input text */ |
69 | | |
70 | | unsigned int curr_tok_idx; /* Index stored as parent_id in newly created tokens */ |
71 | | |
72 | | cj5_token *tokens; /* Output token array handed to cj5_parse */ |
73 | | unsigned int token_count; /* Tokens requested so far; keeps growing past max_tokens */ |
74 | | unsigned int max_tokens; /* Capacity of the tokens array */ |
75 | | |
76 | | bool stop_early; /* Copied from the options: stop after the first root value */ |
77 | | } cj5__parser; |
78 | | |
79 | | static CJ5_INLINE bool |
80 | 0 | cj5__isrange(char ch, char from, char to) { |
81 | 0 | return (uint8_t)(ch - from) <= (uint8_t)(to - from); |
82 | 0 | } |
83 | | |
84 | 0 | #define cj5__isupperchar(ch) cj5__isrange(ch, 'A', 'Z') |
85 | 0 | #define cj5__islowerchar(ch) cj5__isrange(ch, 'a', 'z') |
86 | 0 | #define cj5__isnum(ch) cj5__isrange(ch, '0', '9') |
87 | | |
88 | | static cj5_token * |
89 | 0 | cj5__alloc_token(cj5__parser *parser) { |
90 | 0 | cj5_token* token = NULL; |
91 | 0 | if(parser->token_count < parser->max_tokens) { |
92 | 0 | token = &parser->tokens[parser->token_count]; |
93 | 0 | memset(token, 0x0, sizeof(cj5_token)); |
94 | 0 | } else { |
95 | 0 | parser->error = CJ5_ERROR_OVERFLOW; |
96 | 0 | } |
97 | | |
98 | | // Always increase the index. So we know eventually how many token would be |
99 | | // required (if there are not enough). |
100 | 0 | parser->token_count++; |
101 | 0 | return token; |
102 | 0 | } |
103 | | |
104 | | static void |
105 | 0 | cj5__parse_string(cj5__parser *parser) { |
106 | 0 | const char *json5 = parser->json5; |
107 | 0 | unsigned int len = parser->len; |
108 | 0 | unsigned int start = parser->pos; |
109 | 0 | char str_open = json5[start]; |
110 | |
|
111 | 0 | parser->pos++; |
112 | 0 | for(; parser->pos < len; parser->pos++) { |
113 | 0 | char c = json5[parser->pos]; |
114 | | |
115 | | // End of string |
116 | 0 | if(str_open == c) { |
117 | 0 | cj5_token *token = cj5__alloc_token(parser); |
118 | 0 | if(token) { |
119 | 0 | token->type = CJ5_TOKEN_STRING; |
120 | 0 | token->start = start + 1; |
121 | 0 | token->end = parser->pos - 1; |
122 | 0 | token->size = token->end - token->start + 1; |
123 | 0 | token->parent_id = parser->curr_tok_idx; |
124 | 0 | } |
125 | 0 | return; |
126 | 0 | } |
127 | | |
128 | | // Unescaped newlines are forbidden |
129 | 0 | if(c == '\n') { |
130 | 0 | parser->error = CJ5_ERROR_INVALID; |
131 | 0 | return; |
132 | 0 | } |
133 | | |
134 | | // Escape char |
135 | 0 | if(c == '\\') { |
136 | 0 | if(parser->pos + 1 >= len) { |
137 | 0 | parser->error = CJ5_ERROR_INCOMPLETE; |
138 | 0 | return; |
139 | 0 | } |
140 | 0 | parser->pos++; |
141 | 0 | switch(json5[parser->pos]) { |
142 | 0 | case '\"': |
143 | 0 | case '/': |
144 | 0 | case '\\': |
145 | 0 | case 'b': |
146 | 0 | case 'f': |
147 | 0 | case 'r': |
148 | 0 | case 'n': |
149 | 0 | case 't': |
150 | 0 | break; |
151 | 0 | case 'u': // The next four characters are an utf8 code |
152 | 0 | parser->pos++; |
153 | 0 | if(parser->pos + 4 >= len) { |
154 | 0 | parser->error = CJ5_ERROR_INVALID; |
155 | 0 | return; |
156 | 0 | } |
157 | 0 | for(unsigned int i = 0; i < 4; i++) { |
158 | | // If it isn't a hex character we have an error |
159 | 0 | if(!(json5[parser->pos] >= 48 && json5[parser->pos] <= 57) && /* 0-9 */ |
160 | 0 | !(json5[parser->pos] >= 65 && json5[parser->pos] <= 70) && /* A-F */ |
161 | 0 | !(json5[parser->pos] >= 97 && json5[parser->pos] <= 102)) /* a-f */ |
162 | 0 | { |
163 | 0 | parser->error = CJ5_ERROR_INVALID; |
164 | 0 | return; |
165 | 0 | } |
166 | 0 | parser->pos++; |
167 | 0 | } |
168 | 0 | parser->pos--; |
169 | 0 | break; |
170 | 0 | case '\n': // Escape break line |
171 | 0 | parser->line++; |
172 | 0 | parser->line_start = parser->pos; |
173 | 0 | break; |
174 | 0 | default: |
175 | 0 | parser->error = CJ5_ERROR_INVALID; |
176 | 0 | return; |
177 | 0 | } |
178 | 0 | } |
179 | 0 | } |
180 | | |
181 | | // The file has ended before the string terminates |
182 | 0 | parser->error = CJ5_ERROR_INCOMPLETE; |
183 | 0 | } |
184 | | |
185 | | // parser->pos is advanced a last time in the next iteration of the main |
186 | | // parse-loop. So we leave parse-primitive in a state where parse->pos points to |
187 | | // the last character of the primitive value (or the quote-character of the |
188 | | // string). |
189 | | static void |
190 | 0 | cj5__parse_primitive(cj5__parser* parser) { |
191 | 0 | const char* json5 = parser->json5; |
192 | 0 | unsigned int len = parser->len; |
193 | 0 | unsigned int start = parser->pos; |
194 | | |
195 | | // String value |
196 | 0 | if(json5[start] == '\"' || |
197 | 0 | json5[start] == '\'') { |
198 | 0 | cj5__parse_string(parser); |
199 | 0 | return; |
200 | 0 | } |
201 | | |
202 | | // Fast comparison of bool, and null. |
203 | | // We have to use memcpy here or we can get unaligned accesses |
204 | 0 | uint32_t fourcc = 0; |
205 | 0 | if(start + 4 < len) |
206 | 0 | memcpy(&fourcc, &json5[start], 4); |
207 | | |
208 | 0 | cj5_token_type type; |
209 | 0 | if(fourcc == CJ5__NULL_FOURCC) { |
210 | 0 | type = CJ5_TOKEN_NULL; |
211 | 0 | parser->pos += 3; |
212 | 0 | } else if(fourcc == CJ5__TRUE_FOURCC) { |
213 | 0 | type = CJ5_TOKEN_BOOL; |
214 | 0 | parser->pos += 3; |
215 | 0 | } else if(fourcc == CJ5__FALSE_FOURCC) { |
216 | | // "false" has five characters |
217 | 0 | type = CJ5_TOKEN_BOOL; |
218 | 0 | if(start + 4 >= len || json5[start+4] != 'e') { |
219 | 0 | parser->error = CJ5_ERROR_INVALID; |
220 | 0 | return; |
221 | 0 | } |
222 | 0 | parser->pos += 4; |
223 | 0 | } else { |
224 | | // Numbers are checked for basic compatibility. |
225 | | // But they are fully parsed only in the cj5_get_XXX functions. |
226 | 0 | type = CJ5_TOKEN_NUMBER; |
227 | 0 | for(; parser->pos < len; parser->pos++) { |
228 | 0 | if(!cj5__isnum(json5[parser->pos]) && |
229 | 0 | !(json5[parser->pos] == '.') && |
230 | 0 | !cj5__islowerchar(json5[parser->pos]) && |
231 | 0 | !cj5__isupperchar(json5[parser->pos]) && |
232 | 0 | !(json5[parser->pos] == '+') && !(json5[parser->pos] == '-')) { |
233 | 0 | break; |
234 | 0 | } |
235 | 0 | } |
236 | 0 | parser->pos--; // Point to the last character that is still inside the |
237 | | // primitive value |
238 | 0 | } |
239 | | |
240 | 0 | cj5_token *token = cj5__alloc_token(parser); |
241 | 0 | if(token) { |
242 | 0 | token->type = type; |
243 | 0 | token->start = start; |
244 | 0 | token->end = parser->pos; |
245 | 0 | token->size = parser->pos - start + 1; |
246 | 0 | token->parent_id = parser->curr_tok_idx; |
247 | 0 | } |
248 | 0 | } |
249 | | |
250 | | static void |
251 | 0 | cj5__parse_key(cj5__parser* parser) { |
252 | 0 | const char* json5 = parser->json5; |
253 | 0 | unsigned int start = parser->pos; |
254 | 0 | cj5_token* token; |
255 | | |
256 | | // Key is a a normal string |
257 | 0 | if(json5[start] == '\"' || json5[start] == '\'') { |
258 | 0 | cj5__parse_string(parser); |
259 | 0 | return; |
260 | 0 | } |
261 | | |
262 | | // An unquoted key. Must start with a-ZA-Z_$. Can contain numbers later on. |
263 | 0 | unsigned int len = parser->len; |
264 | 0 | for(; parser->pos < len; parser->pos++) { |
265 | 0 | if(cj5__islowerchar(json5[parser->pos]) || |
266 | 0 | cj5__isupperchar(json5[parser->pos]) || |
267 | 0 | json5[parser->pos] == '_' || json5[parser->pos] == '$') |
268 | 0 | continue; |
269 | 0 | if(cj5__isnum(json5[parser->pos]) && parser->pos != start) |
270 | 0 | continue; |
271 | 0 | break; |
272 | 0 | } |
273 | | |
274 | | // An empty key is not allowed |
275 | 0 | if(parser->pos <= start) { |
276 | 0 | parser->error = CJ5_ERROR_INVALID; |
277 | 0 | return; |
278 | 0 | } |
279 | | |
280 | | // Move pos to the last character within the unquoted key |
281 | 0 | parser->pos--; |
282 | |
|
283 | 0 | token = cj5__alloc_token(parser); |
284 | 0 | if(token) { |
285 | 0 | token->type = CJ5_TOKEN_STRING; |
286 | 0 | token->start = start; |
287 | 0 | token->end = parser->pos; |
288 | 0 | token->size = parser->pos - start + 1; |
289 | 0 | token->parent_id = parser->curr_tok_idx; |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | | static void |
294 | 0 | cj5__skip_comment(cj5__parser* parser) { |
295 | 0 | const char* json5 = parser->json5; |
296 | | |
297 | | // Single-line comment |
298 | 0 | if(json5[parser->pos] == '#') { |
299 | 0 | skip_line: |
300 | 0 | while(parser->pos < parser->len) { |
301 | 0 | if(json5[parser->pos] == '\n') { |
302 | 0 | parser->pos--; // Reparse the newline in the main parse loop |
303 | 0 | return; |
304 | 0 | } |
305 | 0 | parser->pos++; |
306 | 0 | } |
307 | 0 | return; |
308 | 0 | } |
309 | | |
310 | | // Comment begins with '/' but not enough space for another character |
311 | 0 | if(parser->pos + 1 >= parser->len) { |
312 | 0 | parser->error = CJ5_ERROR_INVALID; |
313 | 0 | return; |
314 | 0 | } |
315 | 0 | parser->pos++; |
316 | | |
317 | | // Comment begins with '//' -> single-line comment |
318 | 0 | if(json5[parser->pos] == '/') |
319 | 0 | goto skip_line; |
320 | | |
321 | | // Multi-line comments begin with '/*' and end with '*/' |
322 | 0 | if(json5[parser->pos] == '*') { |
323 | 0 | parser->pos++; |
324 | 0 | for(; parser->pos + 1 < parser->len; parser->pos++) { |
325 | 0 | if(json5[parser->pos] == '*' && json5[parser->pos + 1] == '/') { |
326 | 0 | parser->pos++; |
327 | 0 | return; |
328 | 0 | } |
329 | | // Remember we passed a newline |
330 | 0 | if(json5[parser->pos] == '\n') { |
331 | 0 | parser->line++; |
332 | 0 | parser->line_start = parser->pos; |
333 | 0 | } |
334 | 0 | } |
335 | 0 | } |
336 | | |
337 | | // Unknown comment type or the multi-line comment is not terminated |
338 | 0 | parser->error = CJ5_ERROR_INCOMPLETE; |
339 | 0 | } |
340 | | |
341 | | cj5_result |
342 | | cj5_parse(const char *json5, unsigned int len, |
343 | | cj5_token *tokens, unsigned int max_tokens, |
344 | 0 | cj5_options *options) { |
345 | 0 | cj5_result r; |
346 | 0 | cj5__parser parser; |
347 | 0 | memset(&parser, 0x0, sizeof(parser)); |
348 | 0 | parser.curr_tok_idx = 0; |
349 | 0 | parser.json5 = json5; |
350 | 0 | parser.len = len; |
351 | 0 | parser.tokens = tokens; |
352 | 0 | parser.max_tokens = max_tokens; |
353 | |
|
354 | 0 | if(options) |
355 | 0 | parser.stop_early = options->stop_early; |
356 | |
|
357 | 0 | unsigned short depth = 0; // Nesting depth zero means "outside the root object" |
358 | 0 | char nesting[CJ5_MAX_NESTING]; // Contains either '\0', '{' or '[' for the |
359 | | // type of nesting at each depth. '\0' |
360 | | // indicates we are out of the root object. |
361 | 0 | char next[CJ5_MAX_NESTING]; // Next content to parse: 'k' (key), ':', 'v' |
362 | | // (value) or ',' (comma). |
363 | 0 | next[0] = 'v'; // The root is a "value" (object, array or primitive). If we |
364 | | // detect a colon after the first value then everything is |
365 | | // wrapped into a "virtual root object" and the parsing is |
366 | | // restarted. |
367 | 0 | nesting[0] = 0; // Becomes '{' if there is a virtual root object |
368 | |
|
369 | 0 | cj5_token *token = NULL; // The current token |
370 | |
|
371 | 0 | start_parsing: |
372 | 0 | for(; parser.pos < len; parser.pos++) { |
373 | 0 | char c = json5[parser.pos]; |
374 | 0 | switch(c) { |
375 | 0 | case '\n': // Skip newline |
376 | 0 | parser.line++; |
377 | 0 | parser.line_start = parser.pos; |
378 | 0 | break; |
379 | | |
380 | 0 | case '\r': // Skip whitespace |
381 | 0 | case '\t': |
382 | 0 | case ' ': |
383 | 0 | break; |
384 | | |
385 | 0 | case '#': // Skip comment |
386 | 0 | case '/': |
387 | 0 | cj5__skip_comment(&parser); |
388 | 0 | if(parser.error != CJ5_ERROR_NONE && |
389 | 0 | parser.error != CJ5_ERROR_OVERFLOW) |
390 | 0 | goto finish; |
391 | 0 | break; |
392 | | |
393 | 0 | case '{': // Open an object or array |
394 | 0 | case '[': |
395 | | // Check the nesting depth |
396 | 0 | if(depth + 1 >= CJ5_MAX_NESTING) { |
397 | 0 | parser.error = CJ5_ERROR_INVALID; |
398 | 0 | goto finish; |
399 | 0 | } |
400 | | |
401 | | // Correct next? |
402 | 0 | if(next[depth] != 'v') { |
403 | 0 | parser.error = CJ5_ERROR_INVALID; |
404 | 0 | goto finish; |
405 | 0 | } |
406 | | |
407 | 0 | depth++; // Increase the nesting depth |
408 | 0 | nesting[depth] = c; // Set the nesting type |
409 | 0 | next[depth] = (c == '{') ? 'k' : 'v'; // next is either a key or a value |
410 | | |
411 | | // Create a token for the object or array |
412 | 0 | token = cj5__alloc_token(&parser); |
413 | 0 | if(token) { |
414 | 0 | token->parent_id = parser.curr_tok_idx; |
415 | 0 | token->type = (c == '{') ? CJ5_TOKEN_OBJECT : CJ5_TOKEN_ARRAY; |
416 | 0 | token->start = parser.pos; |
417 | 0 | token->size = 0; |
418 | 0 | parser.curr_tok_idx = parser.token_count - 1; // The new curr_tok_idx |
419 | | // is for this token |
420 | 0 | } |
421 | 0 | break; |
422 | | |
423 | 0 | case '}': // Close an object or array |
424 | 0 | case ']': |
425 | | // Check the nesting depth. Note that a "virtual root object" at |
426 | | // depth zero must not be closed. |
427 | 0 | if(depth == 0) { |
428 | 0 | parser.error = CJ5_ERROR_INVALID; |
429 | 0 | goto finish; |
430 | 0 | } |
431 | | |
432 | | // Check and adjust the nesting. Note that ']' - '[' == 2 and '}' - |
433 | | // '{' == 2. Arrays can always be closed. Objects can only close |
434 | | // when a key or a comma is expected. |
435 | 0 | if(c - nesting[depth] != 2 || |
436 | 0 | (c == '}' && next[depth] != 'k' && next[depth] != ',')) { |
437 | 0 | parser.error = CJ5_ERROR_INVALID; |
438 | 0 | goto finish; |
439 | 0 | } |
440 | | |
441 | 0 | if(token) { |
442 | | // Finalize the current token |
443 | 0 | token->end = parser.pos; |
444 | | |
445 | | // Move to the parent and increase the parent size. Omit this |
446 | | // when we leave the root (parent the same as the current |
447 | | // token). |
448 | 0 | if(parser.curr_tok_idx != token->parent_id) { |
449 | 0 | parser.curr_tok_idx = token->parent_id; |
450 | 0 | token = &tokens[token->parent_id]; |
451 | 0 | token->size++; |
452 | 0 | } |
453 | 0 | } |
454 | | |
455 | | // Step one level up |
456 | 0 | depth--; |
457 | 0 | next[depth] = (depth == 0) ? 0 : ','; // zero if we step out the root |
458 | | // object. then we do not look for |
459 | | // another element. |
460 | | |
461 | | // The first element was successfully parsed. Stop early or try to |
462 | | // parse the full input string? |
463 | 0 | if(depth == 0 && parser.stop_early) |
464 | 0 | goto finish; |
465 | | |
466 | 0 | break; |
467 | | |
468 | 0 | case ':': // Colon (between key and value) |
469 | 0 | if(next[depth] != ':') { |
470 | 0 | parser.error = CJ5_ERROR_INVALID; |
471 | 0 | goto finish; |
472 | 0 | } |
473 | 0 | next[depth] = 'v'; |
474 | 0 | break; |
475 | | |
476 | 0 | case ',': // Comma |
477 | 0 | if(next[depth] != ',') { |
478 | 0 | parser.error = CJ5_ERROR_INVALID; |
479 | 0 | goto finish; |
480 | 0 | } |
481 | 0 | next[depth] = (nesting[depth] == '{') ? 'k' : 'v'; |
482 | 0 | break; |
483 | | |
484 | 0 | default: // Value or key |
485 | 0 | if(next[depth] == 'v') { |
486 | 0 | cj5__parse_primitive(&parser); // Parse primitive value |
487 | 0 | if(nesting[depth] != 0) { |
488 | | // Parent is object or array |
489 | 0 | if(token) |
490 | 0 | token->size++; |
491 | 0 | next[depth] = ','; |
492 | 0 | } else { |
493 | | // The current value was the root element. Don't look for |
494 | | // any next element. |
495 | 0 | next[depth] = 0; |
496 | | |
497 | | // The first element was successfully parsed. Stop early or try to |
498 | | // parse the full input string? |
499 | 0 | if(parser.stop_early) |
500 | 0 | goto finish; |
501 | 0 | } |
502 | 0 | } else if(next[depth] == 'k') { |
503 | 0 | cj5__parse_key(&parser); |
504 | 0 | if(token) |
505 | 0 | token->size++; // Keys count towards the length |
506 | 0 | next[depth] = ':'; |
507 | 0 | } else { |
508 | 0 | parser.error = CJ5_ERROR_INVALID; |
509 | 0 | } |
510 | | |
511 | 0 | if(parser.error && parser.error != CJ5_ERROR_OVERFLOW) |
512 | 0 | goto finish; |
513 | | |
514 | 0 | break; |
515 | 0 | } |
516 | 0 | } |
517 | | |
518 | | // Are we back to the initial nesting depth? |
519 | 0 | if(depth != 0) { |
520 | 0 | parser.error = CJ5_ERROR_INCOMPLETE; |
521 | 0 | goto finish; |
522 | 0 | } |
523 | | |
524 | | // Close the virtual root object if there is one |
525 | 0 | if(nesting[0] == '{' && parser.error != CJ5_ERROR_OVERFLOW) { |
526 | | // Check the we end after a complete key-value pair (or dangling comma) |
527 | 0 | if(next[0] != 'k' && next[0] != ',') |
528 | 0 | parser.error = CJ5_ERROR_INVALID; |
529 | 0 | tokens[0].end = parser.pos - 1; |
530 | 0 | } |
531 | |
|
532 | 0 | finish: |
533 | | // If parsing failed at the initial nesting depth, create a virtual root object |
534 | | // and restart parsing. |
535 | 0 | if(parser.error != CJ5_ERROR_NONE && |
536 | 0 | parser.error != CJ5_ERROR_OVERFLOW && |
537 | 0 | depth == 0 && nesting[0] != '{') { |
538 | 0 | parser.token_count = 0; |
539 | 0 | token = cj5__alloc_token(&parser); |
540 | 0 | if(token) { |
541 | 0 | token->parent_id = 0; |
542 | 0 | token->type = CJ5_TOKEN_OBJECT; |
543 | 0 | token->start = 0; |
544 | 0 | token->size = 0; |
545 | |
|
546 | 0 | nesting[0] = '{'; |
547 | 0 | next[0] = 'k'; |
548 | |
|
549 | 0 | parser.curr_tok_idx = 0; |
550 | 0 | parser.pos = 0; |
551 | 0 | parser.error = CJ5_ERROR_NONE; |
552 | 0 | goto start_parsing; |
553 | 0 | } |
554 | 0 | } |
555 | | |
556 | 0 | memset(&r, 0x0, sizeof(r)); |
557 | 0 | r.error = parser.error; |
558 | 0 | r.error_line = parser.line; |
559 | 0 | r.error_col = parser.pos - parser.line_start; |
560 | 0 | r.num_tokens = parser.token_count; // How many tokens (would) have been |
561 | | // consumed by the parser? |
562 | | |
563 | | // Not a single token was parsed -> return an error |
564 | 0 | if(r.num_tokens == 0) |
565 | 0 | r.error = CJ5_ERROR_INCOMPLETE; |
566 | | |
567 | | // Set the tokens and original string only if successfully parsed |
568 | 0 | if(r.error == CJ5_ERROR_NONE) { |
569 | 0 | r.tokens = tokens; |
570 | 0 | r.json5 = json5; |
571 | 0 | } |
572 | |
|
573 | 0 | return r; |
574 | 0 | } |
575 | | |
576 | | cj5_error_code |
577 | 0 | cj5_get_bool(const cj5_result *r, unsigned int tok_index, bool *out) { |
578 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
579 | 0 | if(token->type != CJ5_TOKEN_BOOL) |
580 | 0 | return CJ5_ERROR_INVALID; |
581 | 0 | *out = (r->json5[token->start] == 't'); |
582 | 0 | return CJ5_ERROR_NONE; |
583 | 0 | } |
584 | | |
585 | | cj5_error_code |
586 | 0 | cj5_get_float(const cj5_result *r, unsigned int tok_index, double *out) { |
587 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
588 | 0 | if(token->type != CJ5_TOKEN_NUMBER) |
589 | 0 | return CJ5_ERROR_INVALID; |
590 | | |
591 | 0 | const char *tokstr = &r->json5[token->start]; |
592 | 0 | size_t toksize = token->end - token->start + 1; |
593 | 0 | if(toksize == 0) |
594 | 0 | return CJ5_ERROR_INVALID; |
595 | | |
596 | | // Skip prefixed +/- |
597 | 0 | bool neg = false; |
598 | 0 | if(tokstr[0] == '+' || tokstr[0] == '-') { |
599 | 0 | neg = (tokstr[0] == '-'); |
600 | 0 | tokstr++; |
601 | 0 | toksize--; |
602 | 0 | } |
603 | | |
604 | | // Detect prefixed inf/nan |
605 | 0 | if(strncmp(tokstr, "Infinity", toksize) == 0) { |
606 | 0 | *out = neg ? -INFINITY : INFINITY; |
607 | 0 | return CJ5_ERROR_NONE; |
608 | 0 | } else if(strncmp(tokstr, "NaN", toksize) == 0) { |
609 | 0 | *out = NAN; |
610 | 0 | return CJ5_ERROR_NONE; |
611 | 0 | } |
612 | | |
613 | | // reset the +/- detection and parse |
614 | 0 | tokstr = &r->json5[token->start]; |
615 | 0 | toksize = token->end - token->start + 1; |
616 | 0 | size_t parsed = parseDouble(tokstr, toksize, out); |
617 | | |
618 | | // There must only be whitespace between the end of the parsed number and |
619 | | // the end of the token |
620 | 0 | for(size_t i = parsed; i < toksize; i++) { |
621 | 0 | if(tokstr[i] != ' ' && tokstr[i] -'\t' >= 5) |
622 | 0 | return CJ5_ERROR_INVALID; |
623 | 0 | } |
624 | | |
625 | 0 | return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID; |
626 | 0 | } |
627 | | |
628 | | cj5_error_code |
629 | | cj5_get_int(const cj5_result *r, unsigned int tok_index, |
630 | 0 | int64_t *out) { |
631 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
632 | 0 | if(token->type != CJ5_TOKEN_NUMBER) |
633 | 0 | return CJ5_ERROR_INVALID; |
634 | 0 | size_t parsed = parseInt64(&r->json5[token->start], token->size, out); |
635 | 0 | return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID; |
636 | 0 | } |
637 | | |
638 | | cj5_error_code |
639 | | cj5_get_uint(const cj5_result *r, unsigned int tok_index, |
640 | 0 | uint64_t *out) { |
641 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
642 | 0 | if(token->type != CJ5_TOKEN_NUMBER) |
643 | 0 | return CJ5_ERROR_INVALID; |
644 | 0 | size_t parsed = parseUInt64(&r->json5[token->start], token->size, out); |
645 | 0 | return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID; |
646 | 0 | } |
647 | | |
648 | | static const uint32_t SURROGATE_OFFSET = 0x10000u - (0xD800u << 10) - 0xDC00; |
649 | | |
650 | | static cj5_error_code |
651 | 0 | parse_codepoint(const char *pos, uint32_t *out_utf) { |
652 | 0 | uint32_t utf = 0; |
653 | 0 | for(unsigned int i = 0; i < 4; i++) { |
654 | 0 | char byte = pos[i]; |
655 | 0 | if(cj5__isnum(byte)) { |
656 | 0 | byte = (char)(byte - '0'); |
657 | 0 | } else if(cj5__isrange(byte, 'a', 'f')) { |
658 | 0 | byte = (char)(byte - ('a' - 10)); |
659 | 0 | } else if(cj5__isrange(byte, 'A', 'F')) { |
660 | 0 | byte = (char)(byte - ('A' - 10)); |
661 | 0 | } else { |
662 | 0 | return CJ5_ERROR_INVALID; |
663 | 0 | } |
664 | 0 | utf = (utf << 4) | ((uint8_t)byte & 0xF); |
665 | 0 | } |
666 | 0 | *out_utf = utf; |
667 | 0 | return CJ5_ERROR_NONE; |
668 | 0 | } |
669 | | |
670 | | cj5_error_code |
671 | | cj5_get_str(const cj5_result *r, unsigned int tok_index, |
672 | 0 | char *buf, unsigned int *buflen) { |
673 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
674 | 0 | if(token->type != CJ5_TOKEN_STRING) |
675 | 0 | return CJ5_ERROR_INVALID; |
676 | | |
677 | 0 | const char *pos = &r->json5[token->start]; |
678 | 0 | const char *end = &r->json5[token->end + 1]; |
679 | 0 | unsigned int outpos = 0; |
680 | 0 | for(; pos < end; pos++) { |
681 | 0 | uint8_t c = (uint8_t)*pos; |
682 | | |
683 | | // Process an escape character |
684 | 0 | if(c == '\\') { |
685 | 0 | if(pos + 1 >= end) |
686 | 0 | return CJ5_ERROR_INCOMPLETE; |
687 | 0 | pos++; |
688 | 0 | c = (uint8_t)*pos; |
689 | 0 | switch(c) { |
690 | 0 | case '\"': buf[outpos++] = '\"'; break; |
691 | 0 | case '\\': buf[outpos++] = '\\'; break; |
692 | 0 | case '\n': buf[outpos++] = '\n'; break; // escape newline |
693 | 0 | case '/': buf[outpos++] = '/'; break; |
694 | 0 | case 'b': buf[outpos++] = '\b'; break; |
695 | 0 | case 'f': buf[outpos++] = '\f'; break; |
696 | 0 | case 'r': buf[outpos++] = '\r'; break; |
697 | 0 | case 'n': buf[outpos++] = '\n'; break; |
698 | 0 | case 't': buf[outpos++] = '\t'; break; |
699 | 0 | case 'u': { |
700 | | // Parse the unicode code point |
701 | 0 | if(pos + 4 >= end) |
702 | 0 | return CJ5_ERROR_INCOMPLETE; |
703 | 0 | pos++; |
704 | 0 | uint32_t utf; |
705 | 0 | cj5_error_code err = parse_codepoint(pos, &utf); |
706 | 0 | if(err != CJ5_ERROR_NONE) |
707 | 0 | return err; |
708 | 0 | pos += 3; |
709 | |
|
710 | 0 | if(0xD800 <= utf && utf <= 0xDBFF) { |
711 | | // Parse a surrogate pair |
712 | 0 | if(pos + 6 >= end) |
713 | 0 | return CJ5_ERROR_INVALID; |
714 | 0 | if(pos[1] != '\\' && pos[3] != 'u') |
715 | 0 | return CJ5_ERROR_INVALID; |
716 | 0 | pos += 3; |
717 | 0 | uint32_t trail; |
718 | 0 | err = parse_codepoint(pos, &trail); |
719 | 0 | if(err != CJ5_ERROR_NONE) |
720 | 0 | return err; |
721 | 0 | pos += 3; |
722 | 0 | utf = (utf << 10) + trail + SURROGATE_OFFSET; |
723 | 0 | } else if(0xDC00 <= utf && utf <= 0xDFFF) { |
724 | | // Invalid Unicode '\\u%04X' |
725 | 0 | return CJ5_ERROR_INVALID; |
726 | 0 | } |
727 | | |
728 | | // Write the utf8 bytes of the code point |
729 | 0 | if(utf <= 0x7F) { // Plain ASCII |
730 | 0 | buf[outpos++] = (char)utf; |
731 | 0 | } else if(utf <= 0x07FF) { // 2-byte unicode |
732 | 0 | buf[outpos++] = (char)(((utf >> 6) & 0x1F) | 0xC0); |
733 | 0 | buf[outpos++] = (char)(((utf >> 0) & 0x3F) | 0x80); |
734 | 0 | } else if(utf <= 0xFFFF) { // 3-byte unicode |
735 | 0 | buf[outpos++] = (char)(((utf >> 12) & 0x0F) | 0xE0); |
736 | 0 | buf[outpos++] = (char)(((utf >> 6) & 0x3F) | 0x80); |
737 | 0 | buf[outpos++] = (char)(((utf >> 0) & 0x3F) | 0x80); |
738 | 0 | } else if(utf <= 0x10FFFF) { // 4-byte unicode |
739 | 0 | buf[outpos++] = (char)(((utf >> 18) & 0x07) | 0xF0); |
740 | 0 | buf[outpos++] = (char)(((utf >> 12) & 0x3F) | 0x80); |
741 | 0 | buf[outpos++] = (char)(((utf >> 6) & 0x3F) | 0x80); |
742 | 0 | buf[outpos++] = (char)(((utf >> 0) & 0x3F) | 0x80); |
743 | 0 | } else { |
744 | 0 | return CJ5_ERROR_INVALID; // Not a utf8 string |
745 | 0 | } |
746 | 0 | break; |
747 | 0 | } |
748 | 0 | default: |
749 | 0 | return CJ5_ERROR_INVALID; |
750 | 0 | } |
751 | 0 | continue; |
752 | 0 | } |
753 | | |
754 | | // Unprintable ascii characters must be escaped. JSON5 allows nested |
755 | | // quotes if the quote character is not the same as the surrounding |
756 | | // quote character, e.g. 'this is my "quote"'. This logic is in the |
757 | | // token parsing code and not in this "string extraction" method. |
758 | 0 | if(c < ' ' || c == 127) |
759 | 0 | return CJ5_ERROR_INVALID; |
760 | | |
761 | | // Ascii character or utf8 byte |
762 | 0 | buf[outpos++] = (char)c; |
763 | 0 | } |
764 | | |
765 | | // Terminate with \0 |
766 | 0 | buf[outpos] = 0; |
767 | | |
768 | | // Set the output length |
769 | 0 | if(buflen) |
770 | 0 | *buflen = outpos; |
771 | 0 | return CJ5_ERROR_NONE; |
772 | 0 | } |
773 | | |
774 | | void |
775 | 0 | cj5_skip(const cj5_result *r, unsigned int *tok_index) { |
776 | 0 | unsigned int idx = *tok_index; |
777 | 0 | unsigned int end = r->tokens[idx].end; |
778 | 0 | do { idx++; } while(idx < r->num_tokens && |
779 | 0 | r->tokens[idx].start < end); |
780 | 0 | *tok_index = idx; |
781 | 0 | } |
782 | | |
783 | | cj5_error_code |
784 | | cj5_find(const cj5_result *r, unsigned int *tok_index, |
785 | 0 | const char *key) { |
786 | | // It has to be an object |
787 | 0 | unsigned int idx = *tok_index; |
788 | 0 | if(r->tokens[idx].type != CJ5_TOKEN_OBJECT) |
789 | 0 | return CJ5_ERROR_INVALID; |
790 | 0 | unsigned int size = r->tokens[idx].size; |
791 | | |
792 | | // Skip to the first key |
793 | 0 | idx++; |
794 | | |
795 | | // Size is number of keys + number of values |
796 | 0 | for(unsigned int i = 0; i < size; i += 2) { |
797 | | // Key has to be a string |
798 | 0 | if(r->tokens[idx].type != CJ5_TOKEN_STRING) |
799 | 0 | return CJ5_ERROR_INVALID; |
800 | | |
801 | | // Return the index to the value if the key matches |
802 | 0 | const char *keystart = &r->json5[r->tokens[idx].start]; |
803 | 0 | size_t keysize = r->tokens[idx].end - r->tokens[idx].start + 1; |
804 | 0 | if(strncmp(key, keystart, keysize) == 0) { |
805 | 0 | *tok_index = idx + 1; |
806 | 0 | return CJ5_ERROR_NONE; |
807 | 0 | } |
808 | | |
809 | | // Skip over the value |
810 | 0 | idx++; |
811 | 0 | cj5_skip(r, &idx); |
812 | 0 | } |
813 | 0 | return CJ5_ERROR_NOTFOUND; |
814 | 0 | } |