/src/open62541/deps/cj5.c
Line | Count | Source (jump to first uncovered line) |
1 | | // MIT License |
2 | | // |
3 | | // Copyright (c) 2020 Sepehr Taghdisian |
4 | | // Copyright (c) 2022, 2024 Julius Pfrommer |
5 | | // |
6 | | // Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | // of this software and associated documentation files (the "Software"), to deal |
8 | | // in the Software without restriction, including without limitation the rights |
9 | | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | // copies of the Software, and to permit persons to whom the Software is |
11 | | // furnished to do so, subject to the following conditions: |
12 | | // |
13 | | // The above copyright notice and this permission notice shall be included in all |
14 | | // copies or substantial portions of the Software. |
15 | | // |
16 | | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | // SOFTWARE. |
23 | | |
24 | | #include "cj5.h" |
25 | | #include "parse_num.h" |
26 | | #include "utf8.h" |
27 | | |
28 | | #include <math.h> |
29 | | #include <float.h> |
30 | | #include <string.h> |
31 | | |
32 | | #if defined(_MSC_VER) |
33 | | # define CJ5_INLINE __inline |
34 | | #else |
35 | | # define CJ5_INLINE inline |
36 | | #endif |
37 | | |
38 | | /* vs2008 does not have INFINITY and NAN defined */ |
39 | | #ifndef INFINITY |
40 | | # define INFINITY ((double)(DBL_MAX+DBL_MAX)) |
41 | | #endif |
42 | | #ifndef NAN |
43 | | # define NAN ((double)(INFINITY-INFINITY)) |
44 | | #endif |
45 | | |
46 | | #if defined(_MSC_VER) |
47 | | # pragma warning(disable: 4056) |
48 | | # pragma warning(disable: 4756) |
49 | | #endif |
50 | | |
/* Max nesting depth of objects and arrays. Also the length of the per-depth
 * state arrays kept on the stack by cj5_parse. */
#define CJ5_MAX_NESTING 32

/* Pack four byte values into one uint32_t. The first argument ends up in the
 * least-significant byte, the fourth in the most-significant byte. */
#define CJ5__FOURCC(_a, _b, _c, _d)                              \
    (((uint32_t)(_a) | ((uint32_t)(_b) << 8) |                   \
      ((uint32_t)(_c) << 16) | ((uint32_t)(_d) << 24)))

/* Packed first four (lowercase) characters of the literals null, true and
 * false. "false" has five characters, so only "fals" is packed here; the
 * trailing 'e' is checked separately in cj5__parse_primitive. */
static const uint32_t CJ5__NULL_FOURCC  = CJ5__FOURCC('n', 'u', 'l', 'l');
static const uint32_t CJ5__TRUE_FOURCC  = CJ5__FOURCC('t', 'r', 'u', 'e');
static const uint32_t CJ5__FALSE_FOURCC = CJ5__FOURCC('f', 'a', 'l', 's');
61 | | |
/* Internal state of a single cj5_parse run */
typedef struct {
    unsigned int pos;          // Offset of the character currently examined
    cj5_error_code error;      // Last error raised (CJ5_ERROR_NONE if none)

    const char *json5;         // Input text
    unsigned int len;          // Length of the input text

    unsigned int curr_tok_idx; // Index of the enclosing object/array token.
                               // Stored as parent_id in newly created tokens.

    cj5_token *tokens;         // Output array provided by the caller
    unsigned int token_count;  // Tokens requested so far. Keeps counting past
                               // max_tokens to report the required size.
    unsigned int max_tokens;   // Capacity of the tokens array

    bool stop_early;           // Copied from the options: return once the
                               // root element is complete
} cj5__parser;
77 | | |
78 | | static CJ5_INLINE bool |
79 | 0 | cj5__isrange(char ch, char from, char to) { |
80 | 0 | return (uint8_t)(ch - from) <= (uint8_t)(to - from); |
81 | 0 | } |
82 | | |
83 | 0 | #define cj5__isupperchar(ch) cj5__isrange(ch, 'A', 'Z') |
84 | 0 | #define cj5__islowerchar(ch) cj5__isrange(ch, 'a', 'z') |
85 | 0 | #define cj5__isnum(ch) cj5__isrange(ch, '0', '9') |
86 | | |
87 | | static cj5_token * |
88 | 0 | cj5__alloc_token(cj5__parser *parser) { |
89 | 0 | cj5_token* token = NULL; |
90 | 0 | if(parser->token_count < parser->max_tokens) { |
91 | 0 | token = &parser->tokens[parser->token_count]; |
92 | 0 | memset(token, 0x0, sizeof(cj5_token)); |
93 | 0 | } else { |
94 | 0 | parser->error = CJ5_ERROR_OVERFLOW; |
95 | 0 | } |
96 | | |
97 | | // Always increase the index. So we know eventually how many token would be |
98 | | // required (if there are not enough). |
99 | 0 | parser->token_count++; |
100 | 0 | return token; |
101 | 0 | } |
102 | | |
103 | | static void |
104 | 0 | cj5__parse_string(cj5__parser *parser) { |
105 | 0 | const char *json5 = parser->json5; |
106 | 0 | unsigned int len = parser->len; |
107 | 0 | unsigned int start = parser->pos; |
108 | 0 | char str_open = json5[start]; |
109 | |
|
110 | 0 | parser->pos++; |
111 | 0 | for(; parser->pos < len; parser->pos++) { |
112 | 0 | char c = json5[parser->pos]; |
113 | | |
114 | | // End of string |
115 | 0 | if(str_open == c) { |
116 | 0 | cj5_token *token = cj5__alloc_token(parser); |
117 | 0 | if(token) { |
118 | 0 | token->type = CJ5_TOKEN_STRING; |
119 | 0 | token->start = start + 1; |
120 | 0 | token->end = parser->pos - 1; |
121 | 0 | token->size = token->end - token->start + 1; |
122 | 0 | token->parent_id = parser->curr_tok_idx; |
123 | 0 | } |
124 | 0 | return; |
125 | 0 | } |
126 | | |
127 | | // Unescaped newlines are forbidden |
128 | 0 | if(c == '\n') { |
129 | 0 | parser->error = CJ5_ERROR_INVALID; |
130 | 0 | return; |
131 | 0 | } |
132 | | |
133 | | // Skip escape character |
134 | 0 | if(c == '\\') { |
135 | 0 | if(parser->pos + 1 >= len) { |
136 | 0 | parser->error = CJ5_ERROR_INCOMPLETE; |
137 | 0 | return; |
138 | 0 | } |
139 | 0 | parser->pos++; |
140 | 0 | } |
141 | 0 | } |
142 | | |
143 | | // The file has ended before the string terminates |
144 | 0 | parser->error = CJ5_ERROR_INCOMPLETE; |
145 | 0 | } |
146 | | |
147 | | // parser->pos is advanced a last time in the next iteration of the main |
148 | | // parse-loop. So we leave parse-primitive in a state where parse->pos points to |
149 | | // the last character of the primitive value (or the quote-character of the |
150 | | // string). |
151 | | static void |
152 | 0 | cj5__parse_primitive(cj5__parser* parser) { |
153 | 0 | const char* json5 = parser->json5; |
154 | 0 | unsigned int len = parser->len; |
155 | 0 | unsigned int start = parser->pos; |
156 | | |
157 | | // String value |
158 | 0 | if(json5[start] == '\"' || |
159 | 0 | json5[start] == '\'') { |
160 | 0 | cj5__parse_string(parser); |
161 | 0 | return; |
162 | 0 | } |
163 | | |
164 | | // Fast comparison of bool, and null. |
165 | | // Make the comparison case-insensitive. |
166 | 0 | uint32_t fourcc = 0; |
167 | 0 | if(start + 3 < len) { |
168 | 0 | fourcc += json5[start] | 32; |
169 | 0 | fourcc += (json5[start+1] | 32) << 8; |
170 | 0 | fourcc += (json5[start+2] | 32) << 16; |
171 | 0 | fourcc += (json5[start+3] | 32) << 24; |
172 | 0 | } |
173 | | |
174 | 0 | cj5_token_type type; |
175 | 0 | if(fourcc == CJ5__NULL_FOURCC) { |
176 | 0 | type = CJ5_TOKEN_NULL; |
177 | 0 | parser->pos += 3; |
178 | 0 | } else if(fourcc == CJ5__TRUE_FOURCC) { |
179 | 0 | type = CJ5_TOKEN_BOOL; |
180 | 0 | parser->pos += 3; |
181 | 0 | } else if(fourcc == CJ5__FALSE_FOURCC) { |
182 | | // "false" has five characters |
183 | 0 | type = CJ5_TOKEN_BOOL; |
184 | 0 | if(start + 4 >= len || (json5[start+4] | 32) != 'e') { |
185 | 0 | parser->error = CJ5_ERROR_INVALID; |
186 | 0 | return; |
187 | 0 | } |
188 | 0 | parser->pos += 4; |
189 | 0 | } else { |
190 | | // Numbers are checked for basic compatibility. |
191 | | // But they are fully parsed only in the cj5_get_XXX functions. |
192 | 0 | type = CJ5_TOKEN_NUMBER; |
193 | 0 | for(; parser->pos < len; parser->pos++) { |
194 | 0 | if(!cj5__isnum(json5[parser->pos]) && |
195 | 0 | !(json5[parser->pos] == '.') && |
196 | 0 | !cj5__islowerchar(json5[parser->pos]) && |
197 | 0 | !cj5__isupperchar(json5[parser->pos]) && |
198 | 0 | !(json5[parser->pos] == '+') && !(json5[parser->pos] == '-')) { |
199 | 0 | break; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | parser->pos--; // Point to the last character that is still inside the |
203 | | // primitive value |
204 | 0 | } |
205 | | |
206 | 0 | cj5_token *token = cj5__alloc_token(parser); |
207 | 0 | if(token) { |
208 | 0 | token->type = type; |
209 | 0 | token->start = start; |
210 | 0 | token->end = parser->pos; |
211 | 0 | token->size = parser->pos - start + 1; |
212 | 0 | token->parent_id = parser->curr_tok_idx; |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | | static void |
217 | 0 | cj5__parse_key(cj5__parser* parser) { |
218 | 0 | const char* json5 = parser->json5; |
219 | 0 | unsigned int start = parser->pos; |
220 | 0 | cj5_token* token; |
221 | | |
222 | | // Key is a a normal string |
223 | 0 | if(json5[start] == '\"' || json5[start] == '\'') { |
224 | 0 | cj5__parse_string(parser); |
225 | 0 | return; |
226 | 0 | } |
227 | | |
228 | | // An unquoted key. Must start with a-ZA-Z_$. Can contain numbers later on. |
229 | 0 | unsigned int len = parser->len; |
230 | 0 | for(; parser->pos < len; parser->pos++) { |
231 | 0 | if(cj5__islowerchar(json5[parser->pos]) || |
232 | 0 | cj5__isupperchar(json5[parser->pos]) || |
233 | 0 | json5[parser->pos] == '_' || json5[parser->pos] == '$') |
234 | 0 | continue; |
235 | 0 | if(cj5__isnum(json5[parser->pos]) && parser->pos != start) |
236 | 0 | continue; |
237 | 0 | break; |
238 | 0 | } |
239 | | |
240 | | // An empty key is not allowed |
241 | 0 | if(parser->pos <= start) { |
242 | 0 | parser->error = CJ5_ERROR_INVALID; |
243 | 0 | return; |
244 | 0 | } |
245 | | |
246 | | // Move pos to the last character within the unquoted key |
247 | 0 | parser->pos--; |
248 | |
|
249 | 0 | token = cj5__alloc_token(parser); |
250 | 0 | if(token) { |
251 | 0 | token->type = CJ5_TOKEN_STRING; |
252 | 0 | token->start = start; |
253 | 0 | token->end = parser->pos; |
254 | 0 | token->size = parser->pos - start + 1; |
255 | 0 | token->parent_id = parser->curr_tok_idx; |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | | static void |
260 | 0 | cj5__skip_comment(cj5__parser* parser) { |
261 | 0 | const char* json5 = parser->json5; |
262 | | |
263 | | // Single-line comment |
264 | 0 | if(json5[parser->pos] == '#') { |
265 | 0 | skip_line: |
266 | 0 | while(parser->pos < parser->len) { |
267 | 0 | if(json5[parser->pos] == '\n') { |
268 | 0 | parser->pos--; // Reparse the newline in the main parse loop |
269 | 0 | return; |
270 | 0 | } |
271 | 0 | parser->pos++; |
272 | 0 | } |
273 | 0 | return; |
274 | 0 | } |
275 | | |
276 | | // Comment begins with '/' but not enough space for another character |
277 | 0 | if(parser->pos + 1 >= parser->len) { |
278 | 0 | parser->error = CJ5_ERROR_INVALID; |
279 | 0 | return; |
280 | 0 | } |
281 | 0 | parser->pos++; |
282 | | |
283 | | // Comment begins with '//' -> single-line comment |
284 | 0 | if(json5[parser->pos] == '/') |
285 | 0 | goto skip_line; |
286 | | |
287 | | // Multi-line comments begin with '/*' and end with '*/' |
288 | 0 | if(json5[parser->pos] == '*') { |
289 | 0 | parser->pos++; |
290 | 0 | for(; parser->pos + 1 < parser->len; parser->pos++) { |
291 | 0 | if(json5[parser->pos] == '*' && json5[parser->pos + 1] == '/') { |
292 | 0 | parser->pos++; |
293 | 0 | return; |
294 | 0 | } |
295 | 0 | } |
296 | 0 | } |
297 | | |
298 | | // Unknown comment type or the multi-line comment is not terminated |
299 | 0 | parser->error = CJ5_ERROR_INCOMPLETE; |
300 | 0 | } |
301 | | |
302 | | cj5_result |
303 | | cj5_parse(const char *json5, unsigned int len, |
304 | | cj5_token *tokens, unsigned int max_tokens, |
305 | 0 | cj5_options *options) { |
306 | 0 | cj5_result r; |
307 | 0 | cj5__parser parser; |
308 | 0 | memset(&parser, 0x0, sizeof(parser)); |
309 | 0 | parser.curr_tok_idx = 0; |
310 | 0 | parser.json5 = json5; |
311 | 0 | parser.len = len; |
312 | 0 | parser.tokens = tokens; |
313 | 0 | parser.max_tokens = max_tokens; |
314 | |
|
315 | 0 | if(options) |
316 | 0 | parser.stop_early = options->stop_early; |
317 | |
|
318 | 0 | unsigned short depth = 0; // Nesting depth zero means "outside the root object" |
319 | 0 | char nesting[CJ5_MAX_NESTING]; // Contains either '\0', '{' or '[' for the |
320 | | // type of nesting at each depth. '\0' |
321 | | // indicates we are out of the root object. |
322 | 0 | char next[CJ5_MAX_NESTING]; // Next content to parse: 'k' (key), ':', 'v' |
323 | | // (value) or ',' (comma). |
324 | 0 | next[0] = 'v'; // The root is a "value" (object, array or primitive). If we |
325 | | // detect a colon after the first value then everything is |
326 | | // wrapped into a "virtual root object" and the parsing is |
327 | | // restarted. |
328 | 0 | nesting[0] = 0; // Becomes '{' if there is a virtual root object |
329 | |
|
330 | 0 | cj5_token *token = NULL; // The current token |
331 | |
|
332 | 0 | start_parsing: |
333 | 0 | for(; parser.pos < len; parser.pos++) { |
334 | 0 | char c = json5[parser.pos]; |
335 | 0 | switch(c) { |
336 | 0 | case '\n': // Skip newline and whitespace |
337 | 0 | case '\r': |
338 | 0 | case '\t': |
339 | 0 | case ' ': |
340 | 0 | break; |
341 | | |
342 | 0 | case '#': // Skip comment |
343 | 0 | case '/': |
344 | 0 | cj5__skip_comment(&parser); |
345 | 0 | if(parser.error != CJ5_ERROR_NONE && |
346 | 0 | parser.error != CJ5_ERROR_OVERFLOW) |
347 | 0 | goto finish; |
348 | 0 | break; |
349 | | |
350 | 0 | case '{': // Open an object or array |
351 | 0 | case '[': |
352 | | // Check the nesting depth |
353 | 0 | if(depth + 1 >= CJ5_MAX_NESTING) { |
354 | 0 | parser.error = CJ5_ERROR_INVALID; |
355 | 0 | goto finish; |
356 | 0 | } |
357 | | |
358 | | // Correct next? |
359 | 0 | if(next[depth] != 'v') { |
360 | 0 | parser.error = CJ5_ERROR_INVALID; |
361 | 0 | goto finish; |
362 | 0 | } |
363 | | |
364 | 0 | depth++; // Increase the nesting depth |
365 | 0 | nesting[depth] = c; // Set the nesting type |
366 | 0 | next[depth] = (c == '{') ? 'k' : 'v'; // next is either a key or a value |
367 | | |
368 | | // Create a token for the object or array |
369 | 0 | token = cj5__alloc_token(&parser); |
370 | 0 | if(token) { |
371 | 0 | token->parent_id = parser.curr_tok_idx; |
372 | 0 | token->type = (c == '{') ? CJ5_TOKEN_OBJECT : CJ5_TOKEN_ARRAY; |
373 | 0 | token->start = parser.pos; |
374 | 0 | token->size = 0; |
375 | 0 | parser.curr_tok_idx = parser.token_count - 1; // The new curr_tok_idx |
376 | | // is for this token |
377 | 0 | } |
378 | 0 | break; |
379 | | |
380 | 0 | case '}': // Close an object or array |
381 | 0 | case ']': |
382 | | // Check the nesting depth. Note that a "virtual root object" at |
383 | | // depth zero must not be closed. |
384 | 0 | if(depth == 0) { |
385 | 0 | parser.error = CJ5_ERROR_INVALID; |
386 | 0 | goto finish; |
387 | 0 | } |
388 | | |
389 | | // Check and adjust the nesting. Note that ']' - '[' == 2 and '}' - |
390 | | // '{' == 2. Arrays can always be closed. Objects can only close |
391 | | // when a key or a comma is expected. |
392 | 0 | if(c - nesting[depth] != 2 || |
393 | 0 | (c == '}' && next[depth] != 'k' && next[depth] != ',')) { |
394 | 0 | parser.error = CJ5_ERROR_INVALID; |
395 | 0 | goto finish; |
396 | 0 | } |
397 | | |
398 | 0 | if(token) { |
399 | | // Finalize the current token |
400 | 0 | token->end = parser.pos; |
401 | | |
402 | | // Move to the parent and increase the parent size. Omit this |
403 | | // when we leave the root (parent the same as the current |
404 | | // token). |
405 | 0 | if(parser.curr_tok_idx != token->parent_id) { |
406 | 0 | parser.curr_tok_idx = token->parent_id; |
407 | 0 | token = &tokens[token->parent_id]; |
408 | 0 | token->size++; |
409 | 0 | } |
410 | 0 | } |
411 | | |
412 | | // Step one level up |
413 | 0 | depth--; |
414 | 0 | next[depth] = (depth == 0) ? 0 : ','; // zero if we step out the root |
415 | | // object. then we do not look for |
416 | | // another element. |
417 | | |
418 | | // The first element was successfully parsed. Stop early or try to |
419 | | // parse the full input string? |
420 | 0 | if(depth == 0 && parser.stop_early) |
421 | 0 | goto finish; |
422 | | |
423 | 0 | break; |
424 | | |
425 | 0 | case ':': // Colon (between key and value) |
426 | 0 | if(next[depth] != ':') { |
427 | 0 | parser.error = CJ5_ERROR_INVALID; |
428 | 0 | goto finish; |
429 | 0 | } |
430 | 0 | next[depth] = 'v'; |
431 | 0 | break; |
432 | | |
433 | 0 | case ',': // Comma |
434 | 0 | if(next[depth] != ',') { |
435 | 0 | parser.error = CJ5_ERROR_INVALID; |
436 | 0 | goto finish; |
437 | 0 | } |
438 | 0 | next[depth] = (nesting[depth] == '{') ? 'k' : 'v'; |
439 | 0 | break; |
440 | | |
441 | 0 | default: // Value or key |
442 | 0 | if(next[depth] == 'v') { |
443 | 0 | cj5__parse_primitive(&parser); // Parse primitive value |
444 | 0 | if(nesting[depth] != 0) { |
445 | | // Parent is object or array |
446 | 0 | if(token) |
447 | 0 | token->size++; |
448 | 0 | next[depth] = ','; |
449 | 0 | } else { |
450 | | // The current value was the root element. Don't look for |
451 | | // any next element. |
452 | 0 | next[depth] = 0; |
453 | | |
454 | | // The first element was successfully parsed. Stop early or try to |
455 | | // parse the full input string? |
456 | 0 | if(parser.stop_early) |
457 | 0 | goto finish; |
458 | 0 | } |
459 | 0 | } else if(next[depth] == 'k') { |
460 | 0 | cj5__parse_key(&parser); |
461 | 0 | if(token) |
462 | 0 | token->size++; // Keys count towards the length |
463 | 0 | next[depth] = ':'; |
464 | 0 | } else { |
465 | 0 | parser.error = CJ5_ERROR_INVALID; |
466 | 0 | } |
467 | | |
468 | 0 | if(parser.error && parser.error != CJ5_ERROR_OVERFLOW) |
469 | 0 | goto finish; |
470 | | |
471 | 0 | break; |
472 | 0 | } |
473 | 0 | } |
474 | | |
475 | | // Are we back to the initial nesting depth? |
476 | 0 | if(depth != 0) { |
477 | 0 | parser.error = CJ5_ERROR_INCOMPLETE; |
478 | 0 | goto finish; |
479 | 0 | } |
480 | | |
481 | | // Close the virtual root object if there is one |
482 | 0 | if(nesting[0] == '{' && parser.error != CJ5_ERROR_OVERFLOW) { |
483 | | // Check the we end after a complete key-value pair (or dangling comma) |
484 | 0 | if(next[0] != 'k' && next[0] != ',') |
485 | 0 | parser.error = CJ5_ERROR_INVALID; |
486 | 0 | tokens[0].end = parser.pos - 1; |
487 | 0 | } |
488 | |
|
489 | 0 | finish: |
490 | | // If parsing failed at the initial nesting depth, create a virtual root object |
491 | | // and restart parsing. |
492 | 0 | if(parser.error != CJ5_ERROR_NONE && |
493 | 0 | parser.error != CJ5_ERROR_OVERFLOW && |
494 | 0 | depth == 0 && nesting[0] != '{') { |
495 | 0 | parser.token_count = 0; |
496 | 0 | token = cj5__alloc_token(&parser); |
497 | 0 | if(token) { |
498 | 0 | token->parent_id = 0; |
499 | 0 | token->type = CJ5_TOKEN_OBJECT; |
500 | 0 | token->start = 0; |
501 | 0 | token->size = 0; |
502 | |
|
503 | 0 | nesting[0] = '{'; |
504 | 0 | next[0] = 'k'; |
505 | |
|
506 | 0 | parser.curr_tok_idx = 0; |
507 | 0 | parser.pos = 0; |
508 | 0 | parser.error = CJ5_ERROR_NONE; |
509 | 0 | goto start_parsing; |
510 | 0 | } |
511 | 0 | } |
512 | | |
513 | 0 | memset(&r, 0x0, sizeof(r)); |
514 | 0 | r.error = parser.error; |
515 | 0 | r.error_pos = parser.pos; |
516 | 0 | r.num_tokens = parser.token_count; // How many tokens (would) have been |
517 | | // consumed by the parser? |
518 | | |
519 | | // Not a single token was parsed -> return an error |
520 | 0 | if(r.num_tokens == 0) |
521 | 0 | r.error = CJ5_ERROR_INCOMPLETE; |
522 | | |
523 | | // Set the tokens and original string only if successfully parsed |
524 | 0 | if(r.error == CJ5_ERROR_NONE) { |
525 | 0 | r.tokens = tokens; |
526 | 0 | r.json5 = json5; |
527 | 0 | } |
528 | |
|
529 | 0 | return r; |
530 | 0 | } |
531 | | |
532 | | cj5_error_code |
533 | 0 | cj5_get_bool(const cj5_result *r, unsigned int tok_index, bool *out) { |
534 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
535 | 0 | if(token->type != CJ5_TOKEN_BOOL) |
536 | 0 | return CJ5_ERROR_INVALID; |
537 | 0 | *out = (r->json5[token->start] == 't'); |
538 | 0 | return CJ5_ERROR_NONE; |
539 | 0 | } |
540 | | |
541 | | cj5_error_code |
542 | 0 | cj5_get_float(const cj5_result *r, unsigned int tok_index, double *out) { |
543 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
544 | 0 | if(token->type != CJ5_TOKEN_NUMBER) |
545 | 0 | return CJ5_ERROR_INVALID; |
546 | | |
547 | 0 | const char *tokstr = &r->json5[token->start]; |
548 | 0 | size_t toksize = token->end - token->start + 1; |
549 | 0 | if(toksize == 0) |
550 | 0 | return CJ5_ERROR_INVALID; |
551 | | |
552 | | // Skip prefixed +/- |
553 | 0 | bool neg = false; |
554 | 0 | if(tokstr[0] == '+' || tokstr[0] == '-') { |
555 | 0 | neg = (tokstr[0] == '-'); |
556 | 0 | tokstr++; |
557 | 0 | toksize--; |
558 | 0 | } |
559 | | |
560 | | // Detect prefixed inf/nan |
561 | 0 | if(strncmp(tokstr, "Infinity", toksize) == 0) { |
562 | 0 | *out = neg ? -INFINITY : INFINITY; |
563 | 0 | return CJ5_ERROR_NONE; |
564 | 0 | } else if(strncmp(tokstr, "NaN", toksize) == 0) { |
565 | 0 | *out = NAN; |
566 | 0 | return CJ5_ERROR_NONE; |
567 | 0 | } |
568 | | |
569 | | // reset the +/- detection and parse |
570 | 0 | tokstr = &r->json5[token->start]; |
571 | 0 | toksize = token->end - token->start + 1; |
572 | 0 | size_t parsed = parseDouble(tokstr, toksize, out); |
573 | | |
574 | | // There must only be whitespace between the end of the parsed number and |
575 | | // the end of the token |
576 | 0 | for(size_t i = parsed; i < toksize; i++) { |
577 | 0 | if(tokstr[i] != ' ' && tokstr[i] -'\t' >= 5) |
578 | 0 | return CJ5_ERROR_INVALID; |
579 | 0 | } |
580 | | |
581 | 0 | return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID; |
582 | 0 | } |
583 | | |
584 | | cj5_error_code |
585 | | cj5_get_int(const cj5_result *r, unsigned int tok_index, |
586 | 0 | int64_t *out) { |
587 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
588 | 0 | if(token->type != CJ5_TOKEN_NUMBER) |
589 | 0 | return CJ5_ERROR_INVALID; |
590 | 0 | size_t parsed = parseInt64(&r->json5[token->start], token->size, out); |
591 | 0 | return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID; |
592 | 0 | } |
593 | | |
594 | | cj5_error_code |
595 | | cj5_get_uint(const cj5_result *r, unsigned int tok_index, |
596 | 0 | uint64_t *out) { |
597 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
598 | 0 | if(token->type != CJ5_TOKEN_NUMBER) |
599 | 0 | return CJ5_ERROR_INVALID; |
600 | 0 | size_t parsed = parseUInt64(&r->json5[token->start], token->size, out); |
601 | 0 | return (parsed != 0) ? CJ5_ERROR_NONE : CJ5_ERROR_INVALID; |
602 | 0 | } |
603 | | |
604 | | static const uint32_t SURROGATE_OFFSET = 0x10000u - (0xD800u << 10) - 0xDC00; |
605 | | |
606 | | static cj5_error_code |
607 | 0 | parse_codepoint(const char *pos, uint32_t *out_utf) { |
608 | 0 | uint32_t utf = 0; |
609 | 0 | for(unsigned int i = 0; i < 4; i++) { |
610 | 0 | char byte = pos[i]; |
611 | 0 | if(cj5__isnum(byte)) { |
612 | 0 | byte = (char)(byte - '0'); |
613 | 0 | } else if(cj5__isrange(byte, 'a', 'f')) { |
614 | 0 | byte = (char)(byte - ('a' - 10)); |
615 | 0 | } else if(cj5__isrange(byte, 'A', 'F')) { |
616 | 0 | byte = (char)(byte - ('A' - 10)); |
617 | 0 | } else { |
618 | 0 | return CJ5_ERROR_INVALID; |
619 | 0 | } |
620 | 0 | utf = (utf << 4) | ((uint8_t)byte & 0xF); |
621 | 0 | } |
622 | 0 | *out_utf = utf; |
623 | 0 | return CJ5_ERROR_NONE; |
624 | 0 | } |
625 | | |
626 | | cj5_error_code |
627 | | cj5_get_str(const cj5_result *r, unsigned int tok_index, |
628 | 0 | char *buf, unsigned int *buflen) { |
629 | 0 | const cj5_token *token = &r->tokens[tok_index]; |
630 | 0 | if(token->type != CJ5_TOKEN_STRING) |
631 | 0 | return CJ5_ERROR_INVALID; |
632 | | |
633 | 0 | const char *pos = &r->json5[token->start]; |
634 | 0 | const char *end = &r->json5[token->end + 1]; |
635 | 0 | unsigned int outpos = 0; |
636 | 0 | for(; pos < end; pos++) { |
637 | 0 | uint8_t c = (uint8_t)*pos; |
638 | | // Unprintable ascii characters must be escaped |
639 | 0 | if(c < ' ' || c == 127) |
640 | 0 | return CJ5_ERROR_INVALID; |
641 | | |
642 | | // Unescaped Ascii character or utf8 byte |
643 | 0 | if(c != '\\') { |
644 | 0 | buf[outpos++] = (char)c; |
645 | 0 | continue; |
646 | 0 | } |
647 | | |
648 | | // End of input before the escaped character |
649 | 0 | if(pos + 1 >= end) |
650 | 0 | return CJ5_ERROR_INCOMPLETE; |
651 | | |
652 | | // Process escaped character |
653 | 0 | pos++; |
654 | 0 | c = (uint8_t)*pos; |
655 | 0 | switch(c) { |
656 | 0 | case 'b': buf[outpos++] = '\b'; break; |
657 | 0 | case 'f': buf[outpos++] = '\f'; break; |
658 | 0 | case 'r': buf[outpos++] = '\r'; break; |
659 | 0 | case 'n': buf[outpos++] = '\n'; break; |
660 | 0 | case 't': buf[outpos++] = '\t'; break; |
661 | 0 | default: buf[outpos++] = c; break; |
662 | 0 | case 'u': { |
663 | | // Parse a unicode code point |
664 | 0 | if(pos + 4 >= end) |
665 | 0 | return CJ5_ERROR_INCOMPLETE; |
666 | 0 | pos++; |
667 | 0 | uint32_t utf; |
668 | 0 | cj5_error_code err = parse_codepoint(pos, &utf); |
669 | 0 | if(err != CJ5_ERROR_NONE) |
670 | 0 | return err; |
671 | 0 | pos += 3; |
672 | | |
673 | | // Parse a surrogate pair |
674 | 0 | if(0xd800 <= utf && utf <= 0xdfff) { |
675 | 0 | if(pos + 6 >= end) |
676 | 0 | return CJ5_ERROR_INVALID; |
677 | 0 | if(pos[1] != '\\' && pos[2] != 'u') |
678 | 0 | return CJ5_ERROR_INVALID; |
679 | 0 | pos += 3; |
680 | 0 | uint32_t utf2; |
681 | 0 | err = parse_codepoint(pos, &utf2); |
682 | 0 | if(err != CJ5_ERROR_NONE) |
683 | 0 | return err; |
684 | 0 | pos += 3; |
685 | | // High or low surrogate pair |
686 | 0 | utf = (utf <= 0xdbff) ? |
687 | 0 | (utf << 10) + utf2 + SURROGATE_OFFSET : |
688 | 0 | (utf2 << 10) + utf + SURROGATE_OFFSET; |
689 | 0 | } |
690 | | |
691 | | // Write the utf8 bytes of the code point |
692 | 0 | unsigned len = utf8_from_codepoint((unsigned char*)buf + outpos, utf); |
693 | 0 | if(len == 0) |
694 | 0 | return CJ5_ERROR_INVALID; // Not a utf8 string |
695 | 0 | outpos += len; |
696 | 0 | break; |
697 | 0 | } |
698 | 0 | } |
699 | 0 | } |
700 | | |
701 | | // Terminate with \0 |
702 | 0 | buf[outpos] = 0; |
703 | | |
704 | | // Set the output length |
705 | 0 | if(buflen) |
706 | 0 | *buflen = outpos; |
707 | 0 | return CJ5_ERROR_NONE; |
708 | 0 | } |
709 | | |
710 | | void |
711 | 0 | cj5_skip(const cj5_result *r, unsigned int *tok_index) { |
712 | 0 | unsigned int idx = *tok_index; |
713 | 0 | unsigned int end = r->tokens[idx].end; |
714 | 0 | do { idx++; } while(idx < r->num_tokens && |
715 | 0 | r->tokens[idx].start < end); |
716 | 0 | *tok_index = idx; |
717 | 0 | } |
718 | | |
719 | | cj5_error_code |
720 | | cj5_find(const cj5_result *r, unsigned int *tok_index, |
721 | 0 | const char *key) { |
722 | | // It has to be an object |
723 | 0 | unsigned int idx = *tok_index; |
724 | 0 | if(r->tokens[idx].type != CJ5_TOKEN_OBJECT) |
725 | 0 | return CJ5_ERROR_INVALID; |
726 | 0 | unsigned int size = r->tokens[idx].size; |
727 | | |
728 | | // Skip to the first key |
729 | 0 | idx++; |
730 | | |
731 | | // Size is number of keys + number of values |
732 | 0 | for(unsigned int i = 0; i < size; i += 2) { |
733 | | // Key has to be a string |
734 | 0 | if(r->tokens[idx].type != CJ5_TOKEN_STRING) |
735 | 0 | return CJ5_ERROR_INVALID; |
736 | | |
737 | | // Return the index to the value if the key matches |
738 | 0 | const char *keystart = &r->json5[r->tokens[idx].start]; |
739 | 0 | size_t keysize = r->tokens[idx].end - r->tokens[idx].start + 1; |
740 | 0 | if(strncmp(key, keystart, keysize) == 0) { |
741 | 0 | *tok_index = idx + 1; |
742 | 0 | return CJ5_ERROR_NONE; |
743 | 0 | } |
744 | | |
745 | | // Skip over the value |
746 | 0 | idx++; |
747 | 0 | cj5_skip(r, &idx); |
748 | 0 | } |
749 | 0 | return CJ5_ERROR_NOTFOUND; |
750 | 0 | } |