/src/libwebsockets/lib/misc/lejp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * libwebsockets - small server side websockets and web server implementation |
3 | | * |
4 | | * Copyright (C) 2010 - 2020 Andy Green <andy@warmcat.com> |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to |
8 | | * deal in the Software without restriction, including without limitation the |
9 | | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
10 | | * sell copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
21 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
22 | | * IN THE SOFTWARE. |
23 | | */ |
24 | | |
25 | | #include "private-lib-core.h" |
26 | | #include <string.h> |
27 | | #include <stdio.h> |
28 | | |
/*
 * Human-readable text for lejp result codes.  lejp_error_to_string() indexes
 * this table with the negated result code (positive results are treated as
 * 0), so the order of the entries is significant.
 */
static const char * const parser_errs[] = {
	[0]  = "",
	[1]  = "",
	[2]  = "No opening '{'",
	[3]  = "Expected closing '}'",
	[4]  = "Expected '\"'",
	[5]  = "String underrun",
	[6]  = "Illegal unescaped control char",
	[7]  = "Illegal escape format",
	[8]  = "Illegal hex number",
	[9]  = "Expected ':'",
	[10] = "Illegal value start",
	[11] = "Digit required after decimal point",
	[12] = "Bad number format",
	[13] = "Bad exponent format",
	[14] = "Unknown token",
	[15] = "Too many ']'",
	[16] = "Mismatched ']'",
	[17] = "Expected ']'",
	[18] = "JSON nesting limit exceeded",
	[19] = "Nesting tracking used up",
	[20] = "Number too long",
	[21] = "Comma or block end expected",
	[22] = "Unknown",
	[23] = "Parser callback errored (see earlier error)",
};
55 | | |
56 | | /** |
57 | | * lejp_construct - prepare a struct lejp_ctx for use |
58 | | * |
59 | | * \param ctx: pointer to your struct lejp_ctx |
60 | | * \param callback: your user callback which will received parsed tokens |
61 | | * \param user: optional user data pointer untouched by lejp |
62 | | * \param paths: your array of name elements you are interested in |
63 | | * \param count_paths: LWS_ARRAY_SIZE() of @paths |
64 | | * |
65 | | * Prepares your context struct for use with lejp |
66 | | */ |
67 | | |
68 | | void |
69 | | lejp_construct(struct lejp_ctx *ctx, |
70 | | signed char (*callback)(struct lejp_ctx *ctx, char reason), void *user, |
71 | | const char * const *paths, unsigned char count_paths) |
72 | 0 | { |
73 | 0 | ctx->st[0].s = 0; |
74 | 0 | ctx->st[0].p = 0; |
75 | 0 | ctx->st[0].i = 0; |
76 | 0 | ctx->st[0].b = 0; |
77 | 0 | ctx->sp = 0; |
78 | 0 | ctx->ipos = 0; |
79 | 0 | ctx->outer_array = 0; |
80 | 0 | ctx->path_match = 0; |
81 | 0 | ctx->path_stride = 0; |
82 | 0 | ctx->path[0] = '\0'; |
83 | 0 | ctx->user = user; |
84 | 0 | ctx->line = 1; |
85 | 0 | ctx->flags = 0; /* user may set after construction */ |
86 | |
|
87 | 0 | ctx->pst_sp = 0; |
88 | 0 | ctx->pst[0].callback = callback; |
89 | 0 | ctx->pst[0].paths = paths; |
90 | 0 | ctx->pst[0].count_paths = count_paths; |
91 | 0 | ctx->pst[0].user = NULL; |
92 | 0 | ctx->pst[0].ppos = 0; |
93 | |
|
94 | 0 | ctx->pst[0].callback(ctx, LEJPCB_CONSTRUCTED); |
95 | 0 | } |
96 | | |
97 | | /** |
98 | | * lejp_destruct - retire a previously constructed struct lejp_ctx |
99 | | * |
100 | | * \param ctx: pointer to your struct lejp_ctx |
101 | | * |
102 | | * lejp does not perform any allocations, but since your user code might, this |
103 | | * provides a one-time LEJPCB_DESTRUCTED callback at destruction time where |
104 | | * you can clean up in your callback. |
105 | | */ |
106 | | |
107 | | void |
108 | | lejp_destruct(struct lejp_ctx *ctx) |
109 | 0 | { |
110 | | /* no allocations... just let callback know what it happening */ |
111 | 0 | if (ctx && ctx->pst[0].callback) |
112 | 0 | ctx->pst[0].callback(ctx, LEJPCB_DESTRUCTED); |
113 | 0 | } |
114 | | |
115 | | /** |
116 | | * lejp_change_callback - switch to a different callback from now on |
117 | | * |
118 | | * \param ctx: pointer to your struct lejp_ctx |
119 | | * \param callback: your user callback which will received parsed tokens |
120 | | * |
121 | | * This tells the old callback it was destroyed, in case you want to take any |
122 | | * action because that callback "lost focus", then changes to the new |
123 | | * callback and tells it first that it was constructed, and then started. |
124 | | * |
125 | | * Changing callback is a cheap and powerful trick to split out handlers |
126 | | * according to information earlier in the parse. For example you may have |
127 | | * a JSON pair "schema" whose value defines what can be expected for the rest |
128 | | * of the JSON. Rather than having one huge callback for all cases, you can |
129 | | * have an initial one looking for "schema" which then calls |
130 | | * lejp_change_callback() to a handler specific for the schema. |
131 | | * |
132 | | * Notice that afterwards, you need to construct the context again anyway to |
133 | | * parse another JSON object, and the callback is reset then to the main, |
134 | | * schema-interpreting one. The construction action is very lightweight. |
135 | | */ |
136 | | |
137 | | void |
138 | | lejp_change_callback(struct lejp_ctx *ctx, |
139 | | signed char (*callback)(struct lejp_ctx *ctx, char reason)) |
140 | 0 | { |
141 | 0 | ctx->pst[0].callback(ctx, LEJPCB_DESTRUCTED); |
142 | 0 | ctx->pst[0].callback = callback; |
143 | 0 | ctx->pst[0].callback(ctx, LEJPCB_CONSTRUCTED); |
144 | 0 | ctx->pst[0].callback(ctx, LEJPCB_START); |
145 | 0 | } |
146 | | |
147 | | void |
148 | | lejp_check_path_match(struct lejp_ctx *ctx) |
149 | 0 | { |
150 | 0 | const char *p, *q; |
151 | 0 | int n; |
152 | 0 | size_t s = sizeof(char *); |
153 | |
|
154 | 0 | if (ctx->path_stride) |
155 | 0 | s = ctx->path_stride; |
156 | | |
157 | | /* we only need to check if a match is not active */ |
158 | 0 | for (n = 0; //!ctx->path_match && |
159 | 0 | n < ctx->pst[ctx->pst_sp].count_paths; n++) { |
160 | 0 | ctx->wildcount = 0; |
161 | 0 | p = ctx->path; |
162 | |
|
163 | 0 | q = *((char **)(((char *)ctx->pst[ctx->pst_sp].paths) + ((unsigned int)n * s))); |
164 | | //lwsl_notice("%s: %s %s\n", __func__, p, q); |
165 | 0 | while (*p && *q) { |
166 | 0 | if (*q != '*') { |
167 | 0 | if (*p != *q) |
168 | 0 | break; |
169 | 0 | p++; |
170 | 0 | q++; |
171 | 0 | continue; |
172 | 0 | } |
173 | 0 | ctx->wild[ctx->wildcount++] = (uint16_t)lws_ptr_diff_size_t(p, ctx->path); |
174 | 0 | q++; |
175 | | /* |
176 | | * if * has something after it, match to . |
177 | | * if ends with *, eat everything. |
178 | | * This implies match sequences must be ordered like |
179 | | * x.*.* |
180 | | * x.* |
181 | | * if both options are possible |
182 | | */ |
183 | 0 | while (*p && ((*p != '.' && *p != '[') || !*q)) |
184 | 0 | p++; |
185 | 0 | } |
186 | 0 | if (*p || *q) |
187 | 0 | continue; |
188 | | |
189 | 0 | ctx->path_match = (uint8_t)(n + 1); |
190 | 0 | ctx->path_match_len = ctx->pst[ctx->pst_sp].ppos; |
191 | 0 | return; |
192 | 0 | } |
193 | | |
194 | 0 | if (!ctx->path_match) |
195 | 0 | ctx->wildcount = 0; |
196 | 0 | } |
197 | | |
198 | | int |
199 | | lejp_get_wildcard(struct lejp_ctx *ctx, int wildcard, char *dest, int len) |
200 | 0 | { |
201 | 0 | int n; |
202 | |
|
203 | 0 | if (wildcard >= ctx->wildcount || !len) |
204 | 0 | return 0; |
205 | | |
206 | 0 | n = ctx->wild[wildcard]; |
207 | |
|
208 | 0 | while (--len && n < ctx->pst[ctx->pst_sp].ppos && |
209 | 0 | (n == ctx->wild[wildcard] || ctx->path[n] != '.')) |
210 | 0 | *dest++ = ctx->path[n++]; |
211 | |
|
212 | 0 | *dest = '\0'; |
213 | 0 | n++; |
214 | |
|
215 | 0 | return n - ctx->wild[wildcard]; |
216 | 0 | } |
217 | | |
218 | | /** |
219 | | * lejp_parse - interpret some more incoming data incrementally |
220 | | * |
221 | | * \param ctx: previously constructed parsing context |
222 | | * \param json: char buffer with the new data to interpret |
223 | | * \param len: amount of data in the buffer |
224 | | * |
225 | | * Because lejp is a stream parser, it incrementally parses as new data |
226 | | * becomes available, maintaining all state in the context struct. So an |
227 | | * incomplete JSON is a normal situation, getting you a LEJP_CONTINUE |
228 | | * return, signalling there's no error but to call again with more data when |
229 | | * it comes to complete the parsing. Successful parsing completes with a |
230 | | * 0 or positive integer indicating how much of the last input buffer was |
231 | | * unused. |
232 | | */ |
233 | | |
234 | | static const char esc_char[] = "\"\\/bfnrt"; |
235 | | static const char esc_tran[] = "\"\\/\b\f\n\r\t"; |
236 | | static const char tokens[] = "rue alse ull "; |
237 | | |
238 | | int |
239 | | lejp_parse(struct lejp_ctx *ctx, const unsigned char *json, int len) |
240 | 0 | { |
241 | 0 | unsigned char c, n, s, defer = 0; |
242 | 0 | int ret = LEJP_REJECT_UNKNOWN; |
243 | |
|
244 | 0 | if (!ctx->sp && !ctx->pst[ctx->pst_sp].ppos) |
245 | 0 | ctx->pst[ctx->pst_sp].callback(ctx, LEJPCB_START); |
246 | |
|
247 | 0 | while (len--) { |
248 | 0 | c = *json++; |
249 | 0 | s = (unsigned char)ctx->st[ctx->sp].s; |
250 | | |
251 | | /* skip whitespace unless we should care */ |
252 | 0 | if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '#') { |
253 | 0 | if (c == '\n') { |
254 | 0 | ctx->line++; |
255 | 0 | ctx->st[ctx->sp].s &= (char)~LEJP_FLAG_WS_COMMENTLINE; |
256 | 0 | } |
257 | 0 | if (!(s & LEJP_FLAG_WS_KEEP)) { |
258 | 0 | if (c == '#') |
259 | 0 | ctx->st[ctx->sp].s |= |
260 | 0 | LEJP_FLAG_WS_COMMENTLINE; |
261 | 0 | continue; |
262 | 0 | } |
263 | 0 | } |
264 | | |
265 | 0 | if (ctx->st[ctx->sp].s & LEJP_FLAG_WS_COMMENTLINE) |
266 | 0 | continue; |
267 | | |
268 | 0 | switch (s) { |
269 | 0 | case LEJP_IDLE: |
270 | 0 | if (!ctx->sp && c == '[') { |
271 | | /* push */ |
272 | 0 | ctx->outer_array = 1; |
273 | 0 | ctx->st[ctx->sp].s = LEJP_MP_ARRAY_END; |
274 | 0 | c = LEJP_MP_VALUE; |
275 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos++] = '['; |
276 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos++] = ']'; |
277 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
278 | |
|
279 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_LEADING_WC) |
280 | 0 | lejp_check_path_match(ctx); |
281 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, LEJPCB_ARRAY_START)) |
282 | 0 | goto reject_callback; |
283 | 0 | ctx->i[ctx->ipos++] = 0; |
284 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_LEADING_WC) |
285 | 0 | lejp_check_path_match(ctx); |
286 | 0 | if (ctx->ipos > LWS_ARRAY_SIZE(ctx->i)) { |
287 | 0 | ret = LEJP_REJECT_MP_DELIM_ISTACK; |
288 | 0 | goto reject; |
289 | 0 | } |
290 | 0 | goto add_stack_level; |
291 | 0 | } |
292 | 0 | if (c != '{') { |
293 | 0 | ret = LEJP_REJECT_IDLE_NO_BRACE; |
294 | 0 | goto reject; |
295 | 0 | } |
296 | | |
297 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES) { |
298 | | /* since insides of {} can have ',', we should |
299 | | * add an index level so we can count them |
300 | | */ |
301 | 0 | ctx->i[ctx->ipos++] = 0; |
302 | 0 | if (ctx->ipos > LWS_ARRAY_SIZE(ctx->i)) { |
303 | 0 | ret = LEJP_REJECT_MP_DELIM_ISTACK; |
304 | 0 | goto reject; |
305 | 0 | } |
306 | 0 | } |
307 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
308 | 0 | LEJPCB_OBJECT_START)) |
309 | 0 | goto reject_callback; |
310 | 0 | ctx->st[ctx->sp].s = LEJP_MEMBERS; |
311 | 0 | break; |
312 | 0 | case LEJP_MEMBERS: |
313 | 0 | if (c == '}') { |
314 | 0 | if (ctx->sp >= 1) |
315 | 0 | goto pop_level; |
316 | | |
317 | 0 | ctx->st[ctx->sp].s = LEJP_IDLE; |
318 | 0 | ret = LEJP_REJECT_MEMBERS_NO_CLOSE; |
319 | 0 | goto reject; |
320 | 0 | } |
321 | 0 | ctx->st[ctx->sp].s = LEJP_M_P; |
322 | 0 | goto redo_character; |
323 | 0 | case LEJP_M_P: |
324 | 0 | if (c != '\"') { |
325 | 0 | ret = LEJP_REJECT_MP_NO_OPEN_QUOTE; |
326 | 0 | goto reject; |
327 | 0 | } |
328 | | /* push */ |
329 | 0 | ctx->st[ctx->sp].s = LEJP_MP_DELIM; |
330 | 0 | c = LEJP_MP_STRING; |
331 | 0 | goto add_stack_level; |
332 | | |
333 | 0 | case LEJP_MP_STRING: |
334 | 0 | if (c == '\"') { |
335 | 0 | if (!ctx->sp) { /* JSON can't end on quote */ |
336 | 0 | ret = LEJP_REJECT_MP_STRING_UNDERRUN; |
337 | 0 | goto reject; |
338 | 0 | } |
339 | 0 | if (ctx->st[ctx->sp - 1].s != LEJP_MP_DELIM) { |
340 | 0 | ctx->buf[ctx->npos] = '\0'; |
341 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
342 | 0 | LEJPCB_VAL_STR_END) < 0) |
343 | 0 | goto reject_callback; |
344 | 0 | } |
345 | | /* pop */ |
346 | 0 | ctx->sp--; |
347 | 0 | break; |
348 | 0 | } |
349 | 0 | if (c == '\\') { |
350 | 0 | ctx->st[ctx->sp].s = LEJP_MP_STRING_ESC; |
351 | 0 | break; |
352 | 0 | } |
353 | 0 | if (c < ' ') {/* "control characters" not allowed */ |
354 | 0 | ret = LEJP_REJECT_MP_ILLEGAL_CTRL; |
355 | 0 | goto reject; |
356 | 0 | } |
357 | 0 | goto emit_string_char; |
358 | | |
359 | 0 | case LEJP_MP_STRING_ESC: |
360 | 0 | if (c == 'u') { |
361 | 0 | ctx->st[ctx->sp].s = LEJP_MP_STRING_ESC_U1; |
362 | 0 | ctx->uni = 0; |
363 | 0 | break; |
364 | 0 | } |
365 | 0 | for (n = 0; n < sizeof(esc_char); n++) { |
366 | 0 | if (c != esc_char[n]) |
367 | 0 | continue; |
368 | | /* found it */ |
369 | 0 | c = (unsigned char)esc_tran[n]; |
370 | 0 | ctx->st[ctx->sp].s = LEJP_MP_STRING; |
371 | 0 | goto emit_string_char; |
372 | 0 | } |
373 | 0 | ret = LEJP_REJECT_MP_STRING_ESC_ILLEGAL_ESC; |
374 | | /* illegal escape char */ |
375 | 0 | goto reject; |
376 | | |
377 | 0 | case LEJP_MP_STRING_ESC_U1: |
378 | 0 | case LEJP_MP_STRING_ESC_U2: |
379 | 0 | case LEJP_MP_STRING_ESC_U3: |
380 | 0 | case LEJP_MP_STRING_ESC_U4: |
381 | 0 | ctx->uni = (uint16_t)(ctx->uni << 4); |
382 | 0 | if (c >= '0' && c <= '9') |
383 | 0 | ctx->uni |= (uint16_t)(c - '0'); |
384 | 0 | else |
385 | 0 | if (c >= 'a' && c <= 'f') |
386 | 0 | ctx->uni |= (uint16_t)(c - 'a' + 10); |
387 | 0 | else |
388 | 0 | if (c >= 'A' && c <= 'F') |
389 | 0 | ctx->uni |= (uint16_t)(c - 'A' + 10); |
390 | 0 | else { |
391 | 0 | ret = LEJP_REJECT_ILLEGAL_HEX; |
392 | 0 | goto reject; |
393 | 0 | } |
394 | 0 | ctx->st[ctx->sp].s++; |
395 | 0 | switch (s) { |
396 | 0 | case LEJP_MP_STRING_ESC_U2: |
397 | 0 | if (ctx->uni < 0x08) |
398 | 0 | break; |
399 | | /* |
400 | | * 0x08-0xff (0x0800 - 0xffff) |
401 | | * emit 3-byte UTF-8 |
402 | | */ |
403 | 0 | c = (unsigned char)(0xe0 | ((ctx->uni >> 4) & 0xf)); |
404 | 0 | goto emit_string_char; |
405 | | |
406 | 0 | case LEJP_MP_STRING_ESC_U3: |
407 | 0 | if (ctx->uni >= 0x080) { |
408 | | /* |
409 | | * 0x080 - 0xfff (0x0800 - 0xffff) |
410 | | * middle 3-byte seq |
411 | | * send ....XXXXXX.. |
412 | | */ |
413 | 0 | c = (unsigned char)(0x80 | ((ctx->uni >> 2) & 0x3f)); |
414 | 0 | goto emit_string_char; |
415 | 0 | } |
416 | 0 | if (ctx->uni < 0x008) |
417 | 0 | break; |
418 | | /* |
419 | | * 0x008 - 0x7f (0x0080 - 0x07ff) |
420 | | * start 2-byte seq |
421 | | */ |
422 | 0 | c = (unsigned char)(0xc0 | (ctx->uni >> 2)); |
423 | 0 | goto emit_string_char; |
424 | | |
425 | 0 | case LEJP_MP_STRING_ESC_U4: |
426 | 0 | if (ctx->uni >= 0x0080) |
427 | | /* end of 2 or 3-byte seq */ |
428 | 0 | c = (unsigned char)(0x80 | (ctx->uni & 0x3f)); |
429 | 0 | else |
430 | | /* literal */ |
431 | 0 | c = (unsigned char)ctx->uni; |
432 | |
|
433 | 0 | ctx->st[ctx->sp].s = LEJP_MP_STRING; |
434 | 0 | goto emit_string_char; |
435 | 0 | default: |
436 | 0 | break; |
437 | 0 | } |
438 | 0 | break; |
439 | | |
440 | 0 | case LEJP_MP_DELIM: |
441 | 0 | if (c != ':') { |
442 | 0 | ret = LEJP_REJECT_MP_DELIM_MISSING_COLON; |
443 | 0 | goto reject; |
444 | 0 | } |
445 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE; |
446 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
447 | |
|
448 | 0 | lejp_check_path_match(ctx); |
449 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, LEJPCB_PAIR_NAME)) |
450 | 0 | goto reject_callback; |
451 | 0 | break; |
452 | | |
453 | 0 | case LEJP_MP_VALUE: |
454 | 0 | if (c == '-' || (c >= '0' && c <= '9')) { |
455 | 0 | ctx->npos = 0; |
456 | 0 | ctx->dcount = 0; |
457 | 0 | ctx->f = 0; |
458 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE_NUM_INT; |
459 | 0 | goto redo_character; |
460 | 0 | } |
461 | 0 | switch (c) { |
462 | 0 | case'\"': |
463 | | /* push */ |
464 | 0 | ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END; |
465 | 0 | c = LEJP_MP_STRING; |
466 | 0 | ctx->npos = 0; |
467 | 0 | ctx->buf[0] = '\0'; |
468 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
469 | 0 | LEJPCB_VAL_STR_START)) |
470 | 0 | goto reject_callback; |
471 | 0 | goto add_stack_level; |
472 | | |
473 | 0 | case '{': |
474 | | /* push */ |
475 | 0 | ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END; |
476 | 0 | c = LEJP_MEMBERS; |
477 | 0 | lejp_check_path_match(ctx); |
478 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES) { |
479 | | /* since insides of {} can have ',', we should |
480 | | * add an index level so we can count them |
481 | | */ |
482 | 0 | ctx->i[ctx->ipos++] = 0; |
483 | 0 | if (ctx->ipos > LWS_ARRAY_SIZE(ctx->i)) { |
484 | 0 | ret = LEJP_REJECT_MP_DELIM_ISTACK; |
485 | 0 | goto reject; |
486 | 0 | } |
487 | 0 | } |
488 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
489 | 0 | LEJPCB_OBJECT_START)) |
490 | 0 | goto reject_callback; |
491 | 0 | ctx->path_match = 0; |
492 | 0 | goto add_stack_level; |
493 | | |
494 | 0 | case '[': |
495 | | /* push */ |
496 | 0 | ctx->st[ctx->sp].s = LEJP_MP_ARRAY_END; |
497 | 0 | c = LEJP_MP_VALUE; |
498 | 0 | if (ctx->pst[ctx->pst_sp].ppos + 3u >= |
499 | 0 | sizeof(ctx->path)) |
500 | 0 | goto reject; |
501 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos++] = '['; |
502 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos++] = ']'; |
503 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
504 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_LEADING_WC) |
505 | 0 | lejp_check_path_match(ctx); |
506 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, LEJPCB_ARRAY_START)) |
507 | 0 | goto reject_callback; |
508 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_LEADING_WC) |
509 | 0 | lejp_check_path_match(ctx); |
510 | 0 | ctx->i[ctx->ipos++] = 0; |
511 | 0 | if (ctx->ipos > LWS_ARRAY_SIZE(ctx->i)) { |
512 | 0 | ret = LEJP_REJECT_MP_DELIM_ISTACK; |
513 | 0 | goto reject; |
514 | 0 | } |
515 | 0 | goto add_stack_level; |
516 | | |
517 | 0 | case ']': |
518 | | /* pop */ |
519 | 0 | if (!ctx->sp) { /* JSON can't end on ] */ |
520 | 0 | ret = LEJP_REJECT_MP_C_OR_E_UNDERF; |
521 | 0 | goto reject; |
522 | 0 | } |
523 | 0 | ctx->sp--; |
524 | 0 | if (ctx->st[ctx->sp].s != LEJP_MP_ARRAY_END) { |
525 | 0 | ret = LEJP_REJECT_MP_C_OR_E_NOTARRAY; |
526 | 0 | goto reject; |
527 | 0 | } |
528 | | /* drop the path [n] bit */ |
529 | 0 | if (ctx->sp) { |
530 | 0 | ctx->pst[ctx->pst_sp].ppos = (unsigned char) |
531 | 0 | ctx->st[ctx->sp - 1].p; |
532 | 0 | ctx->ipos = (unsigned char)ctx->st[ctx->sp - 1].i; |
533 | 0 | } else |
534 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES) |
535 | 0 | ctx->ipos--; |
536 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
537 | 0 | if (ctx->path_match && |
538 | 0 | ctx->pst[ctx->pst_sp].ppos <= ctx->path_match_len) |
539 | | /* |
540 | | * we shrank the path to be |
541 | | * smaller than the matching point |
542 | | */ |
543 | 0 | ctx->path_match = 0; |
544 | 0 | if (ctx->pst_sp && !ctx->sp) |
545 | 0 | lejp_parser_pop(ctx); |
546 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_LEADING_WC) |
547 | 0 | lejp_check_path_match(ctx); |
548 | 0 | if (ctx->outer_array && !ctx->sp) { /* ended on ] */ |
549 | 0 | n = LEJPCB_ARRAY_END; |
550 | 0 | goto completed; |
551 | 0 | } |
552 | 0 | goto array_end; |
553 | | |
554 | 0 | case 't': /* true */ |
555 | 0 | ctx->uni = 0; |
556 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE_TOK; |
557 | 0 | break; |
558 | | |
559 | 0 | case 'f': |
560 | 0 | ctx->uni = 4; |
561 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE_TOK; |
562 | 0 | break; |
563 | | |
564 | 0 | case 'n': |
565 | 0 | ctx->uni = 4 + 5; |
566 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE_TOK; |
567 | 0 | break; |
568 | 0 | default: |
569 | 0 | ret = LEJP_REJECT_MP_DELIM_BAD_VALUE_START; |
570 | 0 | goto reject; |
571 | 0 | } |
572 | 0 | break; |
573 | | |
574 | 0 | case LEJP_MP_VALUE_NUM_INT: |
575 | 0 | if (!ctx->npos && c == '-') { |
576 | 0 | ctx->f |= LEJP_SEEN_MINUS; |
577 | 0 | goto append_npos; |
578 | 0 | } |
579 | | |
580 | 0 | if (ctx->dcount < 20 && c >= '0' && c <= '9') { |
581 | 0 | if (ctx->f & LEJP_SEEN_POINT) |
582 | 0 | ctx->f |= LEJP_SEEN_POST_POINT; |
583 | 0 | ctx->dcount++; |
584 | 0 | goto append_npos; |
585 | 0 | } |
586 | 0 | if (c == '.') { |
587 | 0 | if (!ctx->dcount || (ctx->f & LEJP_SEEN_POINT)) { |
588 | 0 | ret = LEJP_REJECT_MP_VAL_NUM_FORMAT; |
589 | 0 | goto reject; |
590 | 0 | } |
591 | 0 | ctx->f |= LEJP_SEEN_POINT; |
592 | 0 | goto append_npos; |
593 | 0 | } |
594 | | /* |
595 | | * before exponent, if we had . we must have had at |
596 | | * least one more digit |
597 | | */ |
598 | 0 | if ((ctx->f & |
599 | 0 | (LEJP_SEEN_POINT | LEJP_SEEN_POST_POINT)) == |
600 | 0 | LEJP_SEEN_POINT) { |
601 | 0 | ret = LEJP_REJECT_MP_VAL_NUM_INT_NO_FRAC; |
602 | 0 | goto reject; |
603 | 0 | } |
604 | 0 | if (c == 'e' || c == 'E') { |
605 | 0 | if (ctx->f & LEJP_SEEN_EXP) { |
606 | 0 | ret = LEJP_REJECT_MP_VAL_NUM_FORMAT; |
607 | 0 | goto reject; |
608 | 0 | } |
609 | 0 | ctx->f |= LEJP_SEEN_EXP; |
610 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE_NUM_EXP; |
611 | 0 | goto append_npos; |
612 | 0 | } |
613 | | /* if none of the above, did we even have a number? */ |
614 | 0 | if (!ctx->dcount) { |
615 | 0 | ret = LEJP_REJECT_MP_VAL_NUM_FORMAT; |
616 | 0 | goto reject; |
617 | 0 | } |
618 | | |
619 | 0 | ctx->buf[ctx->npos] = '\0'; |
620 | 0 | if (ctx->f & LEJP_SEEN_POINT) { |
621 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
622 | 0 | LEJPCB_VAL_NUM_FLOAT)) |
623 | 0 | goto reject_callback; |
624 | 0 | } else { |
625 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
626 | 0 | LEJPCB_VAL_NUM_INT)) |
627 | 0 | goto reject_callback; |
628 | 0 | } |
629 | | |
630 | | /* then this is the post-number character, loop */ |
631 | 0 | ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END; |
632 | 0 | goto redo_character; |
633 | | |
634 | 0 | case LEJP_MP_VALUE_NUM_EXP: |
635 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE_NUM_INT; |
636 | 0 | if (c >= '0' && c <= '9') |
637 | 0 | goto redo_character; |
638 | 0 | if (c == '+' || c == '-') |
639 | 0 | goto append_npos; |
640 | 0 | ret = LEJP_REJECT_MP_VAL_NUM_EXP_BAD_EXP; |
641 | 0 | goto reject; |
642 | | |
643 | 0 | case LEJP_MP_VALUE_TOK: /* true, false, null */ |
644 | 0 | if (c != tokens[ctx->uni]) { |
645 | 0 | ret = LEJP_REJECT_MP_VAL_TOK_UNKNOWN; |
646 | 0 | goto reject; |
647 | 0 | } |
648 | 0 | ctx->uni++; |
649 | 0 | if (tokens[ctx->uni] != ' ') |
650 | 0 | break; |
651 | 0 | switch (ctx->uni) { |
652 | 0 | case 3: |
653 | 0 | ctx->buf[0] = '1'; |
654 | 0 | ctx->buf[1] = '\0'; |
655 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
656 | 0 | LEJPCB_VAL_TRUE)) |
657 | 0 | goto reject_callback; |
658 | 0 | break; |
659 | 0 | case 8: |
660 | 0 | ctx->buf[0] = '0'; |
661 | 0 | ctx->buf[1] = '\0'; |
662 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
663 | 0 | LEJPCB_VAL_FALSE)) |
664 | 0 | goto reject_callback; |
665 | 0 | break; |
666 | 0 | case 12: |
667 | 0 | ctx->buf[0] = '\0'; |
668 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
669 | 0 | LEJPCB_VAL_NULL)) |
670 | 0 | goto reject_callback; |
671 | 0 | break; |
672 | 0 | } |
673 | 0 | ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END; |
674 | 0 | break; |
675 | | |
676 | 0 | case LEJP_MP_COMMA_OR_END: |
677 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
678 | 0 | if (c == ',') { |
679 | | /* increment this stack level's index */ |
680 | 0 | ctx->st[ctx->sp].s = LEJP_M_P; |
681 | |
|
682 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES) |
683 | 0 | if (ctx->ipos) |
684 | 0 | ctx->i[ctx->ipos - 1]++; |
685 | |
|
686 | 0 | if (!ctx->sp) { |
687 | 0 | ctx->pst[ctx->pst_sp].ppos = 0; |
688 | | /* |
689 | | * since we came back to root level, |
690 | | * no path can still match |
691 | | */ |
692 | 0 | ctx->path_match = 0; |
693 | 0 | break; |
694 | 0 | } |
695 | 0 | ctx->pst[ctx->pst_sp].ppos = (unsigned char)ctx->st[ctx->sp - 1].p; |
696 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
697 | 0 | if (ctx->path_match && |
698 | 0 | ctx->pst[ctx->pst_sp].ppos <= ctx->path_match_len) |
699 | | /* |
700 | | * we shrank the path to be |
701 | | * smaller than the matching point |
702 | | */ |
703 | 0 | ctx->path_match = 0; |
704 | |
|
705 | 0 | lejp_check_path_match(ctx); |
706 | |
|
707 | 0 | if (ctx->st[ctx->sp - 1].s != LEJP_MP_ARRAY_END) |
708 | 0 | break; |
709 | | /* top level is definitely an array... */ |
710 | 0 | if (!(ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES)) |
711 | 0 | if (ctx->ipos) |
712 | 0 | ctx->i[ctx->ipos - 1]++; |
713 | |
|
714 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE; |
715 | 0 | break; |
716 | 0 | } |
717 | 0 | if (c == ']') { |
718 | 0 | if (!ctx->sp) { |
719 | 0 | ret = LEJP_REJECT_MP_C_OR_E_UNDERF; |
720 | 0 | goto reject; |
721 | 0 | } |
722 | | /* pop */ |
723 | 0 | ctx->sp--; |
724 | 0 | if (ctx->st[ctx->sp].s != LEJP_MP_ARRAY_END) { |
725 | 0 | ret = LEJP_REJECT_MP_C_OR_E_NOTARRAY; |
726 | 0 | goto reject; |
727 | 0 | } |
728 | | |
729 | | /* drop the path [n] bit */ |
730 | 0 | if (ctx->sp) { |
731 | 0 | ctx->pst[ctx->pst_sp].ppos = (unsigned char) |
732 | 0 | ctx->st[ctx->sp - 1].p; |
733 | 0 | ctx->ipos = (unsigned char)ctx->st[ctx->sp - 1].i; |
734 | 0 | } else |
735 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES) |
736 | 0 | ctx->ipos--; |
737 | |
|
738 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
739 | 0 | if (ctx->path_match && |
740 | 0 | ctx->pst[ctx->pst_sp].ppos <= ctx->path_match_len) |
741 | | /* |
742 | | * we shrank the path to be |
743 | | * smaller than the matching point |
744 | | */ |
745 | 0 | ctx->path_match = 0; |
746 | |
|
747 | 0 | if (ctx->outer_array && !ctx->sp) { /* ended on ] */ |
748 | 0 | n = LEJPCB_ARRAY_END; |
749 | 0 | goto completed; |
750 | 0 | } |
751 | | |
752 | 0 | if (ctx->pst_sp && !ctx->sp) |
753 | 0 | defer = 1; |
754 | | |
755 | | /* do LEJP_MP_ARRAY_END processing */ |
756 | 0 | goto redo_character; |
757 | 0 | } |
758 | 0 | if (c != '}') { |
759 | 0 | ret = LEJP_REJECT_MP_C_OR_E_NEITHER; |
760 | 0 | goto reject; |
761 | 0 | } |
762 | 0 | if (!ctx->sp) { |
763 | 0 | n = LEJPCB_OBJECT_END; |
764 | 0 | completed: |
765 | 0 | ctx->path_match = 0; |
766 | | //lejp_check_path_match(ctx); |
767 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, (char)n) || |
768 | 0 | ctx->pst[ctx->pst_sp].callback(ctx, |
769 | 0 | LEJPCB_COMPLETE)) |
770 | 0 | goto reject_callback; |
771 | | |
772 | | /* done, return unused amount */ |
773 | 0 | return len; |
774 | 0 | } |
775 | | |
776 | | /* pop */ |
777 | 0 | pop_level: |
778 | 0 | ctx->sp--; |
779 | 0 | if (ctx->sp) { |
780 | 0 | ctx->pst[ctx->pst_sp].ppos = (unsigned char)ctx->st[ctx->sp].p; |
781 | 0 | ctx->ipos = (unsigned char)ctx->st[ctx->sp].i; |
782 | 0 | } else |
783 | 0 | if (ctx->flags & LEJP_FLAG_FEAT_OBJECT_INDEXES) |
784 | 0 | ctx->ipos--; |
785 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
786 | |
|
787 | 0 | if (ctx->path_match && |
788 | 0 | ctx->pst[ctx->pst_sp].ppos <= ctx->path_match_len) |
789 | | /* |
790 | | * we shrank the path to be |
791 | | * smaller than the matching point |
792 | | */ |
793 | 0 | ctx->path_match = 0; |
794 | |
|
795 | 0 | lejp_check_path_match(ctx); |
796 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
797 | 0 | LEJPCB_OBJECT_END)) |
798 | 0 | goto reject_callback; |
799 | 0 | if (ctx->pst_sp && !ctx->sp) |
800 | 0 | lejp_parser_pop(ctx); |
801 | 0 | break; |
802 | | |
803 | 0 | case LEJP_MP_ARRAY_END: |
804 | 0 | array_end: |
805 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
806 | 0 | if (c == ',') { |
807 | | /* increment this stack level's index */ |
808 | 0 | if (ctx->ipos) |
809 | 0 | ctx->i[ctx->ipos - 1]++; |
810 | 0 | ctx->st[ctx->sp].s = LEJP_MP_VALUE; |
811 | 0 | if (ctx->sp) |
812 | 0 | ctx->pst[ctx->pst_sp].ppos = (unsigned char) |
813 | 0 | ctx->st[ctx->sp - 1].p; |
814 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
815 | 0 | lejp_check_path_match(ctx); |
816 | 0 | break; |
817 | 0 | } |
818 | 0 | if (c != ']') { |
819 | 0 | ret = LEJP_REJECT_MP_ARRAY_END_MISSING; |
820 | 0 | goto reject; |
821 | 0 | } |
822 | 0 | lejp_check_path_match(ctx); |
823 | 0 | ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END; |
824 | 0 | ctx->pst[ctx->pst_sp].callback(ctx, LEJPCB_ARRAY_END); |
825 | 0 | if (defer) { |
826 | 0 | lejp_parser_pop(ctx); |
827 | 0 | defer = 0; |
828 | 0 | } |
829 | 0 | break; |
830 | 0 | } |
831 | | |
832 | 0 | continue; |
833 | | |
834 | 0 | emit_string_char: |
835 | 0 | if (!ctx->sp || ctx->st[ctx->sp - 1].s != LEJP_MP_DELIM) { |
836 | | /* assemble the string value into chunks */ |
837 | 0 | ctx->buf[ctx->npos++] = (char)c; |
838 | 0 | if (ctx->npos == sizeof(ctx->buf) - 1) { |
839 | 0 | if (ctx->pst[ctx->pst_sp].callback(ctx, |
840 | 0 | LEJPCB_VAL_STR_CHUNK)) |
841 | 0 | goto reject_callback; |
842 | 0 | ctx->npos = 0; |
843 | 0 | } |
844 | 0 | continue; |
845 | 0 | } |
846 | | /* name part of name:value pair */ |
847 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos++] = (char)c; |
848 | 0 | continue; |
849 | | |
850 | 0 | add_stack_level: |
851 | | /* push on to the object stack */ |
852 | 0 | if (ctx->pst[ctx->pst_sp].ppos && |
853 | 0 | ctx->st[ctx->sp].s != LEJP_MP_COMMA_OR_END && |
854 | 0 | ctx->st[ctx->sp].s != LEJP_MP_ARRAY_END) |
855 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos++] = '.'; |
856 | |
|
857 | 0 | ctx->st[ctx->sp].p = (char)ctx->pst[ctx->pst_sp].ppos; |
858 | 0 | ctx->st[ctx->sp].i = (char)ctx->ipos; |
859 | 0 | if (++ctx->sp == LWS_ARRAY_SIZE(ctx->st)) { |
860 | 0 | ret = LEJP_REJECT_STACK_OVERFLOW; |
861 | 0 | goto reject; |
862 | 0 | } |
863 | 0 | ctx->path[ctx->pst[ctx->pst_sp].ppos] = '\0'; |
864 | 0 | ctx->st[ctx->sp].s = (char)c; |
865 | 0 | ctx->st[ctx->sp].b = 0; |
866 | 0 | continue; |
867 | | |
868 | 0 | append_npos: |
869 | 0 | if (ctx->npos >= sizeof(ctx->buf)) { |
870 | 0 | ret = LEJP_REJECT_NUM_TOO_LONG; |
871 | 0 | goto reject; |
872 | 0 | } |
873 | 0 | ctx->buf[ctx->npos++] = (char)c; |
874 | 0 | continue; |
875 | | |
876 | 0 | redo_character: |
877 | 0 | json--; |
878 | 0 | len++; |
879 | 0 | } |
880 | | |
881 | 0 | return LEJP_CONTINUE; |
882 | | |
883 | | |
884 | 0 | reject_callback: |
885 | 0 | ret = LEJP_REJECT_CALLBACK; |
886 | |
|
887 | 0 | reject: |
888 | 0 | ctx->pst[ctx->pst_sp].callback(ctx, LEJPCB_FAILED); |
889 | 0 | return ret; |
890 | 0 | } |
891 | | |
892 | | int |
893 | | lejp_parser_push(struct lejp_ctx *ctx, void *user, const char * const *paths, |
894 | | unsigned char paths_count, lejp_callback lejp_cb) |
895 | 0 | { |
896 | 0 | struct _lejp_parsing_stack *p; |
897 | |
|
898 | 0 | if (ctx->pst_sp + 1 == LEJP_MAX_PARSING_STACK_DEPTH) |
899 | 0 | return -1; |
900 | | |
901 | 0 | lejp_check_path_match(ctx); |
902 | |
|
903 | 0 | ctx->pst[ctx->pst_sp].path_match = ctx->path_match; |
904 | 0 | ctx->pst_sp++; |
905 | |
|
906 | 0 | p = &ctx->pst[ctx->pst_sp]; |
907 | 0 | p->user = user; |
908 | 0 | p->callback = lejp_cb; |
909 | 0 | p->paths = paths; |
910 | 0 | p->count_paths = paths_count; |
911 | 0 | p->ppos = 0; |
912 | |
|
913 | 0 | ctx->path_match = 0; |
914 | 0 | lejp_check_path_match(ctx); |
915 | |
|
916 | 0 | lwsl_debug("%s: pushed parser stack to %d (path %s)\n", __func__, |
917 | 0 | ctx->pst_sp, ctx->path); |
918 | |
|
919 | 0 | return 0; |
920 | 0 | } |
921 | | |
922 | | int |
923 | | lejp_parser_pop(struct lejp_ctx *ctx) |
924 | 0 | { |
925 | 0 | if (!ctx->pst_sp) |
926 | 0 | return -1; |
927 | | |
928 | 0 | ctx->pst_sp--; |
929 | 0 | lwsl_debug("%s: popped parser stack to %d\n", __func__, ctx->pst_sp); |
930 | |
|
931 | 0 | ctx->path_match = 0; /* force it to check */ |
932 | 0 | lejp_check_path_match(ctx); |
933 | |
|
934 | 0 | return 0; |
935 | 0 | } |
936 | | |
937 | | const char * |
938 | | lejp_error_to_string(int e) |
939 | 0 | { |
940 | 0 | if (e > 0) |
941 | 0 | e = 0; |
942 | 0 | else |
943 | 0 | e = -e; |
944 | |
|
945 | 0 | if (e >= (int)LWS_ARRAY_SIZE(parser_errs)) |
946 | 0 | return "Unknown error"; |
947 | | |
948 | 0 | return parser_errs[e]; |
949 | 0 | } |
950 | | |