Line | Count | Source |
1 | | // Copyright (c) 2018-2020 Cesanta Software Limited |
2 | | // All rights reserved |
3 | | // |
4 | | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | | // of this software and associated documentation files (the "Software"), to deal |
6 | | // in the Software without restriction, including without limitation the rights |
7 | | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | | // copies of the Software, and to permit persons to whom the Software is |
9 | | // furnished to do so, subject to the following conditions: |
10 | | // |
11 | | // The above copyright notice and this permission notice shall be included in |
12 | | // all copies or substantial portions of the Software. |
13 | | // |
14 | | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
20 | | // SOFTWARE. |
21 | | |
22 | | #include <float.h> |
23 | | #include <string.h> |
24 | | |
25 | | #include <import/mjson.h> |
26 | | |
27 | | static double mystrtod(const char *str, int len, char **end); |
28 | | |
// Translate between a raw special character and the letter that follows the
// backslash in its JSON escape sequence.
//   esc != 0: `c` is a raw character (e.g. '\n'); returns its letter ('n').
//   esc == 0: `c` is an escape letter (e.g. 'n'); returns the raw char ('\n').
// Returns 0 when `c` has no counterpart in the table.
static int mjson_esc(int c, int esc) {
  static const char raw[] = "\b\f\n\r\t\\\"";
  static const char letters[] = "bfnrt\\\"";
  const char *from = esc ? raw : letters;
  const char *to = esc ? letters : raw;
  int k;
  for (k = 0; from[k] != '\0'; k++) {
    if (from[k] == c) return to[k];
  }
  return 0;
}
36 | | |
// Look up `c` as a raw character and return the letter used in its JSON
// escape sequence, or 0 if `c` is not in the escape table.
static int mjson_escape(int c) {
  const int raw_to_letter = 1;
  return mjson_esc(c, raw_to_letter);
}
40 | | |
41 | 0 | static int mjson_pass_string(const char *s, int len) { |
42 | 0 | int i; |
43 | 0 | for (i = 0; i < len; i++) { |
44 | 0 | if (s[i] == '\\' && i + 1 < len && mjson_escape(s[i + 1])) { |
45 | 0 | i++; |
46 | 0 | } else if (s[i] == '\0') { |
47 | 0 | return MJSON_ERROR_INVALID_INPUT; |
48 | 0 | } else if (s[i] == '"') { |
49 | 0 | return i; |
50 | 0 | } |
51 | 0 | } |
52 | 0 | return MJSON_ERROR_INVALID_INPUT; |
53 | 0 | } |
54 | | |
55 | 0 | int mjson(const char *s, int len, mjson_cb_t cb, void *ud) { |
56 | 0 | enum { S_VALUE, S_KEY, S_COLON, S_COMMA_OR_EOO } expecting = S_VALUE; |
57 | 0 | unsigned char nesting[MJSON_MAX_DEPTH]; |
58 | 0 | int i, depth = 0; |
59 | 0 | #define MJSONCALL(ev) \ |
60 | 0 | if (cb != NULL && cb(ev, s, start, i - start + 1, ud)) return i + 1; |
61 | | |
62 | | // In the ascii table, the distance between `[` and `]` is 2. |
63 | | // Ditto for `{` and `}`. Hence +2 in the code below. |
64 | 0 | #define MJSONEOO() \ |
65 | 0 | do { \ |
66 | 0 | if (c != nesting[depth - 1] + 2) return MJSON_ERROR_INVALID_INPUT; \ |
67 | 0 | depth--; \ |
68 | 0 | if (depth == 0) { \ |
69 | 0 | MJSONCALL(tok); \ |
70 | 0 | return i + 1; \ |
71 | 0 | } \ |
72 | 0 | } while (0) |
73 | |
|
74 | 0 | for (i = 0; i < len; i++) { |
75 | 0 | int start = i; |
76 | 0 | unsigned char c = ((unsigned char *) s)[i]; |
77 | 0 | int tok = c; |
78 | 0 | if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; |
79 | | // printf("- %c [%.*s] %d %d\n", c, i, s, depth, expecting); |
80 | 0 | switch (expecting) { |
81 | 0 | case S_VALUE: |
82 | 0 | if (c == '{') { |
83 | 0 | if (depth >= (int) sizeof(nesting)) return MJSON_ERROR_TOO_DEEP; |
84 | 0 | nesting[depth++] = c; |
85 | 0 | expecting = S_KEY; |
86 | 0 | break; |
87 | 0 | } else if (c == '[') { |
88 | 0 | if (depth >= (int) sizeof(nesting)) return MJSON_ERROR_TOO_DEEP; |
89 | 0 | nesting[depth++] = c; |
90 | 0 | break; |
91 | 0 | } else if (c == ']' && depth > 0) { // Empty array |
92 | 0 | MJSONEOO(); |
93 | 0 | } else if (c == 't' && i + 3 < len && memcmp(&s[i], "true", 4) == 0) { |
94 | 0 | i += 3; |
95 | 0 | tok = MJSON_TOK_TRUE; |
96 | 0 | } else if (c == 'n' && i + 3 < len && memcmp(&s[i], "null", 4) == 0) { |
97 | 0 | i += 3; |
98 | 0 | tok = MJSON_TOK_NULL; |
99 | 0 | } else if (c == 'f' && i + 4 < len && memcmp(&s[i], "false", 5) == 0) { |
100 | 0 | i += 4; |
101 | 0 | tok = MJSON_TOK_FALSE; |
102 | 0 | } else if (c == '-' || ((c >= '0' && c <= '9'))) { |
103 | 0 | char *end = NULL; |
104 | 0 | mystrtod(&s[i], len - i, &end); |
105 | 0 | if (end != NULL) i += (int) (end - &s[i] - 1); |
106 | 0 | tok = MJSON_TOK_NUMBER; |
107 | 0 | } else if (c == '"') { |
108 | 0 | int n = mjson_pass_string(&s[i + 1], len - i - 1); |
109 | 0 | if (n < 0) return n; |
110 | 0 | i += n + 1; |
111 | 0 | tok = MJSON_TOK_STRING; |
112 | 0 | } else { |
113 | 0 | return MJSON_ERROR_INVALID_INPUT; |
114 | 0 | } |
115 | 0 | if (depth == 0) { |
116 | 0 | MJSONCALL(tok); |
117 | 0 | return i + 1; |
118 | 0 | } |
119 | 0 | expecting = S_COMMA_OR_EOO; |
120 | 0 | break; |
121 | | |
122 | 0 | case S_KEY: |
123 | 0 | if (c == '"') { |
124 | 0 | int n = mjson_pass_string(&s[i + 1], len - i - 1); |
125 | 0 | if (n < 0) return n; |
126 | 0 | i += n + 1; |
127 | 0 | tok = MJSON_TOK_KEY; |
128 | 0 | expecting = S_COLON; |
129 | 0 | } else if (c == '}') { // Empty object |
130 | 0 | MJSONEOO(); |
131 | 0 | expecting = S_COMMA_OR_EOO; |
132 | 0 | } else { |
133 | 0 | return MJSON_ERROR_INVALID_INPUT; |
134 | 0 | } |
135 | 0 | break; |
136 | | |
137 | 0 | case S_COLON: |
138 | 0 | if (c == ':') { |
139 | 0 | expecting = S_VALUE; |
140 | 0 | } else { |
141 | 0 | return MJSON_ERROR_INVALID_INPUT; |
142 | 0 | } |
143 | 0 | break; |
144 | | |
145 | 0 | case S_COMMA_OR_EOO: |
146 | 0 | if (depth <= 0) return MJSON_ERROR_INVALID_INPUT; |
147 | 0 | if (c == ',') { |
148 | 0 | expecting = (nesting[depth - 1] == '{') ? S_KEY : S_VALUE; |
149 | 0 | } else if (c == ']' || c == '}') { |
150 | 0 | MJSONEOO(); |
151 | 0 | } else { |
152 | 0 | return MJSON_ERROR_INVALID_INPUT; |
153 | 0 | } |
154 | 0 | break; |
155 | 0 | } |
156 | 0 | MJSONCALL(tok); |
157 | 0 | } |
158 | 0 | return MJSON_ERROR_INVALID_INPUT; |
159 | 0 | } |
160 | | |
// State shared between mjson_find() and its parser callback.
// NOTE: mjson_find() fills this with a positional initializer, so the
// member order must not change.
struct msjon_get_data {
  const char *path;     // JSON path being looked up
  int pos;              // Index of the next unmatched character in `path`
  int d1;               // Nesting depth of the parser: ++ on { [, -- on } ]
  int d2;               // Number of path components matched so far
  int i1;               // Element counter, advanced on each ',' at that depth
  int i2;               // Array index requested by the current "[N]" component
  int obj;              // Start offset of a matched array/object, -1 if none
  const char **tokptr;  // Out: start of the found token (may be NULL)
  int *toklen;          // Out: length of the found token (may be NULL)
  int tok;              // Found token type, MJSON_TOK_INVALID until found
};
173 | | |
174 | | #include <stdio.h> |
175 | | |
// Length of the leading path component of `s`, counted in unescaped
// characters. The component ends at the first unescaped '.' or '[', or at
// the end of the string. A backslash plus the character it escapes count as
// one character.
static int plen1(const char *s) {
  int i = 0, n = 0;
  while (s[i] != '\0' && s[i] != '.' && s[i] != '[') {
    // A backslash escapes the next character, but must never be allowed to
    // step over the terminating NUL (that would read past the string).
    i += (s[i] == '\\' && s[i + 1] != '\0') ? 2 : 1;
    n++;
  }
  return n;
}
183 | | |
// Length of the leading path component of `s`, counted in raw bytes of the
// path string (escaping backslashes included). The component ends at the
// first unescaped '.' or '[', or at the end of the string.
static int plen2(const char *s) {
  int i = 0;
  while (s[i] != '\0' && s[i] != '.' && s[i] != '[') {
    // A backslash escapes the next character, but must never be allowed to
    // step over the terminating NUL (that would read past the string).
    i += (s[i] == '\\' && s[i + 1] != '\0') ? 2 : 1;
  }
  return i;
}
191 | | |
// Compare `n` characters of the JSON key `a` against the path component `b`.
// Backslashes in `b` are escape markers: each one is skipped and the
// character after it is compared instead.
// Returns 0 when all `n` characters match, otherwise the difference of the
// first mismatching pair (a - b).
static int kcmp(const char *a, const char *b, int n) {
  int ai, bi = 0, diff = 0;
  for (ai = 0; ai < n; ai++) {
    if (b[bi] == '\\') bi++;
    diff = a[ai] - b[bi];
    if (diff != 0) break;
    bi++;
  }
  return diff;
}
201 | | |
202 | 0 | static int mjson_get_cb(int tok, const char *s, int off, int len, void *ud) { |
203 | 0 | struct msjon_get_data *data = (struct msjon_get_data *) ud; |
204 | | // printf("--> %2x %2d %2d %2d %2d\t'%s'\t'%.*s'\t\t'%.*s'\n", tok, data->d1, |
205 | | // data->d2, data->i1, data->i2, data->path + data->pos, off, s, len, |
206 | | // s + off); |
207 | 0 | if (data->tok != MJSON_TOK_INVALID) return 1; // Found |
208 | | |
209 | 0 | if (tok == '{') { |
210 | 0 | if (!data->path[data->pos] && data->d1 == data->d2) data->obj = off; |
211 | 0 | data->d1++; |
212 | 0 | } else if (tok == '[') { |
213 | 0 | if (data->d1 == data->d2 && data->path[data->pos] == '[') { |
214 | 0 | data->i1 = 0; |
215 | 0 | data->i2 = (int) mystrtod(&data->path[data->pos + 1], strlen(&data->path[data->pos + 1]), NULL); |
216 | 0 | if (data->i1 == data->i2) { |
217 | 0 | data->d2++; |
218 | 0 | data->pos += 3; |
219 | 0 | } |
220 | 0 | } |
221 | 0 | if (!data->path[data->pos] && data->d1 == data->d2) data->obj = off; |
222 | 0 | data->d1++; |
223 | 0 | } else if (tok == ',') { |
224 | 0 | if (data->d1 == data->d2 + 1) { |
225 | 0 | data->i1++; |
226 | 0 | if (data->i1 == data->i2) { |
227 | 0 | while (data->path[data->pos] != ']') data->pos++; |
228 | 0 | data->pos++; |
229 | 0 | data->d2++; |
230 | 0 | } |
231 | 0 | } |
232 | 0 | } else if (tok == MJSON_TOK_KEY && data->d1 == data->d2 + 1 && |
233 | 0 | data->path[data->pos] == '.' && s[off] == '"' && |
234 | 0 | s[off + len - 1] == '"' && |
235 | 0 | plen1(&data->path[data->pos + 1]) == len - 2 && |
236 | 0 | kcmp(s + off + 1, &data->path[data->pos + 1], len - 2) == 0) { |
237 | 0 | data->d2++; |
238 | 0 | data->pos += plen2(&data->path[data->pos + 1]) + 1; |
239 | 0 | } else if (tok == MJSON_TOK_KEY && data->d1 == data->d2) { |
240 | 0 | return 1; // Exhausted path, not found |
241 | 0 | } else if (tok == '}' || tok == ']') { |
242 | 0 | data->d1--; |
243 | | // data->d2--; |
244 | 0 | if (!data->path[data->pos] && data->d1 == data->d2 && data->obj != -1) { |
245 | 0 | data->tok = tok - 2; |
246 | 0 | if (data->tokptr) *data->tokptr = s + data->obj; |
247 | 0 | if (data->toklen) *data->toklen = off - data->obj + 1; |
248 | 0 | return 1; |
249 | 0 | } |
250 | 0 | } else if (MJSON_TOK_IS_VALUE(tok)) { |
251 | | // printf("TOK --> %d\n", tok); |
252 | 0 | if (data->d1 == data->d2 && !data->path[data->pos]) { |
253 | 0 | data->tok = tok; |
254 | 0 | if (data->tokptr) *data->tokptr = s + off; |
255 | 0 | if (data->toklen) *data->toklen = len; |
256 | 0 | return 1; |
257 | 0 | } |
258 | 0 | } |
259 | 0 | return 0; |
260 | 0 | } |
261 | | |
262 | | enum mjson_tok mjson_find(const char *s, int len, const char *jp, |
263 | 0 | const char **tokptr, int *toklen) { |
264 | 0 | struct msjon_get_data data = {jp, 1, 0, 0, 0, |
265 | 0 | 0, -1, tokptr, toklen, MJSON_TOK_INVALID}; |
266 | 0 | if (jp[0] != '$') return MJSON_TOK_INVALID; |
267 | 0 | if (mjson(s, len, mjson_get_cb, &data) < 0) return MJSON_TOK_INVALID; |
268 | 0 | return (enum mjson_tok) data.tok; |
269 | 0 | } |
270 | | |
271 | 0 | int mjson_get_number(const char *s, int len, const char *path, double *v) { |
272 | 0 | const char *p; |
273 | 0 | int tok, n; |
274 | 0 | if ((tok = mjson_find(s, len, path, &p, &n)) == MJSON_TOK_NUMBER) { |
275 | 0 | if (v != NULL) *v = mystrtod(p, n, NULL); |
276 | 0 | } |
277 | 0 | return tok == MJSON_TOK_NUMBER ? 1 : 0; |
278 | 0 | } |
279 | | |
280 | 0 | int mjson_get_bool(const char *s, int len, const char *path, int *v) { |
281 | 0 | int tok = mjson_find(s, len, path, NULL, NULL); |
282 | 0 | if (tok == MJSON_TOK_TRUE && v != NULL) *v = 1; |
283 | 0 | if (tok == MJSON_TOK_FALSE && v != NULL) *v = 0; |
284 | 0 | return tok == MJSON_TOK_TRUE || tok == MJSON_TOK_FALSE ? 1 : 0; |
285 | 0 | } |
286 | | |
// Convert the two hex digits at `s` into one byte, high nibble first.
// Digits '0'-'9' and 'A'-'F' are decoded directly; any other character is
// treated as a lowercase digit (c - 'W', so 'a' gives 10). The input is not
// validated.
static unsigned char mjson_unhex_nimble(const char *s) {
  unsigned char result = 0;
  int k;
  for (k = 0; k < 2; k++) {
    int ch = s[k], digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if (ch >= 'A' && ch <= 'F') {
      digit = ch - '7';  // 'A' - '7' == 10
    } else {
      digit = ch - 'W';  // 'a' - 'W' == 10
    }
    if (k > 0) result = (unsigned char) (result << 4);
    result = (unsigned char) (result | digit);
  }
  return result;
}
297 | | |
// Decode the JSON string body `s` (`len` bytes, quotes excluded) into the
// buffer `to` of `n` bytes and NUL-terminate it.
// Only \u00XX is supported among \uXXXX escapes: handling wider code points
// would need UTF-8 encoding, which this library leaves out.
// Returns the number of bytes written (terminator excluded), or -1 on an
// unsupported or unknown escape, or when the result plus terminator does not
// fit in `n` bytes.
static int mjson_unescape(const char *s, int len, char *to, int n) {
  int in = 0, out = 0;
  while (in < len && out < n) {
    char ch = s[in];
    if (ch == '\\' && in + 5 < len && s[in + 1] == 'u') {
      if (s[in + 2] != '0' || s[in + 3] != '0') return -1;  // Beyond \u00XX
      ch = (char) mjson_unhex_nimble(s + in + 4);
      in += 6;
    } else if (ch == '\\' && in + 1 < len) {
      int plain = mjson_esc(s[in + 1], 0);
      if (plain == 0) return -1;  // Unknown escape letter
      ch = (char) plain;
      in += 2;
    } else {
      in++;  // Ordinary character (or a lone trailing backslash)
    }
    to[out++] = ch;
  }
  if (out >= n) return -1;  // No room left for the terminator
  to[out] = '\0';           // out < n here, so n > 0 and the slot exists
  return out;
}
321 | | |
322 | | int mjson_get_string(const char *s, int len, const char *path, char *to, |
323 | 0 | int n) { |
324 | 0 | const char *p; |
325 | 0 | int sz; |
326 | 0 | if (mjson_find(s, len, path, &p, &sz) != MJSON_TOK_STRING) return -1; |
327 | 0 | return mjson_unescape(p + 1, sz - 2, to, n); |
328 | 0 | } |
329 | | |
330 | 0 | int mjson_get_hex(const char *s, int len, const char *x, char *to, int n) { |
331 | 0 | const char *p; |
332 | 0 | int i, j, sz; |
333 | 0 | if (mjson_find(s, len, x, &p, &sz) != MJSON_TOK_STRING) return -1; |
334 | 0 | for (i = j = 0; i < sz - 3 && j < n; i += 2, j++) { |
335 | 0 | ((unsigned char *) to)[j] = mjson_unhex_nimble(p + i + 1); |
336 | 0 | } |
337 | 0 | if (j < n) to[j] = '\0'; |
338 | 0 | return j; |
339 | 0 | } |
340 | | |
// Return 1 if `c` is an ASCII decimal digit, 0 otherwise.
static int is_digit(int c) {
  return !(c < '0' || c > '9');
}
344 | | |
345 | | /* NOTE: strtod() implementation by Yasuhiro Matsumoto. */ |
// Length-bounded strtod() replacement: parse a decimal number from at most
// `len` bytes at `str`. The text does not have to be NUL-terminated.
//
// Accepts an optional sign, integer digits, an optional fraction and an
// optional exponent introduced by 'e' or 'E'.
//
// If `end` is not NULL, `*end` is set to one past the last consumed
// character, or to `str` when the text does not form a number (for example
// a lone sign, or a '.' with no digit after it).
// Returns the parsed value.
static double mystrtod(const char *str, int len, char **end) {
  // Exponent digits stop accumulating past this value so that a long digit
  // run cannot overflow the int accumulator. The result is unaffected:
  // 10^309 already overflows a double, so every larger exponent yields the
  // same infinity (or zero when dividing).
  enum { EXP_CAP = 100000 };
  double d = 0.0;
  int sign = 1;
  const char *p = str, *a = str;
  const char *end_p = str + len;

  /* decimal part */
  if (p < end_p && *p == '-') {
    sign = -1;
    ++p;
  } else if (p < end_p && *p == '+') {
    ++p;
  }
  if (p < end_p && is_digit(*p)) {
    d = (double) (*p++ - '0');
    while (p < end_p && is_digit(*p)) {
      d = d * 10.0 + (double) (*p - '0');
      ++p;
    }
    a = p;
  } else if (p >= end_p || *p != '.') {
    goto done;
  }
  d *= sign;

  /* fraction part */
  if (p < end_p && *p == '.') {
    double f = 0.0;
    double base = 0.1;
    ++p;

    while (p < end_p && is_digit(*p)) {
      f += base * (*p - '0');
      base /= 10.0;
      ++p;
    }
    d += f * sign;
    a = p;
  }

  /* exponential part */
  if (p < end_p && ((*p == 'E') || (*p == 'e'))) {
    double exp, f;
    int i, e = 0, neg = 0;
    p++;
    if (p < end_p && *p == '-') p++, neg++;
    if (p < end_p && *p == '+') p++;
    while (p < end_p && is_digit(*p)) {
      if (e < EXP_CAP) e = e * 10 + (*p - '0');  // Saturate, never overflow
      p++;
    }
    i = e;
    if (neg) e = -e;
    /* calculate f = 10^i by repeated squaring */
    exp = 10;
    f = 1;
    while (i > 0) {
      if (i & 1) f *= exp;
      exp *= exp;
      i >>= 1;
    }
    if (e > 0) d *= f;
    else if (e < 0) d /= f;
    a = p;
  } else if (p > str && !is_digit(*(p - 1))) {
    /* the text ended on a non-digit such as '.': not a valid number */
    a = str;
    goto done;
  }

done:
  if (end) *end = (char *) a;
  return d;
}
430 | | |
431 | | |