Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * Copyright (c) 2018 Christos Zoulas |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
15 | | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
16 | | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 | | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
18 | | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
19 | | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
20 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
21 | | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
22 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
23 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
24 | | * POSSIBILITY OF SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | /* |
28 | | * Parse JSON object serialization format (RFC-7159) |
29 | | */ |
30 | | |
31 | | #ifndef TEST |
32 | | #include "file.h" |
33 | | |
34 | | #ifndef lint |
35 | | FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $") |
36 | | #endif |
37 | | |
38 | | #include "magic.h" |
39 | | #else |
40 | | #include <stdio.h> |
41 | | #include <stddef.h> |
42 | | #endif |
43 | | #include <string.h> |
44 | | |
45 | | #ifdef DEBUG |
46 | | #include <stdio.h> |
47 | | #define DPRINTF(a, b, c) \ |
48 | | printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \ |
49 | | (int)(b - c), (const char *)(c)) |
50 | | #define __file_debugused |
51 | | #else |
52 | 61.9k | #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) |
53 | | #define __file_debugused __attribute__((__unused__)) |
54 | | #endif |
55 | | |
56 | 2.68k | #define JSON_ARRAY 0 |
57 | 1.01k | #define JSON_CONSTANT 1 |
58 | 8.98k | #define JSON_NUMBER 2 |
59 | 863 | #define JSON_OBJECT 3 |
60 | 780 | #define JSON_STRING 4 |
61 | 381 | #define JSON_ARRAYN 5 |
62 | | #define JSON_MAX 6 |
63 | | |
64 | | /* |
65 | | * if JSON_COUNT != 0: |
66 | | * count all the objects, require that we have the whole data file |
67 | | * otherwise: |
68 | | * stop if we find an object or an array |
69 | | */ |
70 | | #ifndef JSON_COUNT |
71 | | #define JSON_COUNT 0 |
72 | | #endif |
73 | | |
74 | | static int json_parse(const unsigned char **, const unsigned char *, size_t *, |
75 | | size_t); |
76 | | |
/*
 * Return 1 if uc is one of the four whitespace bytes this parser skips
 * (space, newline, carriage return, horizontal tab), otherwise 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90 | | |
/*
 * Return 1 if uc is a decimal digit '0'..'9', otherwise 0.
 * C guarantees that the digit characters are contiguous and ascending.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
102 | | |
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return 1;
	default:
		break;
	}
	return 0;
}
116 | | |
/*
 * Advance from uc past any run of JSON whitespace without going beyond
 * ue.  Returns the first non-whitespace position, or ue if the rest of
 * the buffer is whitespace.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;

	for (p = uc; p < ue; p++) {
		if (!json_isspace(*p))
			break;
	}
	return p;
}
124 | | |
125 | | /*ARGSUSED*/ |
126 | | static int |
127 | | json_parse_string(const unsigned char **ucp, const unsigned char *ue, |
128 | | size_t lvl __file_debugused) |
129 | 2.34k | { |
130 | 2.34k | const unsigned char *uc = *ucp; |
131 | 2.34k | size_t i; |
132 | | |
133 | 2.34k | DPRINTF("Parse string: ", uc, *ucp); |
134 | 16.8k | while (uc < ue) { |
135 | 16.7k | switch (*uc++) { |
136 | 9 | case '\0': |
137 | 9 | goto out; |
138 | 1.79k | case '\\': |
139 | 1.79k | if (uc == ue) |
140 | 7 | goto out; |
141 | 1.79k | switch (*uc++) { |
142 | 1 | case '\0': |
143 | 1 | goto out; |
144 | 85 | case '"': |
145 | 179 | case '\\': |
146 | 725 | case '/': |
147 | 800 | case 'b': |
148 | 863 | case 'f': |
149 | 1.06k | case 'n': |
150 | 1.14k | case 'r': |
151 | 1.21k | case 't': |
152 | 1.21k | continue; |
153 | 561 | case 'u': |
154 | 561 | if (ue - uc < 4) { |
155 | 6 | uc = ue; |
156 | 6 | goto out; |
157 | 6 | } |
158 | 2.72k | for (i = 0; i < 4; i++) |
159 | 2.19k | if (!json_isxdigit(*uc++)) |
160 | 17 | goto out; |
161 | 538 | continue; |
162 | 538 | default: |
163 | 10 | goto out; |
164 | 1.79k | } |
165 | 2.20k | case '"': |
166 | 2.20k | DPRINTF("Good string: ", uc, *ucp); |
167 | 2.20k | *ucp = uc; |
168 | 2.20k | return 1; |
169 | 12.7k | default: |
170 | 12.7k | continue; |
171 | 16.7k | } |
172 | 16.7k | } |
173 | 138 | out: |
174 | 138 | DPRINTF("Bad string: ", uc, *ucp); |
175 | 138 | *ucp = uc; |
176 | 138 | return 0; |
177 | 2.34k | } |
178 | | |
179 | | static int |
180 | | json_parse_array(const unsigned char **ucp, const unsigned char *ue, |
181 | | size_t *st, size_t lvl) |
182 | 2.68k | { |
183 | 2.68k | const unsigned char *uc = *ucp; |
184 | | |
185 | 2.68k | DPRINTF("Parse array: ", uc, *ucp); |
186 | 5.24k | while (uc < ue) { |
187 | 5.22k | uc = json_skip_space(uc, ue); |
188 | 5.22k | if (uc == ue) |
189 | 14 | goto out; |
190 | 5.20k | if (*uc == ']') |
191 | 254 | goto done; |
192 | 4.95k | if (!json_parse(&uc, ue, st, lvl + 1)) |
193 | 2.31k | goto out; |
194 | 2.63k | if (uc == ue) |
195 | 36 | goto out; |
196 | 2.60k | switch (*uc) { |
197 | 2.56k | case ',': |
198 | 2.56k | uc++; |
199 | 2.56k | continue; |
200 | 23 | case ']': |
201 | 277 | done: |
202 | 277 | st[JSON_ARRAYN]++; |
203 | 277 | DPRINTF("Good array: ", uc, *ucp); |
204 | 277 | *ucp = uc + 1; |
205 | 277 | return 1; |
206 | 12 | default: |
207 | 12 | goto out; |
208 | 2.60k | } |
209 | 2.60k | } |
210 | 2.40k | out: |
211 | 2.40k | DPRINTF("Bad array: ", uc, *ucp); |
212 | 2.40k | *ucp = uc; |
213 | 2.40k | return 0; |
214 | 2.68k | } |
215 | | |
216 | | static int |
217 | | json_parse_object(const unsigned char **ucp, const unsigned char *ue, |
218 | | size_t *st, size_t lvl) |
219 | 773 | { |
220 | 773 | const unsigned char *uc = *ucp; |
221 | 773 | DPRINTF("Parse object: ", uc, *ucp); |
222 | 1.74k | while (uc < ue) { |
223 | 1.74k | uc = json_skip_space(uc, ue); |
224 | 1.74k | if (uc == ue) |
225 | 10 | goto out; |
226 | 1.73k | if (*uc == '}') { |
227 | 140 | uc++; |
228 | 140 | goto done; |
229 | 140 | } |
230 | 1.59k | if (*uc++ != '"') { |
231 | 30 | DPRINTF("not string", uc, *ucp); |
232 | 30 | goto out; |
233 | 30 | } |
234 | 1.56k | DPRINTF("next field", uc, *ucp); |
235 | 1.56k | if (!json_parse_string(&uc, ue, lvl)) { |
236 | 6 | DPRINTF("not string", uc, *ucp); |
237 | 6 | goto out; |
238 | 6 | } |
239 | 1.55k | uc = json_skip_space(uc, ue); |
240 | 1.55k | if (uc == ue) |
241 | 9 | goto out; |
242 | 1.54k | if (*uc++ != ':') { |
243 | 19 | DPRINTF("not colon", uc, *ucp); |
244 | 19 | goto out; |
245 | 19 | } |
246 | 1.52k | if (!json_parse(&uc, ue, st, lvl + 1)) { |
247 | 231 | DPRINTF("not json", uc, *ucp); |
248 | 231 | goto out; |
249 | 231 | } |
250 | 1.29k | if (uc == ue) |
251 | 1 | goto out; |
252 | 1.29k | switch (*uc++) { |
253 | 972 | case ',': |
254 | 972 | continue; |
255 | 318 | case '}': /* { */ |
256 | 458 | done: |
257 | 458 | DPRINTF("Good object: ", uc, *ucp); |
258 | 458 | *ucp = uc; |
259 | 458 | return 1; |
260 | 4 | default: |
261 | 4 | DPRINTF("not more", uc, *ucp); |
262 | 4 | *ucp = uc - 1; |
263 | 4 | goto out; |
264 | 1.29k | } |
265 | 1.29k | } |
266 | 315 | out: |
267 | 315 | DPRINTF("Bad object: ", uc, *ucp); |
268 | 315 | *ucp = uc; |
269 | 315 | return 0; |
270 | 773 | } |
271 | | |
272 | | /*ARGSUSED*/ |
273 | | static int |
274 | | json_parse_number(const unsigned char **ucp, const unsigned char *ue, |
275 | | size_t lvl __file_debugused) |
276 | 8.98k | { |
277 | 8.98k | const unsigned char *uc = *ucp; |
278 | 8.98k | int got = 0; |
279 | | |
280 | 8.98k | DPRINTF("Parse number: ", uc, *ucp); |
281 | 8.98k | if (uc == ue) |
282 | 0 | return 0; |
283 | 8.98k | if (*uc == '-') |
284 | 294 | uc++; |
285 | | |
286 | 10.6M | for (; uc < ue; uc++) { |
287 | 10.6M | if (!json_isdigit(*uc)) |
288 | 8.95k | break; |
289 | 10.6M | got = 1; |
290 | 10.6M | } |
291 | 8.98k | if (uc == ue) |
292 | 27 | goto out; |
293 | 8.95k | if (*uc == '.') |
294 | 108 | uc++; |
295 | 13.3k | for (; uc < ue; uc++) { |
296 | 13.3k | if (!json_isdigit(*uc)) |
297 | 8.94k | break; |
298 | 4.37k | got = 1; |
299 | 4.37k | } |
300 | 8.95k | if (uc == ue) |
301 | 17 | goto out; |
302 | 8.94k | if (got && (*uc == 'e' || *uc == 'E')) { |
303 | 301 | uc++; |
304 | 301 | got = 0; |
305 | 301 | if (uc == ue) |
306 | 6 | goto out; |
307 | 295 | if (*uc == '+' || *uc == '-') |
308 | 121 | uc++; |
309 | 1.83M | for (; uc < ue; uc++) { |
310 | 1.83M | if (!json_isdigit(*uc)) |
311 | 279 | break; |
312 | 1.82M | got = 1; |
313 | 1.82M | } |
314 | 295 | } |
315 | 8.98k | out: |
316 | 8.98k | if (!got) |
317 | 7.16k | DPRINTF("Bad number: ", uc, *ucp); |
318 | 1.82k | else |
319 | 1.82k | DPRINTF("Good number: ", uc, *ucp); |
320 | 8.98k | *ucp = uc; |
321 | 8.98k | return got; |
322 | 8.94k | } |
323 | | |
324 | | /*ARGSUSED*/ |
325 | | static int |
326 | | json_parse_const(const unsigned char **ucp, const unsigned char *ue, |
327 | | const char *str, size_t len, size_t lvl __file_debugused) |
328 | 1.01k | { |
329 | 1.01k | const unsigned char *uc = *ucp; |
330 | | |
331 | 1.01k | DPRINTF("Parse const: ", uc, *ucp); |
332 | 1.01k | *ucp += --len - 1; |
333 | 1.01k | if (*ucp > ue) |
334 | 45 | *ucp = ue; |
335 | 4.24k | for (; uc < ue && --len;) { |
336 | 3.25k | if (*uc++ != *++str) { |
337 | 30 | DPRINTF("Bad const: ", uc, *ucp); |
338 | 30 | return 0; |
339 | 30 | } |
340 | 3.25k | } |
341 | 987 | DPRINTF("Good const: ", uc, *ucp); |
342 | 987 | return 1; |
343 | 1.01k | } |
344 | | |
345 | | static int |
346 | | json_parse(const unsigned char **ucp, const unsigned char *ue, |
347 | | size_t *st, size_t lvl) |
348 | 14.3k | { |
349 | 14.3k | const unsigned char *uc, *ouc; |
350 | 14.3k | int rv = 0; |
351 | 14.3k | int t; |
352 | | |
353 | 14.3k | ouc = uc = json_skip_space(*ucp, ue); |
354 | 14.3k | if (uc == ue) |
355 | 65 | goto out; |
356 | | |
357 | | // Avoid recursion |
358 | 14.2k | if (lvl > 500) { |
359 | 1 | DPRINTF("Too many levels", uc, *ucp); |
360 | 1 | return 0; |
361 | 1 | } |
362 | | #if JSON_COUNT |
363 | | /* bail quickly if not counting */ |
364 | | if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) |
365 | | return 1; |
366 | | #endif |
367 | | |
368 | 14.2k | DPRINTF("Parse general: ", uc, *ucp); |
369 | 14.2k | switch (*uc++) { |
370 | 780 | case '"': |
371 | 780 | rv = json_parse_string(&uc, ue, lvl + 1); |
372 | 780 | t = JSON_STRING; |
373 | 780 | break; |
374 | 2.68k | case '[': |
375 | 2.68k | rv = json_parse_array(&uc, ue, st, lvl + 1); |
376 | 2.68k | t = JSON_ARRAY; |
377 | 2.68k | break; |
378 | 773 | case '{': /* '}' */ |
379 | 773 | rv = json_parse_object(&uc, ue, st, lvl + 1); |
380 | 773 | t = JSON_OBJECT; |
381 | 773 | break; |
382 | 350 | case 't': |
383 | 350 | rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1); |
384 | 350 | t = JSON_CONSTANT; |
385 | 350 | break; |
386 | 362 | case 'f': |
387 | 362 | rv = json_parse_const(&uc, ue, "false", sizeof("false"), |
388 | 362 | lvl + 1); |
389 | 362 | t = JSON_CONSTANT; |
390 | 362 | break; |
391 | 305 | case 'n': |
392 | 305 | rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1); |
393 | 305 | t = JSON_CONSTANT; |
394 | 305 | break; |
395 | 8.98k | default: |
396 | 8.98k | --uc; |
397 | 8.98k | rv = json_parse_number(&uc, ue, lvl + 1); |
398 | 8.98k | t = JSON_NUMBER; |
399 | 8.98k | break; |
400 | 14.2k | } |
401 | 14.2k | if (rv) |
402 | 4.19k | st[t]++; |
403 | 14.2k | uc = json_skip_space(uc, ue); |
404 | 14.3k | out: |
405 | 14.3k | DPRINTF("End general: ", uc, *ucp); |
406 | 14.3k | *ucp = uc; |
407 | 14.3k | if (lvl == 0) { |
408 | 7.77k | if (!rv) |
409 | 7.55k | return 0; |
410 | 225 | if (uc == ue) |
411 | 68 | return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0; |
412 | 157 | if (*ouc == *uc && json_parse(&uc, ue, st, 1)) |
413 | 36 | return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0; |
414 | 121 | else |
415 | 121 | return 0; |
416 | 157 | } |
417 | 6.52k | return rv; |
418 | 14.3k | } |
419 | | |
420 | | #ifndef TEST |
421 | | int |
422 | | file_is_json(struct magic_set *ms, const struct buffer *b) |
423 | 7.77k | { |
424 | 7.77k | const unsigned char *uc = CAST(const unsigned char *, b->fbuf); |
425 | 7.77k | const unsigned char *ue = uc + b->flen; |
426 | 7.77k | size_t st[JSON_MAX]; |
427 | 7.77k | int mime = ms->flags & MAGIC_MIME; |
428 | 7.77k | int jt; |
429 | | |
430 | | |
431 | 7.77k | if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) |
432 | 0 | return 0; |
433 | | |
434 | 7.77k | memset(st, 0, sizeof(st)); |
435 | | |
436 | 7.77k | if ((jt = json_parse(&uc, ue, st, 0)) == 0) |
437 | 7.75k | return 0; |
438 | | |
439 | 21 | if (mime == MAGIC_MIME_ENCODING) |
440 | 0 | return 1; |
441 | 21 | if (mime) { |
442 | 0 | if (file_printf(ms, "application/%s", |
443 | 0 | jt == 1 ? "json" : "x-ndjson") == -1) |
444 | 0 | return -1; |
445 | 0 | return 1; |
446 | 0 | } |
447 | 21 | if (file_printf(ms, "%sJSON text data", |
448 | 21 | jt == 1 ? "" : "New Line Delimited ") == -1) |
449 | 0 | return -1; |
450 | | #if JSON_COUNT |
451 | | #define P(n) st[n], st[n] > 1 ? "s" : "" |
452 | | if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT |
453 | | "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT |
454 | | "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT |
455 | | "u >1array%s)", |
456 | | P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), |
457 | | P(JSON_NUMBER), P(JSON_ARRAYN)) |
458 | | == -1) |
459 | | return -1; |
460 | | #endif |
461 | 21 | return 1; |
462 | 21 | } |
463 | | |
464 | | #else |
465 | | |
466 | | #include <sys/types.h> |
467 | | #include <sys/stat.h> |
468 | | #include <stdio.h> |
469 | | #include <fcntl.h> |
470 | | #include <unistd.h> |
471 | | #include <stdlib.h> |
472 | | #include <stdint.h> |
473 | | #include <err.h> |
474 | | |
475 | | int |
476 | | main(int argc, char *argv[]) |
477 | | { |
478 | | int fd; |
479 | | struct stat st; |
480 | | unsigned char *p; |
481 | | size_t stats[JSON_MAX]; |
482 | | |
483 | | if ((fd = open(argv[1], O_RDONLY)) == -1) |
484 | | err(EXIT_FAILURE, "Can't open `%s'", argv[1]); |
485 | | |
486 | | if (fstat(fd, &st) == -1) |
487 | | err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); |
488 | | |
489 | | if ((p = CAST(char *, malloc(st.st_size))) == NULL) |
490 | | err(EXIT_FAILURE, "Can't allocate %jd bytes", |
491 | | (intmax_t)st.st_size); |
492 | | if (read(fd, p, st.st_size) != st.st_size) |
493 | | err(EXIT_FAILURE, "Can't read %jd bytes", |
494 | | (intmax_t)st.st_size); |
495 | | memset(stats, 0, sizeof(stats)); |
496 | | printf("is json %d\n", json_parse((const unsigned char **)&p, |
497 | | p + st.st_size, stats, 0)); |
498 | | return 0; |
499 | | } |
500 | | #endif |