Coverage Report

Created: 2025-08-11 06:28

/src/file/src/is_json.c
Line
Count
Source (jump to first uncovered line)
1
/*-
2
 * Copyright (c) 2018 Christos Zoulas
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24
 * POSSIBILITY OF SUCH DAMAGE.
25
 */
26
27
/*
28
 * Parse JSON object serialization format (RFC-7159)
29
 */
30
31
#ifndef TEST
32
#include "file.h"
33
34
#ifndef lint
35
FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
36
#endif
37
38
#include "magic.h"
39
#else
40
#include <stdio.h>
41
#include <stddef.h>
42
#endif
43
#include <string.h>
44
45
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug trace: prints the message (a), the byte at position (b) in hex and
 * as a character, and the text between (c) and (b).  The output is indented
 * by the variable `lvl', which must be in scope at the point of use.
 * The arguments are fully parenthesized so that any pointer expression can
 * be passed safely.
 */
#define DPRINTF(a, b, c)  \
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
  (int)((b) - (c)), (const char *)(c))
/* `lvl' is really used by DPRINTF in debug builds: no attribute needed */
#define __file_debugused
#else
/* Tracing disabled: DPRINTF expands to an empty statement */
#define DPRINTF(a, b, c)  do { } while (/*CONSTCOND*/0)
/* Parameters that exist only for DPRINTF are otherwise unused */
#define __file_debugused __attribute__((__unused__))
#endif
55
56
2.68k
/*
 * Indices into the statistics array that is threaded through the parser.
 * json_parse() bumps the slot matching the kind of value it has just
 * accepted; json_parse_array() additionally bumps JSON_ARRAYN each time an
 * array is closed by ']'.  JSON_MAX is the number of slots.
 */
#define JSON_ARRAY    0
#define JSON_CONSTANT 1
#define JSON_NUMBER   2
#define JSON_OBJECT   3
#define JSON_STRING   4
#define JSON_ARRAYN   5
#define JSON_MAX      6
63
64
/*
65
 * if JSON_COUNT != 0:
66
 *  count all the objects, require that we have the whole data file
67
 * otherwise:
68
 *  stop if we find an object or an array
69
 */
70
#ifndef JSON_COUNT
71
#define JSON_COUNT 0
72
#endif
73
74
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75
  size_t);
76
77
/*
 * Return 1 if uc is one of the four bytes skipped as whitespace by this
 * parser (space, line feed, carriage return, horizontal tab), else 0.
 */
static int
json_isspace(const unsigned char uc)
{
  return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90
91
/*
 * Return 1 if uc is a decimal digit '0'..'9', else 0.
 * The C standard guarantees that the digit characters are contiguous.
 */
static int
json_isdigit(unsigned char uc)
{
  return uc >= '0' && uc <= '9';
}
102
103
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), else 0.
 */
static int
json_isxdigit(unsigned char uc)
{
  if (json_isdigit(uc))
    return 1;
  return (uc >= 'a' && uc <= 'f') || (uc >= 'A' && uc <= 'F');
}
116
117
/*
 * Advance past any run of whitespace (as defined by json_isspace()) that
 * starts at uc, never going beyond ue.  Returns the first position that is
 * not whitespace, or ue if the rest of the buffer is blank.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
  for (; uc < ue; uc++) {
    if (!json_isspace(*uc))
      break;
  }
  return uc;
}
124
125
/*ARGSUSED*/
126
static int
127
json_parse_string(const unsigned char **ucp, const unsigned char *ue,
128
    size_t lvl __file_debugused)
129
2.34k
{
130
2.34k
  const unsigned char *uc = *ucp;
131
2.34k
  size_t i;
132
133
2.34k
  DPRINTF("Parse string: ", uc, *ucp);
134
16.8k
  while (uc < ue) {
135
16.7k
    switch (*uc++) {
136
9
    case '\0':
137
9
      goto out;
138
1.79k
    case '\\':
139
1.79k
      if (uc == ue)
140
7
        goto out;
141
1.79k
      switch (*uc++) {
142
1
      case '\0':
143
1
        goto out;
144
85
      case '"':
145
179
      case '\\':
146
725
      case '/':
147
800
      case 'b':
148
863
      case 'f':
149
1.06k
      case 'n':
150
1.14k
      case 'r':
151
1.21k
      case 't':
152
1.21k
        continue;
153
561
      case 'u':
154
561
        if (ue - uc < 4) {
155
6
          uc = ue;
156
6
          goto out;
157
6
        }
158
2.72k
        for (i = 0; i < 4; i++)
159
2.19k
          if (!json_isxdigit(*uc++))
160
17
            goto out;
161
538
        continue;
162
538
      default:
163
10
        goto out;
164
1.79k
      }
165
2.20k
    case '"':
166
2.20k
      DPRINTF("Good string: ", uc, *ucp);
167
2.20k
      *ucp = uc;
168
2.20k
      return 1;
169
12.7k
    default:
170
12.7k
      continue;
171
16.7k
    }
172
16.7k
  }
173
138
out:
174
138
  DPRINTF("Bad string: ", uc, *ucp);
175
138
  *ucp = uc;
176
138
  return 0;
177
2.34k
}
178
179
static int
180
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
181
  size_t *st, size_t lvl)
182
2.68k
{
183
2.68k
  const unsigned char *uc = *ucp;
184
185
2.68k
  DPRINTF("Parse array: ", uc, *ucp);
186
5.24k
  while (uc < ue) {
187
5.22k
    uc = json_skip_space(uc, ue);
188
5.22k
    if (uc == ue)
189
14
      goto out;
190
5.20k
    if (*uc == ']')
191
254
      goto done;
192
4.95k
    if (!json_parse(&uc, ue, st, lvl + 1))
193
2.31k
      goto out;
194
2.63k
    if (uc == ue)
195
36
      goto out;
196
2.60k
    switch (*uc) {
197
2.56k
    case ',':
198
2.56k
      uc++;
199
2.56k
      continue;
200
23
    case ']':
201
277
    done:
202
277
      st[JSON_ARRAYN]++;
203
277
      DPRINTF("Good array: ", uc, *ucp);
204
277
      *ucp = uc + 1;
205
277
      return 1;
206
12
    default:
207
12
      goto out;
208
2.60k
    }
209
2.60k
  }
210
2.40k
out:
211
2.40k
  DPRINTF("Bad array: ", uc,  *ucp);
212
2.40k
  *ucp = uc;
213
2.40k
  return 0;
214
2.68k
}
215
216
/*
 * Parse the members of an object; on entry *ucp points just past the
 * opening '{'.  Each member is a string key, a ':' and a value parsed by
 * json_parse(); members are separated by ','.  A '}' is accepted wherever
 * a key could start, so "{}" and a trailing comma before '}' are accepted.
 *
 * On success *ucp is set just past the closing '}' and 1 is returned.
 * On failure *ucp is set to the position where parsing stopped and 0 is
 * returned.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
  size_t *st, size_t lvl)
{
  const unsigned char *uc = *ucp;

  DPRINTF("Parse object: ", uc, *ucp);
  while (uc < ue) {
    uc = json_skip_space(uc, ue);
    if (uc == ue)
      goto out;
    if (*uc == '}') {
      uc++;
      goto done;
    }
    /* A member must start with a quoted key */
    if (*uc++ != '"') {
      DPRINTF("not string", uc, *ucp);
      goto out;
    }
    DPRINTF("next field", uc, *ucp);
    if (!json_parse_string(&uc, ue, lvl)) {
      DPRINTF("not string", uc, *ucp);
      goto out;
    }
    uc = json_skip_space(uc, ue);
    if (uc == ue)
      goto out;
    if (*uc++ != ':') {
      DPRINTF("not colon", uc, *ucp);
      goto out;
    }
    if (!json_parse(&uc, ue, st, lvl + 1)) {
      DPRINTF("not json", uc, *ucp);
      goto out;
    }
    if (uc == ue)
      goto out;
    switch (*uc++) {
    case ',':
      continue;
    case '}': /* { */
    done:
      DPRINTF("Good object: ", uc, *ucp);
      *ucp = uc;
      return 1;
    default:
      DPRINTF("not more", uc, *ucp);
      /*
       * Step back onto the unexpected byte so that the
       * position stored at `out' is the offending character.
       * (Previously `*ucp = uc - 1' was stored here and then
       * immediately overwritten at `out'.)
       */
      uc--;
      goto out;
    }
  }
out:
  DPRINTF("Bad object: ", uc, *ucp);
  *ucp = uc;
  return 0;
}
271
272
/*ARGSUSED*/
273
static int
274
json_parse_number(const unsigned char **ucp, const unsigned char *ue, 
275
    size_t lvl __file_debugused)
276
8.98k
{
277
8.98k
  const unsigned char *uc = *ucp;
278
8.98k
  int got = 0;
279
280
8.98k
  DPRINTF("Parse number: ", uc, *ucp);
281
8.98k
  if (uc == ue)
282
0
    return 0;
283
8.98k
  if (*uc == '-')
284
294
    uc++;
285
286
10.6M
  for (; uc < ue; uc++) {
287
10.6M
    if (!json_isdigit(*uc))
288
8.95k
      break;
289
10.6M
    got = 1;
290
10.6M
  }
291
8.98k
  if (uc == ue)
292
27
    goto out;
293
8.95k
  if (*uc == '.')
294
108
    uc++;
295
13.3k
  for (; uc < ue; uc++) {
296
13.3k
    if (!json_isdigit(*uc))
297
8.94k
      break;
298
4.37k
    got = 1;
299
4.37k
  }
300
8.95k
  if (uc == ue)
301
17
    goto out;
302
8.94k
  if (got && (*uc == 'e' || *uc == 'E')) {
303
301
    uc++;
304
301
    got = 0;
305
301
    if (uc == ue)
306
6
      goto out;
307
295
    if (*uc == '+' || *uc == '-')
308
121
      uc++;
309
1.83M
    for (; uc < ue; uc++) {
310
1.83M
      if (!json_isdigit(*uc))
311
279
        break;
312
1.82M
      got = 1;
313
1.82M
    }
314
295
  }
315
8.98k
out:
316
8.98k
  if (!got)
317
7.16k
    DPRINTF("Bad number: ", uc, *ucp);
318
1.82k
  else
319
1.82k
    DPRINTF("Good number: ", uc, *ucp);
320
8.98k
  *ucp = uc;
321
8.98k
  return got;
322
8.94k
}
323
324
/*ARGSUSED*/
325
static int
326
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
327
    const char *str, size_t len, size_t lvl __file_debugused)
328
1.01k
{
329
1.01k
  const unsigned char *uc = *ucp;
330
331
1.01k
  DPRINTF("Parse const: ", uc, *ucp);
332
1.01k
  *ucp += --len - 1;
333
1.01k
  if (*ucp > ue)
334
45
    *ucp = ue;
335
4.24k
  for (; uc < ue && --len;) {
336
3.25k
    if (*uc++ != *++str) {
337
30
      DPRINTF("Bad const: ", uc, *ucp);
338
30
      return 0;
339
30
    }
340
3.25k
  }
341
987
  DPRINTF("Good const: ", uc, *ucp);
342
987
  return 1;
343
1.01k
}
344
345
static int
346
json_parse(const unsigned char **ucp, const unsigned char *ue,
347
    size_t *st, size_t lvl)
348
14.3k
{
349
14.3k
  const unsigned char *uc, *ouc;
350
14.3k
  int rv = 0;
351
14.3k
  int t;
352
353
14.3k
  ouc = uc = json_skip_space(*ucp, ue);
354
14.3k
  if (uc == ue)
355
65
    goto out;
356
357
  // Avoid recursion
358
14.2k
  if (lvl > 500) {
359
1
    DPRINTF("Too many levels", uc, *ucp);
360
1
    return 0;
361
1
  }
362
#if JSON_COUNT
363
  /* bail quickly if not counting */
364
  if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
365
    return 1;
366
#endif
367
368
14.2k
  DPRINTF("Parse general: ", uc, *ucp);
369
14.2k
  switch (*uc++) {
370
780
  case '"':
371
780
    rv = json_parse_string(&uc, ue, lvl + 1);
372
780
    t = JSON_STRING;
373
780
    break;
374
2.68k
  case '[':
375
2.68k
    rv = json_parse_array(&uc, ue, st, lvl + 1);
376
2.68k
    t = JSON_ARRAY;
377
2.68k
    break;
378
773
  case '{': /* '}' */
379
773
    rv = json_parse_object(&uc, ue, st, lvl + 1);
380
773
    t = JSON_OBJECT;
381
773
    break;
382
350
  case 't':
383
350
    rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
384
350
    t = JSON_CONSTANT;
385
350
    break;
386
362
  case 'f':
387
362
    rv = json_parse_const(&uc, ue, "false", sizeof("false"),
388
362
        lvl + 1);
389
362
    t = JSON_CONSTANT;
390
362
    break;
391
305
  case 'n':
392
305
    rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
393
305
    t = JSON_CONSTANT;
394
305
    break;
395
8.98k
  default:
396
8.98k
    --uc;
397
8.98k
    rv = json_parse_number(&uc, ue, lvl + 1);
398
8.98k
    t = JSON_NUMBER;
399
8.98k
    break;
400
14.2k
  }
401
14.2k
  if (rv)
402
4.19k
    st[t]++;
403
14.2k
  uc = json_skip_space(uc, ue);
404
14.3k
out:
405
14.3k
  DPRINTF("End general: ", uc, *ucp);
406
14.3k
  *ucp = uc;
407
14.3k
  if (lvl == 0) {
408
7.77k
    if (!rv)
409
7.55k
      return 0;
410
225
    if (uc == ue)
411
68
      return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
412
157
    if (*ouc == *uc && json_parse(&uc, ue, st, 1))
413
36
      return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
414
121
    else
415
121
      return 0;
416
157
  }
417
6.52k
  return rv;
418
14.3k
}
419
420
#ifndef TEST
421
int
422
file_is_json(struct magic_set *ms, const struct buffer *b)
423
7.77k
{
424
7.77k
  const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
425
7.77k
  const unsigned char *ue = uc + b->flen;
426
7.77k
  size_t st[JSON_MAX];
427
7.77k
  int mime = ms->flags & MAGIC_MIME;
428
7.77k
  int jt;
429
430
431
7.77k
  if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
432
0
    return 0;
433
434
7.77k
  memset(st, 0, sizeof(st));
435
436
7.77k
  if ((jt = json_parse(&uc, ue, st, 0)) == 0)
437
7.75k
    return 0;
438
439
21
  if (mime == MAGIC_MIME_ENCODING)
440
0
    return 1;
441
21
  if (mime) {
442
0
    if (file_printf(ms, "application/%s",
443
0
        jt == 1 ? "json" : "x-ndjson") == -1)
444
0
      return -1;
445
0
    return 1;
446
0
  }
447
21
  if (file_printf(ms, "%sJSON text data",
448
21
      jt == 1 ? "" : "New Line Delimited ") == -1)
449
0
    return -1;
450
#if JSON_COUNT
451
#define P(n) st[n], st[n] > 1 ? "s" : ""
452
  if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
453
      "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
454
      "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
455
      "u >1array%s)",
456
      P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
457
      P(JSON_NUMBER), P(JSON_ARRAYN))
458
      == -1)
459
    return -1;
460
#endif
461
21
  return 1;
462
21
}
463
464
#else
465
466
#include <sys/types.h>
467
#include <sys/stat.h>
468
#include <stdio.h>
469
#include <fcntl.h>
470
#include <unistd.h>
471
#include <stdlib.h>
472
#include <stdint.h>
473
#include <err.h>
474
475
int
476
main(int argc, char *argv[])
477
{
478
  int fd;
479
  struct stat st;
480
  unsigned char *p;
481
  size_t stats[JSON_MAX];
482
483
  if ((fd = open(argv[1], O_RDONLY)) == -1)
484
    err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
485
486
  if (fstat(fd, &st) == -1)
487
    err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
488
489
  if ((p = CAST(char *, malloc(st.st_size))) == NULL)
490
    err(EXIT_FAILURE, "Can't allocate %jd bytes",
491
        (intmax_t)st.st_size);
492
  if (read(fd, p, st.st_size) != st.st_size)
493
    err(EXIT_FAILURE, "Can't read %jd bytes",
494
        (intmax_t)st.st_size);
495
  memset(stats, 0, sizeof(stats));
496
  printf("is json %d\n", json_parse((const unsigned char **)&p,
497
      p + st.st_size, stats, 0));
498
  return 0;
499
}
500
#endif