Coverage Report

Created: 2026-06-10 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/nghttp2/lib/sfparse.c
Line
Count
Source
1
/*
2
 * sfparse
3
 *
4
 * Copyright (c) 2023 sfparse contributors
5
 * Copyright (c) 2019 nghttp3 contributors
6
 * Copyright (c) 2015 nghttp2 contributors
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining
9
 * a copy of this software and associated documentation files (the
10
 * "Software"), to deal in the Software without restriction, including
11
 * without limitation the rights to use, copy, modify, merge, publish,
12
 * distribute, sublicense, and/or sell copies of the Software, and to
13
 * permit persons to whom the Software is furnished to do so, subject to
14
 * the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be
17
 * included in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 */
27
#include "sfparse.h"
28
29
#include <string.h>
30
#include <assert.h>
31
#include <stdlib.h>
32
33
#ifdef __AVX2__
34
#  include <immintrin.h>
35
#endif /* __AVX2__ */
36
37
97.1k
#define SFPARSE_STATE_DICT 0x08U
38
0
#define SFPARSE_STATE_LIST 0x10U
39
0
#define SFPARSE_STATE_ITEM 0x18U
40
41
18.5k
#define SFPARSE_STATE_INNER_LIST 0x04U
42
43
23.3k
#define SFPARSE_STATE_BEFORE 0x00U
44
109k
#define SFPARSE_STATE_BEFORE_PARAMS 0x01U
45
44.9k
#define SFPARSE_STATE_PARAMS 0x02U
46
72.2k
#define SFPARSE_STATE_AFTER 0x03U
47
48
144k
#define SFPARSE_STATE_OP_MASK 0x03U
49
50
#define SFPARSE_SET_STATE_AFTER(NAME)                                          \
51
28.0k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)
52
#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME)                                  \
53
53.5k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)
54
#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME)                              \
55
15.5k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)
56
57
28.0k
#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
58
53.5k
#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
59
#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE                                   \
60
15.5k
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)
61
62
0
#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
63
0
#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
64
#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE                                   \
65
0
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)
66
67
0
#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
68
0
#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
69
#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE                                   \
70
0
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)
71
72
56.5k
#define SFPARSE_STATE_INITIAL 0x00U
73
74
#define LCALPHAS                                                               \
75
  ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1, ['f'] = 1, ['g'] = 1, \
76
  ['h'] = 1, ['i'] = 1, ['j'] = 1, ['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1, \
77
  ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, ['t'] = 1, ['u'] = 1, \
78
  ['v'] = 1, ['w'] = 1, ['x'] = 1, ['y'] = 1, ['z'] = 1
79
80
#define ALPHAS(N)                                                              \
81
  ['A'] = (N), ['B'] = (N), ['C'] = (N), ['D'] = (N), ['E'] = (N),             \
82
  ['F'] = (N), ['G'] = (N), ['H'] = (N), ['I'] = (N), ['J'] = (N),             \
83
  ['K'] = (N), ['L'] = (N), ['M'] = (N), ['N'] = (N), ['O'] = (N),             \
84
  ['P'] = (N), ['Q'] = (N), ['R'] = (N), ['S'] = (N), ['T'] = (N),             \
85
  ['U'] = (N), ['V'] = (N), ['W'] = (N), ['X'] = (N), ['Y'] = (N),             \
86
  ['Z'] = (N), ['a'] = (N), ['b'] = (N), ['c'] = (N), ['d'] = (N),             \
87
  ['e'] = (N), ['f'] = (N), ['g'] = (N), ['h'] = (N), ['i'] = (N),             \
88
  ['j'] = (N), ['k'] = (N), ['l'] = (N), ['m'] = (N), ['n'] = (N),             \
89
  ['o'] = (N), ['p'] = (N), ['q'] = (N), ['r'] = (N), ['s'] = (N),             \
90
  ['t'] = (N), ['u'] = (N), ['v'] = (N), ['w'] = (N), ['x'] = (N),             \
91
  ['y'] = (N), ['z'] = (N)
92
93
#define DIGITS(N)                                                              \
94
  ['0'] = (N), ['1'] = (N), ['2'] = (N), ['3'] = (N), ['4'] = (N),             \
95
  ['5'] = (N), ['6'] = (N), ['7'] = (N), ['8'] = (N), ['9'] = (N)
96
97
#define HEXALPHAS(N)                                                           \
98
  ['a'] = (N), ['b'] = (N), ['c'] = (N), ['d'] = (N), ['e'] = (N), ['f'] = (N)
99
100
14.9k
static int is_ws(uint8_t c) {
101
14.9k
  switch (c) {
102
503
  case ' ':
103
1.01k
  case '\t':
104
1.01k
    return 1;
105
13.9k
  default:
106
13.9k
    return 0;
107
14.9k
  }
108
14.9k
}
109
110
#ifdef __AVX2__
111
#  ifdef _MSC_VER
112
#    include <intrin.h>
113
114
static int ctz(unsigned int v) {
115
  unsigned long n;
116
117
  /* Assume that v is not 0. */
118
  _BitScanForward(&n, v);
119
120
  return (int)n;
121
}
122
#  else /* !_MSC_VER */
123
#    define ctz __builtin_ctz
124
#  endif /* !_MSC_VER */
125
#endif   /* __AVX2__ */
126
127
586k
static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; }
128
129
34.9k
static void parser_discard_ows(sfparse_parser *sfp) {
130
35.9k
  for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
131
1.01k
    ;
132
34.9k
}
133
134
50.4k
static void parser_discard_sp(sfparse_parser *sfp) {
135
57.7k
  for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
136
7.26k
    ;
137
50.4k
}
138
139
83.6k
static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) {
140
83.6k
  sfp->state &= ~SFPARSE_STATE_OP_MASK;
141
83.6k
  sfp->state |= op;
142
83.6k
}
143
144
3.00k
static void parser_unset_inner_list_state(sfparse_parser *sfp) {
145
3.00k
  sfp->state &= ~SFPARSE_STATE_INNER_LIST;
146
3.00k
}
147
148
#ifdef __AVX2__
149
static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
150
  const __m256i us = _mm256_set1_epi8('_');
151
  const __m256i ds = _mm256_set1_epi8('-');
152
  const __m256i dot = _mm256_set1_epi8('.');
153
  const __m256i ast = _mm256_set1_epi8('*');
154
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
155
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
156
  const __m256i r1l = _mm256_set1_epi8('a' - 1);
157
  const __m256i r1r = _mm256_set1_epi8('z' + 1);
158
  __m256i s, x;
159
  uint32_t m;
160
161
  for (; first != last; first += 32) {
162
    s = _mm256_loadu_si256((void *)first);
163
164
    x = _mm256_cmpeq_epi8(s, us);
165
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x);
166
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x);
167
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x);
168
    x = _mm256_or_si256(
169
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
170
      x);
171
    x = _mm256_or_si256(
172
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
173
      x);
174
175
    m = ~(uint32_t)_mm256_movemask_epi8(x);
176
    if (m) {
177
      return first + ctz(m);
178
    }
179
  }
180
181
  return last;
182
}
183
#endif /* __AVX2__ */
184
185
static const uint8_t key_tbl[256] = {
186
  ['*'] = 1, LCALPHAS, ['_'] = 2, ['-'] = 2, ['.'] = 2, DIGITS(2),
187
};
188
189
43.6k
static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) {
190
43.6k
  const uint8_t *base;
191
#ifdef __AVX2__
192
  const uint8_t *last;
193
#endif /* __AVX2__ */
194
195
43.6k
  if (key_tbl[*sfp->pos] != 1) {
196
208
    return SFPARSE_ERR_PARSE;
197
208
  }
198
199
43.4k
  base = sfp->pos++;
200
201
#ifdef __AVX2__
202
  if (sfp->end - sfp->pos >= 32) {
203
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1FU);
204
205
    sfp->pos = find_char_key(sfp->pos, last);
206
    if (sfp->pos != last) {
207
      goto fin;
208
    }
209
  }
210
#endif /* __AVX2__ */
211
212
84.0k
  for (; !parser_eof(sfp) && key_tbl[*sfp->pos]; ++sfp->pos)
213
40.5k
    ;
214
215
#ifdef __AVX2__
216
fin:
217
#endif /* __AVX2__ */
218
43.4k
  if (dest) {
219
34.7k
    dest->base = (uint8_t *)base;
220
34.7k
    dest->len = (size_t)(sfp->pos - dest->base);
221
34.7k
  }
222
223
43.4k
  return 0;
224
43.6k
}
225
226
static const uint8_t number_tbl[256] = {
227
  DIGITS(1),
228
};
229
230
4.77k
static int parser_number(sfparse_parser *sfp, sfparse_value *dest) {
231
4.77k
  int sign = 1;
232
4.77k
  int64_t value = 0;
233
4.77k
  size_t len = 0;
234
4.77k
  size_t fpos = 0;
235
236
4.77k
  if (*sfp->pos == '-') {
237
270
    ++sfp->pos;
238
270
    if (parser_eof(sfp)) {
239
35
      return SFPARSE_ERR_PARSE;
240
35
    }
241
242
235
    sign = -1;
243
235
  }
244
245
4.77k
  assert(!parser_eof(sfp));
246
247
12.9k
  for (; !parser_eof(sfp) && number_tbl[*sfp->pos]; ++sfp->pos) {
248
8.20k
    if (++len > 15) {
249
13
      return SFPARSE_ERR_PARSE;
250
13
    }
251
252
8.19k
    value *= 10;
253
8.19k
    value += *sfp->pos - '0';
254
8.19k
  }
255
256
4.73k
  if (len == 0) {
257
51
    return SFPARSE_ERR_PARSE;
258
51
  }
259
260
4.67k
  if (parser_eof(sfp) || *sfp->pos != '.') {
261
3.93k
    if (dest) {
262
2.13k
      dest->type = SFPARSE_TYPE_INTEGER;
263
2.13k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
264
2.13k
      dest->integer = value * sign;
265
2.13k
    }
266
267
3.93k
    return 0;
268
3.93k
  }
269
270
  /* decimal */
271
272
743
  if (len > 12) {
273
10
    return SFPARSE_ERR_PARSE;
274
10
  }
275
276
733
  fpos = len;
277
278
733
  ++sfp->pos;
279
280
1.84k
  for (; !parser_eof(sfp) && number_tbl[*sfp->pos]; ++sfp->pos) {
281
1.11k
    if (++len > 15) {
282
10
      return SFPARSE_ERR_PARSE;
283
10
    }
284
285
1.10k
    value *= 10;
286
1.10k
    value += *sfp->pos - '0';
287
1.10k
  }
288
289
723
  if (fpos == len || len - fpos > 3) {
290
74
    return SFPARSE_ERR_PARSE;
291
74
  }
292
293
649
  if (dest) {
294
430
    dest->type = SFPARSE_TYPE_DECIMAL;
295
430
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
296
430
    dest->decimal.numer = value * sign;
297
298
430
    switch (len - fpos) {
299
273
    case 1:
300
273
      dest->decimal.denom = 10;
301
302
273
      break;
303
88
    case 2:
304
88
      dest->decimal.denom = 100;
305
306
88
      break;
307
69
    case 3:
308
69
      dest->decimal.denom = 1000;
309
310
69
      break;
311
430
    }
312
430
  }
313
314
649
  return 0;
315
649
}
316
317
505
static int parser_date(sfparse_parser *sfp, sfparse_value *dest) {
318
505
  int rv;
319
505
  sfparse_value val;
320
321
  /* The first byte has already been validated by the caller. */
322
505
  assert('@' == *sfp->pos);
323
324
505
  ++sfp->pos;
325
326
505
  if (parser_eof(sfp)) {
327
33
    return SFPARSE_ERR_PARSE;
328
33
  }
329
330
472
  rv = parser_number(sfp, &val);
331
472
  if (rv != 0) {
332
41
    return rv;
333
41
  }
334
335
431
  if (val.type != SFPARSE_TYPE_INTEGER) {
336
10
    return SFPARSE_ERR_PARSE;
337
10
  }
338
339
421
  if (dest) {
340
201
    *dest = val;
341
201
    dest->type = SFPARSE_TYPE_DATE;
342
201
  }
343
344
421
  return 0;
345
431
}
346
347
#ifdef __AVX2__
348
static const uint8_t *find_char_string(const uint8_t *first,
349
                                       const uint8_t *last) {
350
  const __m256i bs = _mm256_set1_epi8('\\');
351
  const __m256i dq = _mm256_set1_epi8('"');
352
  const __m256i del = _mm256_set1_epi8(0x7F);
353
  const __m256i sp = _mm256_set1_epi8(' ');
354
  __m256i s, x;
355
  uint32_t m;
356
357
  for (; first != last; first += 32) {
358
    s = _mm256_loadu_si256((void *)first);
359
360
    x = _mm256_cmpgt_epi8(sp, s);
361
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x);
362
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x);
363
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x);
364
365
    m = (uint32_t)_mm256_movemask_epi8(x);
366
    if (m) {
367
      return first + ctz(m);
368
    }
369
  }
370
371
  return last;
372
}
373
#endif /* __AVX2__ */
374
375
static const uint8_t string_tbl[256] = {
376
  [' '] = 1, ['!'] = 1, ['#'] = 1, ['$'] = 1, ['%'] = 1, ['&'] = 1,  ['\''] = 1,
377
  ['('] = 1, [')'] = 1, ['*'] = 1, ['+'] = 1, [','] = 1, ['-'] = 1,  ['.'] = 1,
378
  ['/'] = 1, DIGITS(1), [':'] = 1, [';'] = 1, ['<'] = 1, ['='] = 1,  ['>'] = 1,
379
  ['?'] = 1, ['@'] = 1, ALPHAS(1), ['['] = 1, [']'] = 1, ['^'] = 1,  ['_'] = 1,
380
  ['`'] = 1, ['{'] = 1, ['|'] = 1, ['}'] = 1, ['~'] = 1, ['\\'] = 2, ['"'] = 3,
381
};
382
383
3.31k
static int parser_string(sfparse_parser *sfp, sfparse_value *dest) {
384
3.31k
  const uint8_t *base;
385
#ifdef __AVX2__
386
  const uint8_t *last;
387
#endif /* __AVX2__ */
388
3.31k
  uint32_t flags = SFPARSE_VALUE_FLAG_NONE;
389
390
  /* The first byte has already been validated by the caller. */
391
3.31k
  assert('"' == *sfp->pos);
392
393
3.31k
  base = ++sfp->pos;
394
395
#ifdef __AVX2__
396
  for (; sfp->end - sfp->pos >= 32; ++sfp->pos) {
397
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1FU);
398
399
    sfp->pos = find_char_string(sfp->pos, last);
400
    if (sfp->pos == last) {
401
      break;
402
    }
403
404
    switch (*sfp->pos) {
405
    case '\\':
406
      ++sfp->pos;
407
      if (parser_eof(sfp)) {
408
        return SFPARSE_ERR_PARSE;
409
      }
410
411
      switch (*sfp->pos) {
412
      case '"':
413
      case '\\':
414
        flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
415
416
        break;
417
      default:
418
        return SFPARSE_ERR_PARSE;
419
      }
420
421
      break;
422
    case '"':
423
      goto fin;
424
    default:
425
      return SFPARSE_ERR_PARSE;
426
    }
427
  }
428
#endif /* __AVX2__ */
429
430
39.1k
  for (; !parser_eof(sfp); ++sfp->pos) {
431
38.8k
    switch (string_tbl[*sfp->pos]) {
432
74
    case 0:
433
74
      return SFPARSE_ERR_PARSE;
434
35.4k
    case 1:
435
35.4k
      break;
436
564
    case 2:
437
564
      ++sfp->pos;
438
564
      if (parser_eof(sfp)) {
439
42
        return SFPARSE_ERR_PARSE;
440
42
      }
441
442
522
      switch (*sfp->pos) {
443
233
      case '"':
444
469
      case '\\':
445
469
        flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
446
447
469
        break;
448
53
      default:
449
53
        return SFPARSE_ERR_PARSE;
450
522
      }
451
452
469
      break;
453
2.77k
    case 3:
454
2.77k
      goto fin;
455
38.8k
    }
456
38.8k
  }
457
458
369
  return SFPARSE_ERR_PARSE;
459
460
2.77k
fin:
461
2.77k
  if (dest) {
462
2.56k
    dest->type = SFPARSE_TYPE_STRING;
463
2.56k
    dest->flags = flags;
464
2.56k
    dest->vec.len = (size_t)(sfp->pos - base);
465
2.56k
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
466
2.56k
  }
467
468
2.77k
  ++sfp->pos;
469
470
2.77k
  return 0;
471
3.31k
}
472
473
#ifdef __AVX2__
474
static const uint8_t *find_char_token(const uint8_t *first,
475
                                      const uint8_t *last) {
476
  /* r0: !..:, excluding "(),
477
     r1: A..Z
478
     r2: ^..~, excluding {} */
479
  const __m256i r0l = _mm256_set1_epi8('!' - 1);
480
  const __m256i r0r = _mm256_set1_epi8(':' + 1);
481
  const __m256i dq = _mm256_set1_epi8('"');
482
  const __m256i prl = _mm256_set1_epi8('(');
483
  const __m256i prr = _mm256_set1_epi8(')');
484
  const __m256i comma = _mm256_set1_epi8(',');
485
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
486
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
487
  const __m256i r2l = _mm256_set1_epi8('^' - 1);
488
  const __m256i r2r = _mm256_set1_epi8('~' + 1);
489
  const __m256i cbl = _mm256_set1_epi8('{');
490
  const __m256i cbr = _mm256_set1_epi8('}');
491
  __m256i s, x;
492
  uint32_t m;
493
494
  for (; first != last; first += 32) {
495
    s = _mm256_loadu_si256((void *)first);
496
497
    x = _mm256_andnot_si256(
498
      _mm256_cmpeq_epi8(s, comma),
499
      _mm256_andnot_si256(
500
        _mm256_cmpeq_epi8(s, prr),
501
        _mm256_andnot_si256(
502
          _mm256_cmpeq_epi8(s, prl),
503
          _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq),
504
                              _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l),
505
                                               _mm256_cmpgt_epi8(r0r, s))))));
506
    x = _mm256_or_si256(
507
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
508
      x);
509
    x = _mm256_or_si256(
510
      _mm256_andnot_si256(
511
        _mm256_cmpeq_epi8(s, cbr),
512
        _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl),
513
                            _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l),
514
                                             _mm256_cmpgt_epi8(r2r, s)))),
515
      x);
516
517
    m = ~(uint32_t)_mm256_movemask_epi8(x);
518
    if (m) {
519
      return first + ctz(m);
520
    }
521
  }
522
523
  return last;
524
}
525
#endif /* __AVX2__ */
526
527
static const uint8_t token_tbl[256] = {
528
  ['!'] = 1, ['#'] = 1, ['$'] = 1, ['%'] = 1, ['&'] = 1, ['\''] = 1, ['*'] = 1,
529
  ['+'] = 1, ['-'] = 1, ['.'] = 1, ['/'] = 1, DIGITS(1), [':'] = 1,  ALPHAS(1),
530
  ['^'] = 1, ['_'] = 1, ['`'] = 1, ['|'] = 1, ['~'] = 1,
531
};
532
533
16.6k
static int parser_token(sfparse_parser *sfp, sfparse_value *dest) {
534
16.6k
  const uint8_t *base;
535
#ifdef __AVX2__
536
  const uint8_t *last;
537
#endif /* __AVX2__ */
538
539
  /* The first byte has already been validated by the caller. */
540
16.6k
  base = sfp->pos++;
541
542
#ifdef __AVX2__
543
  if (sfp->end - sfp->pos >= 32) {
544
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1FU);
545
546
    sfp->pos = find_char_token(sfp->pos, last);
547
    if (sfp->pos != last) {
548
      goto fin;
549
    }
550
  }
551
#endif /* __AVX2__ */
552
553
69.9k
  for (; !parser_eof(sfp) && token_tbl[*sfp->pos]; ++sfp->pos)
554
53.3k
    ;
555
556
#ifdef __AVX2__
557
fin:
558
#endif /* __AVX2__ */
559
16.6k
  if (dest) {
560
7.06k
    dest->type = SFPARSE_TYPE_TOKEN;
561
7.06k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
562
7.06k
    dest->vec.base = (uint8_t *)base;
563
7.06k
    dest->vec.len = (size_t)(sfp->pos - base);
564
7.06k
  }
565
566
16.6k
  return 0;
567
16.6k
}
568
569
#ifdef __AVX2__
570
static const uint8_t *find_char_byteseq(const uint8_t *first,
571
                                        const uint8_t *last) {
572
  const __m256i pls = _mm256_set1_epi8('+');
573
  const __m256i fs = _mm256_set1_epi8('/');
574
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
575
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
576
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
577
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
578
  const __m256i r2l = _mm256_set1_epi8('a' - 1);
579
  const __m256i r2r = _mm256_set1_epi8('z' + 1);
580
  __m256i s, x;
581
  uint32_t m;
582
583
  for (; first != last; first += 32) {
584
    s = _mm256_loadu_si256((void *)first);
585
586
    x = _mm256_cmpeq_epi8(s, pls);
587
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x);
588
    x = _mm256_or_si256(
589
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
590
      x);
591
    x = _mm256_or_si256(
592
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
593
      x);
594
    x = _mm256_or_si256(
595
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)),
596
      x);
597
598
    m = ~(uint32_t)_mm256_movemask_epi8(x);
599
    if (m) {
600
      return first + ctz(m);
601
    }
602
  }
603
604
  return last;
605
}
606
#endif /* __AVX2__ */
607
608
static const uint8_t byteseq_tbl[256] = {
609
  ['+'] = 1, ['/'] = 1, DIGITS(1), ALPHAS(1), ['='] = 2, [':'] = 3,
610
};
611
612
3.85k
static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) {
613
3.85k
  const uint8_t *base;
614
#ifdef __AVX2__
615
  const uint8_t *last;
616
#endif /* __AVX2__ */
617
618
  /* The first byte has already been validated by the caller. */
619
3.85k
  assert(':' == *sfp->pos);
620
621
3.85k
  base = ++sfp->pos;
622
623
#ifdef __AVX2__
624
  if (sfp->end - sfp->pos >= 32) {
625
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1FU);
626
    sfp->pos = find_char_byteseq(sfp->pos, last);
627
  }
628
#endif /* __AVX2__ */
629
630
29.9k
  for (; !parser_eof(sfp); ++sfp->pos) {
631
29.6k
    switch (byteseq_tbl[*sfp->pos]) {
632
63
    case 0:
633
63
      return SFPARSE_ERR_PARSE;
634
26.0k
    case 1:
635
26.0k
      continue;
636
1.38k
    case 2:
637
1.38k
      switch ((sfp->pos - base) & 0x3) {
638
35
      case 0:
639
69
      case 1:
640
69
        return SFPARSE_ERR_PARSE;
641
609
      case 2:
642
609
        ++sfp->pos;
643
644
609
        if (parser_eof(sfp)) {
645
16
          return SFPARSE_ERR_PARSE;
646
16
        }
647
648
593
        if (*sfp->pos == '=') {
649
290
          ++sfp->pos;
650
290
        }
651
652
593
        break;
653
702
      case 3:
654
702
        ++sfp->pos;
655
656
702
        break;
657
1.38k
      }
658
659
1.29k
      if (parser_eof(sfp) || *sfp->pos != ':') {
660
91
        return SFPARSE_ERR_PARSE;
661
91
      }
662
663
1.20k
      goto fin;
664
2.16k
    case 3:
665
2.16k
      if (((sfp->pos - base) & 0x3) == 1) {
666
34
        return SFPARSE_ERR_PARSE;
667
34
      }
668
669
2.12k
      goto fin;
670
29.6k
    }
671
29.6k
  }
672
673
247
  return SFPARSE_ERR_PARSE;
674
675
3.33k
fin:
676
3.33k
  if (dest) {
677
2.89k
    dest->type = SFPARSE_TYPE_BYTESEQ;
678
2.89k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
679
2.89k
    dest->vec.len = (size_t)(sfp->pos - base);
680
2.89k
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
681
2.89k
  }
682
683
3.33k
  ++sfp->pos;
684
685
3.33k
  return 0;
686
3.85k
}
687
688
668
static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) {
689
668
  int b;
690
691
  /* The first byte has already been validated by the caller. */
692
668
  assert('?' == *sfp->pos);
693
694
668
  ++sfp->pos;
695
696
668
  if (parser_eof(sfp)) {
697
49
    return SFPARSE_ERR_PARSE;
698
49
  }
699
700
619
  switch (*sfp->pos) {
701
351
  case '0':
702
351
    b = 0;
703
704
351
    break;
705
234
  case '1':
706
234
    b = 1;
707
708
234
    break;
709
34
  default:
710
34
    return SFPARSE_ERR_PARSE;
711
619
  }
712
713
585
  ++sfp->pos;
714
715
585
  if (dest) {
716
277
    dest->type = SFPARSE_TYPE_BOOLEAN;
717
277
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
718
277
    dest->boolean = b;
719
277
  }
720
721
585
  return 0;
722
619
}
723
724
static const uint8_t pct_tbl[256] = {
725
  DIGITS(1),
726
  HEXALPHAS(2),
727
};
728
729
8.06k
static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
730
8.06k
  uint8_t c, b = **ppos;
731
732
8.06k
  switch (pct_tbl[b]) {
733
37
  case 0:
734
37
    return -1;
735
4.47k
  case 1:
736
4.47k
    c = (uint8_t)((b - '0') << 4);
737
738
4.47k
    break;
739
3.55k
  case 2:
740
3.55k
    c = (uint8_t)((b - 'a' + 10) << 4);
741
742
3.55k
    break;
743
0
  default:
744
0
    assert(0);
745
0
    abort();
746
8.06k
  }
747
748
8.02k
  b = *++*ppos;
749
750
8.02k
  switch (pct_tbl[b]) {
751
36
  case 0:
752
36
    return -1;
753
5.11k
  case 1:
754
5.11k
    c |= (uint8_t)(b - '0');
755
756
5.11k
    break;
757
2.87k
  case 2:
758
2.87k
    c |= (uint8_t)(b - 'a' + 10);
759
760
2.87k
    break;
761
8.02k
  }
762
763
7.98k
  *pc = c;
764
7.98k
  ++*ppos;
765
766
7.98k
  return 0;
767
8.02k
}
768
769
/* Start of utf8 dfa */
770
/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
771
 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
772
 *
773
 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
774
 *
775
 * Permission is hereby granted, free of charge, to any person
776
 * obtaining a copy of this software and associated documentation
777
 * files (the "Software"), to deal in the Software without
778
 * restriction, including without limitation the rights to use, copy,
779
 * modify, merge, publish, distribute, sublicense, and/or sell copies
780
 * of the Software, and to permit persons to whom the Software is
781
 * furnished to do so, subject to the following conditions:
782
 *
783
 * The above copyright notice and this permission notice shall be
784
 * included in all copies or substantial portions of the Software.
785
 *
786
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
787
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
788
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
789
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
790
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
791
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
792
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
793
 * SOFTWARE.
794
 */
795
15.6k
#define UTF8_ACCEPT 0
796
5.41k
#define UTF8_REJECT 12
797
798
/* clang-format off */
799
static const uint8_t utf8d[] = {
800
  /*
801
   * The first part of the table maps bytes to character classes that
802
   * to reduce the size of the transition table and create bitmasks.
803
   */
804
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
805
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
806
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
807
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
808
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
809
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
810
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
811
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
812
813
   /*
814
    * The second part is a transition table that maps a combination
815
    * of a state of the automaton and a character class to a state.
816
    */
817
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
818
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
819
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
820
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
821
  12,36,12,12,12,12,12,12,12,12,12,12,
822
};
823
/* clang-format on */
824
825
7.98k
static void utf8_decode(uint32_t *state, uint8_t byte) {
826
7.98k
  *state = utf8d[256 + *state + utf8d[byte]];
827
7.98k
}
828
829
/* End of utf8 dfa */
830
831
static const uint8_t dispstring_tbl[256] = {
832
  [' '] = 1, ['!'] = 1, ['#'] = 1, ['$'] = 1,  ['&'] = 1, ['\''] = 1, ['('] = 1,
833
  [')'] = 1, ['*'] = 1, ['+'] = 1, [','] = 1,  ['-'] = 1, ['.'] = 1,  ['/'] = 1,
834
  DIGITS(1), [':'] = 1, [';'] = 1, ['<'] = 1,  ['='] = 1, ['>'] = 1,  ['?'] = 1,
835
  ['@'] = 1, ALPHAS(1), ['['] = 1, ['\\'] = 1, [']'] = 1, ['^'] = 1,  ['_'] = 1,
836
  ['`'] = 1, ['{'] = 1, ['|'] = 1, ['}'] = 1,  ['~'] = 1, ['%'] = 2,  ['"'] = 3,
837
};
838
839
7.69k
static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) {
840
7.69k
  const uint8_t *base;
841
7.69k
  uint8_t c;
842
7.69k
  uint32_t utf8state = UTF8_ACCEPT;
843
844
7.69k
  assert('%' == *sfp->pos);
845
846
7.69k
  ++sfp->pos;
847
848
7.69k
  if (parser_eof(sfp) || *sfp->pos != '"') {
849
74
    return SFPARSE_ERR_PARSE;
850
74
  }
851
852
7.62k
  base = ++sfp->pos;
853
854
19.8k
  for (; !parser_eof(sfp);) {
855
19.7k
    switch (dispstring_tbl[*sfp->pos]) {
856
4.42k
    case 0:
857
4.42k
      return SFPARSE_ERR_PARSE;
858
8.53k
    case 1:
859
8.53k
      ++sfp->pos;
860
861
8.53k
      break;
862
3.87k
    case 2:
863
8.09k
      for (;;) {
864
8.09k
        ++sfp->pos;
865
866
8.09k
        if (sfp->pos + 2 > sfp->end || pctdecode(&c, &sfp->pos) != 0) {
867
107
          return SFPARSE_ERR_PARSE;
868
107
        }
869
870
7.98k
        utf8_decode(&utf8state, c);
871
7.98k
        if (utf8state == UTF8_ACCEPT) {
872
5.28k
          if (sfp->pos != sfp->end && *sfp->pos == '%') {
873
1.58k
            continue;
874
1.58k
          }
875
876
3.69k
          break;
877
5.28k
        }
878
879
2.70k
        if (utf8state == UTF8_REJECT || sfp->pos + 1 > sfp->end ||
880
2.66k
            *sfp->pos != '%') {
881
75
          return SFPARSE_ERR_PARSE;
882
75
        }
883
2.70k
      }
884
885
3.69k
      break;
886
3.69k
    case 3:
887
2.93k
      assert(utf8state == UTF8_ACCEPT);
888
889
2.93k
      if (dest) {
890
2.15k
        dest->type = SFPARSE_TYPE_DISPSTRING;
891
2.15k
        dest->flags = SFPARSE_VALUE_FLAG_NONE;
892
2.15k
        dest->vec.len = (size_t)(sfp->pos - base);
893
2.15k
        dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
894
2.15k
      }
895
896
2.93k
      ++sfp->pos;
897
898
2.93k
      return 0;
899
19.7k
    }
900
19.7k
  }
901
902
82
  return SFPARSE_ERR_PARSE;
903
7.62k
}
904
905
static const uint8_t bare_item_tbl[256] = {
906
  ['"'] = 1, ['-'] = 2, DIGITS(2), ['@'] = 3, [':'] = 4,
907
  ['?'] = 5, ['*'] = 6, ALPHAS(6), ['%'] = 7,
908
};
909
910
36.9k
static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) {
911
36.9k
  switch (bare_item_tbl[*sfp->pos]) {
912
43
  case 0:
913
43
    return SFPARSE_ERR_PARSE;
914
3.31k
  case 1:
915
3.31k
    return parser_string(sfp, dest);
916
4.30k
  case 2:
917
4.30k
    return parser_number(sfp, dest);
918
505
  case 3:
919
505
    return parser_date(sfp, dest);
920
3.85k
  case 4:
921
3.85k
    return parser_byteseq(sfp, dest);
922
668
  case 5:
923
668
    return parser_boolean(sfp, dest);
924
16.6k
  case 6:
925
16.6k
    return parser_token(sfp, dest);
926
7.69k
  case 7:
927
7.69k
    return parser_dispstring(sfp, dest);
928
0
  default:
929
0
    assert(0);
930
0
    abort();
931
36.9k
  }
932
36.9k
}
933
934
static int parser_skip_inner_list(sfparse_parser *sfp);
935
936
int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
937
44.9k
                         sfparse_value *dest_value) {
938
44.9k
  int rv;
939
940
44.9k
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
941
0
  case SFPARSE_STATE_BEFORE:
942
0
    rv = parser_skip_inner_list(sfp);
943
0
    if (rv != 0) {
944
0
      return rv;
945
0
    }
946
947
    /* fall through */
948
36.3k
  case SFPARSE_STATE_BEFORE_PARAMS:
949
36.3k
    parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);
950
951
36.3k
    break;
952
8.65k
  case SFPARSE_STATE_PARAMS:
953
8.65k
    break;
954
0
  default:
955
0
    assert(0);
956
0
    abort();
957
44.9k
  }
958
959
44.9k
  if (parser_eof(sfp) || *sfp->pos != ';') {
960
36.1k
    parser_set_op_state(sfp, SFPARSE_STATE_AFTER);
961
962
36.1k
    return SFPARSE_ERR_EOF;
963
36.1k
  }
964
965
8.83k
  ++sfp->pos;
966
967
8.83k
  parser_discard_sp(sfp);
968
8.83k
  if (parser_eof(sfp)) {
969
40
    return SFPARSE_ERR_PARSE;
970
40
  }
971
972
8.79k
  rv = parser_key(sfp, dest_key);
973
8.79k
  if (rv != 0) {
974
71
    return rv;
975
71
  }
976
977
8.72k
  if (parser_eof(sfp) || *sfp->pos != '=') {
978
3.29k
    if (dest_value) {
979
0
      dest_value->type = SFPARSE_TYPE_BOOLEAN;
980
0
      dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
981
0
      dest_value->boolean = 1;
982
0
    }
983
984
3.29k
    return 0;
985
3.29k
  }
986
987
5.42k
  ++sfp->pos;
988
989
5.42k
  if (parser_eof(sfp)) {
990
12
    return SFPARSE_ERR_PARSE;
991
12
  }
992
993
5.41k
  return parser_bare_item(sfp, dest_value);
994
5.42k
}
995
996
36.3k
static int parser_skip_params(sfparse_parser *sfp) {
997
36.3k
  int rv;
998
999
44.9k
  for (;;) {
1000
44.9k
    rv = sfparse_parser_param(sfp, NULL, NULL);
1001
44.9k
    switch (rv) {
1002
8.65k
    case 0:
1003
8.65k
      break;
1004
36.1k
    case SFPARSE_ERR_EOF:
1005
36.1k
      return 0;
1006
178
    case SFPARSE_ERR_PARSE:
1007
178
      return rv;
1008
0
    default:
1009
0
      assert(0);
1010
0
      abort();
1011
44.9k
    }
1012
44.9k
  }
1013
36.3k
}
1014
1015
15.9k
int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
1016
15.9k
  int rv;
1017
1018
15.9k
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
1019
7.76k
  case SFPARSE_STATE_BEFORE:
1020
7.76k
    parser_discard_sp(sfp);
1021
7.76k
    if (parser_eof(sfp)) {
1022
34
      return SFPARSE_ERR_PARSE;
1023
34
    }
1024
1025
7.72k
    break;
1026
8.16k
  case SFPARSE_STATE_BEFORE_PARAMS:
1027
8.16k
    rv = parser_skip_params(sfp);
1028
8.16k
    if (rv != 0) {
1029
61
      return rv;
1030
61
    }
1031
1032
    /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
1033
       another state without reading the state. */
1034
    /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */
1035
1036
    /* fall through */
1037
8.10k
  case SFPARSE_STATE_AFTER:
1038
8.10k
    if (parser_eof(sfp)) {
1039
48
      return SFPARSE_ERR_PARSE;
1040
48
    }
1041
1042
8.05k
    switch (*sfp->pos) {
1043
5.60k
    case ' ':
1044
5.60k
      parser_discard_sp(sfp);
1045
5.60k
      if (parser_eof(sfp)) {
1046
0
        return SFPARSE_ERR_PARSE;
1047
0
      }
1048
1049
5.60k
      break;
1050
5.60k
    case ')':
1051
2.40k
      break;
1052
44
    default:
1053
44
      return SFPARSE_ERR_PARSE;
1054
8.05k
    }
1055
1056
8.00k
    break;
1057
8.00k
  default:
1058
0
    assert(0);
1059
0
    abort();
1060
15.9k
  }
1061
1062
15.7k
  if (*sfp->pos == ')') {
1063
3.00k
    ++sfp->pos;
1064
1065
3.00k
    parser_unset_inner_list_state(sfp);
1066
3.00k
    parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
1067
1068
3.00k
    return SFPARSE_ERR_EOF;
1069
3.00k
  }
1070
1071
12.7k
  rv = parser_bare_item(sfp, dest);
1072
12.7k
  if (rv != 0) {
1073
4.57k
    return rv;
1074
4.57k
  }
1075
1076
8.16k
  parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
1077
1078
8.16k
  return 0;
1079
12.7k
}
1080
1081
7.76k
static int parser_skip_inner_list(sfparse_parser *sfp) {
1082
7.76k
  int rv;
1083
1084
15.9k
  for (;;) {
1085
15.9k
    rv = sfparse_parser_inner_list(sfp, NULL);
1086
15.9k
    switch (rv) {
1087
8.16k
    case 0:
1088
8.16k
      break;
1089
3.00k
    case SFPARSE_ERR_EOF:
1090
3.00k
      return 0;
1091
4.75k
    case SFPARSE_ERR_PARSE:
1092
4.75k
      return rv;
1093
0
    default:
1094
0
      assert(0);
1095
0
      abort();
1096
15.9k
    }
1097
15.9k
  }
1098
7.76k
}
1099
1100
28.0k
static int parser_next_key_or_item(sfparse_parser *sfp) {
1101
28.0k
  parser_discard_ows(sfp);
1102
1103
28.0k
  if (parser_eof(sfp)) {
1104
20.9k
    return SFPARSE_ERR_EOF;
1105
20.9k
  }
1106
1107
7.05k
  if (*sfp->pos != ',') {
1108
102
    return SFPARSE_ERR_PARSE;
1109
102
  }
1110
1111
6.94k
  ++sfp->pos;
1112
1113
6.94k
  parser_discard_ows(sfp);
1114
6.94k
  if (parser_eof(sfp)) {
1115
63
    return SFPARSE_ERR_PARSE;
1116
63
  }
1117
1118
6.88k
  return 0;
1119
6.94k
}
1120
1121
34.7k
static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) {
1122
34.7k
  int rv;
1123
1124
34.7k
  if (parser_eof(sfp) || *(sfp->pos) != '=') {
1125
    /* Boolean true */
1126
8.10k
    if (dest) {
1127
8.10k
      dest->type = SFPARSE_TYPE_BOOLEAN;
1128
8.10k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1129
8.10k
      dest->boolean = 1;
1130
8.10k
    }
1131
1132
8.10k
    sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
1133
1134
8.10k
    return 0;
1135
8.10k
  }
1136
1137
26.6k
  ++sfp->pos;
1138
1139
26.6k
  if (parser_eof(sfp)) {
1140
36
    return SFPARSE_ERR_PARSE;
1141
36
  }
1142
1143
26.6k
  if (*sfp->pos == '(') {
1144
7.77k
    if (dest) {
1145
7.77k
      dest->type = SFPARSE_TYPE_INNER_LIST;
1146
7.77k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1147
7.77k
    }
1148
1149
7.77k
    ++sfp->pos;
1150
1151
7.77k
    sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE;
1152
1153
7.77k
    return 0;
1154
7.77k
  }
1155
1156
18.8k
  rv = parser_bare_item(sfp, dest);
1157
18.8k
  if (rv != 0) {
1158
1.55k
    return rv;
1159
1.55k
  }
1160
1161
17.2k
  sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
1162
1163
17.2k
  return 0;
1164
18.8k
}
1165
1166
int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
1167
61.1k
                        sfparse_value *dest_value) {
1168
61.1k
  int rv;
1169
1170
61.1k
  switch (sfp->state) {
1171
7.76k
  case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
1172
7.76k
    rv = parser_skip_inner_list(sfp);
1173
7.76k
    if (rv != 0) {
1174
4.75k
      return rv;
1175
4.75k
    }
1176
1177
    /* fall through */
1178
28.1k
  case SFPARSE_STATE_DICT_BEFORE_PARAMS:
1179
28.1k
    rv = parser_skip_params(sfp);
1180
28.1k
    if (rv != 0) {
1181
117
      return rv;
1182
117
    }
1183
1184
    /* fall through */
1185
28.0k
  case SFPARSE_STATE_DICT_AFTER:
1186
28.0k
    rv = parser_next_key_or_item(sfp);
1187
28.0k
    if (rv != 0) {
1188
21.1k
      return rv;
1189
21.1k
    }
1190
1191
6.88k
    break;
1192
28.2k
  case SFPARSE_STATE_INITIAL:
1193
28.2k
    parser_discard_sp(sfp);
1194
1195
28.2k
    if (parser_eof(sfp)) {
1196
263
      return SFPARSE_ERR_EOF;
1197
263
    }
1198
1199
28.0k
    break;
1200
28.0k
  default:
1201
0
    assert(0);
1202
0
    abort();
1203
61.1k
  }
1204
1205
34.8k
  rv = parser_key(sfp, dest_key);
1206
34.8k
  if (rv != 0) {
1207
137
    return rv;
1208
137
  }
1209
1210
34.7k
  return parser_dict_value(sfp, dest_value);
1211
34.8k
}
1212
1213
0
int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
1214
0
  int rv;
1215
1216
0
  switch (sfp->state) {
1217
0
  case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
1218
0
    rv = parser_skip_inner_list(sfp);
1219
0
    if (rv != 0) {
1220
0
      return rv;
1221
0
    }
1222
1223
    /* fall through */
1224
0
  case SFPARSE_STATE_LIST_BEFORE_PARAMS:
1225
0
    rv = parser_skip_params(sfp);
1226
0
    if (rv != 0) {
1227
0
      return rv;
1228
0
    }
1229
1230
    /* fall through */
1231
0
  case SFPARSE_STATE_LIST_AFTER:
1232
0
    rv = parser_next_key_or_item(sfp);
1233
0
    if (rv != 0) {
1234
0
      return rv;
1235
0
    }
1236
1237
0
    break;
1238
0
  case SFPARSE_STATE_INITIAL:
1239
0
    parser_discard_sp(sfp);
1240
1241
0
    if (parser_eof(sfp)) {
1242
0
      return SFPARSE_ERR_EOF;
1243
0
    }
1244
1245
0
    break;
1246
0
  default:
1247
0
    assert(0);
1248
0
    abort();
1249
0
  }
1250
1251
0
  if (*sfp->pos == '(') {
1252
0
    if (dest) {
1253
0
      dest->type = SFPARSE_TYPE_INNER_LIST;
1254
0
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1255
0
    }
1256
1257
0
    ++sfp->pos;
1258
1259
0
    sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;
1260
1261
0
    return 0;
1262
0
  }
1263
1264
0
  rv = parser_bare_item(sfp, dest);
1265
0
  if (rv != 0) {
1266
0
    return rv;
1267
0
  }
1268
1269
0
  sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;
1270
1271
0
  return 0;
1272
0
}
1273
1274
0
int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
1275
0
  int rv;
1276
1277
0
  switch (sfp->state) {
1278
0
  case SFPARSE_STATE_INITIAL:
1279
0
    parser_discard_sp(sfp);
1280
1281
0
    if (parser_eof(sfp)) {
1282
0
      return SFPARSE_ERR_PARSE;
1283
0
    }
1284
1285
0
    break;
1286
0
  case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
1287
0
    rv = parser_skip_inner_list(sfp);
1288
0
    if (rv != 0) {
1289
0
      return rv;
1290
0
    }
1291
1292
    /* fall through */
1293
0
  case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
1294
0
    rv = parser_skip_params(sfp);
1295
0
    if (rv != 0) {
1296
0
      return rv;
1297
0
    }
1298
1299
    /* fall through */
1300
0
  case SFPARSE_STATE_ITEM_AFTER:
1301
0
    parser_discard_sp(sfp);
1302
1303
0
    if (!parser_eof(sfp)) {
1304
0
      return SFPARSE_ERR_PARSE;
1305
0
    }
1306
1307
0
    return SFPARSE_ERR_EOF;
1308
0
  default:
1309
0
    assert(0);
1310
0
    abort();
1311
0
  }
1312
1313
0
  if (*sfp->pos == '(') {
1314
0
    if (dest) {
1315
0
      dest->type = SFPARSE_TYPE_INNER_LIST;
1316
0
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1317
0
    }
1318
1319
0
    ++sfp->pos;
1320
1321
0
    sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;
1322
1323
0
    return 0;
1324
0
  }
1325
1326
0
  rv = parser_bare_item(sfp, dest);
1327
0
  if (rv != 0) {
1328
0
    return rv;
1329
0
  }
1330
1331
0
  sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;
1332
1333
0
  return 0;
1334
0
}
1335
1336
void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data,
1337
28.2k
                         size_t datalen) {
1338
28.2k
  if (datalen == 0) {
1339
263
    sfp->pos = sfp->end = NULL;
1340
28.0k
  } else {
1341
28.0k
    sfp->pos = data;
1342
28.0k
    sfp->end = data + datalen;
1343
28.0k
  }
1344
1345
28.2k
  sfp->state = SFPARSE_STATE_INITIAL;
1346
28.2k
}
1347
1348
0
void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) {
1349
0
  const uint8_t *p, *q;
1350
0
  uint8_t *o;
1351
0
  size_t len, slen;
1352
1353
0
  if (src->len == 0) {
1354
0
    dest->len = 0;
1355
1356
0
    return;
1357
0
  }
1358
1359
0
  o = dest->base;
1360
0
  p = src->base;
1361
0
  len = src->len;
1362
1363
0
  for (;;) {
1364
0
    q = memchr(p, '\\', len);
1365
0
    if (q == NULL) {
1366
0
      memcpy(o, p, len);
1367
0
      o += len;
1368
1369
0
      dest->len = (size_t)(o - dest->base);
1370
1371
0
      return;
1372
0
    }
1373
1374
0
    slen = (size_t)(q - p);
1375
0
    memcpy(o, p, slen);
1376
0
    o += slen;
1377
1378
0
    p = q + 1;
1379
0
    *o++ = *p++;
1380
0
    len -= slen + 2;
1381
0
  }
1382
0
}
1383
1384
0
void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
1385
0
  static const int index_tbl[] = {
1386
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1387
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1388
0
    -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
1389
0
    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
1390
0
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
1391
0
    -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
1392
0
    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1393
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1394
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1395
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1396
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1397
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1398
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1399
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1};
1400
0
  uint8_t *o;
1401
0
  const uint8_t *p, *end;
1402
0
  uint32_t n;
1403
0
  size_t i, left;
1404
0
  int idx;
1405
1406
0
  if (src->len == 0) {
1407
0
    dest->len = 0;
1408
1409
0
    return;
1410
0
  }
1411
1412
0
  o = dest->base;
1413
0
  p = src->base;
1414
0
  left = src->len & 0x3;
1415
0
  if (left == 0 && src->base[src->len - 1] == '=') {
1416
0
    left = 4;
1417
0
  }
1418
0
  end = src->base + src->len - left;
1419
1420
0
  for (; p != end;) {
1421
0
    n = 0;
1422
1423
0
    for (i = 1; i <= 4; ++i, ++p) {
1424
0
      idx = index_tbl[*p];
1425
1426
0
      assert(idx != -1);
1427
1428
0
      n += (uint32_t)(idx << (24 - i * 6));
1429
0
    }
1430
1431
0
    *o++ = (uint8_t)(n >> 16);
1432
0
    *o++ = (n >> 8) & 0xFFU;
1433
0
    *o++ = n & 0xFFU;
1434
0
  }
1435
1436
0
  switch (left) {
1437
0
  case 0:
1438
0
    goto fin;
1439
0
  case 1:
1440
0
    assert(0);
1441
0
    abort();
1442
0
  case 3:
1443
0
    if (src->base[src->len - 1] == '=') {
1444
0
      left = 2;
1445
0
    }
1446
1447
0
    break;
1448
0
  case 4:
1449
0
    assert('=' == src->base[src->len - 1]);
1450
1451
0
    if (src->base[src->len - 2] == '=') {
1452
0
      left = 2;
1453
0
    } else {
1454
0
      left = 3;
1455
0
    }
1456
1457
0
    break;
1458
0
  }
1459
1460
0
  switch (left) {
1461
0
  case 2:
1462
0
    *o = (uint8_t)(index_tbl[*p++] << 2);
1463
0
    *o++ |= (uint8_t)(index_tbl[*p++] >> 4);
1464
1465
0
    break;
1466
0
  case 3:
1467
0
    n = (uint32_t)(index_tbl[*p++] << 10);
1468
0
    n += (uint32_t)(index_tbl[*p++] << 4);
1469
0
    n += (uint32_t)(index_tbl[*p++] >> 2);
1470
0
    *o++ = (n >> 8) & 0xFFU;
1471
0
    *o++ = n & 0xFFU;
1472
1473
0
    break;
1474
0
  }
1475
1476
0
fin:
1477
0
  dest->len = (size_t)(o - dest->base);
1478
0
}
1479
1480
0
void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) {
1481
0
  const uint8_t *p, *q;
1482
0
  uint8_t *o;
1483
0
  size_t len, slen;
1484
1485
0
  if (src->len == 0) {
1486
0
    dest->len = 0;
1487
1488
0
    return;
1489
0
  }
1490
1491
0
  o = dest->base;
1492
0
  p = src->base;
1493
0
  len = src->len;
1494
1495
0
  for (;;) {
1496
0
    q = memchr(p, '%', len);
1497
0
    if (q == NULL) {
1498
0
      memcpy(o, p, len);
1499
0
      o += len;
1500
1501
0
      dest->len = (size_t)(o - dest->base);
1502
1503
0
      return;
1504
0
    }
1505
1506
0
    slen = (size_t)(q - p);
1507
0
    memcpy(o, p, slen);
1508
0
    o += slen;
1509
1510
0
    p = q + 1;
1511
1512
0
    pctdecode(o++, &p);
1513
1514
0
    len -= slen + 3;
1515
0
  }
1516
0
}