Coverage Report

Created: 2025-08-09 06:11

/src/nghttp2/lib/sfparse.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * sfparse
3
 *
4
 * Copyright (c) 2023 sfparse contributors
5
 * Copyright (c) 2019 nghttp3 contributors
6
 * Copyright (c) 2015 nghttp2 contributors
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining
9
 * a copy of this software and associated documentation files (the
10
 * "Software"), to deal in the Software without restriction, including
11
 * without limitation the rights to use, copy, modify, merge, publish,
12
 * distribute, sublicense, and/or sell copies of the Software, and to
13
 * permit persons to whom the Software is furnished to do so, subject to
14
 * the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be
17
 * included in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 */
27
#include "sfparse.h"
28
29
#include <string.h>
30
#include <assert.h>
31
#include <stdlib.h>
32
33
#ifdef __AVX2__
34
#  include <immintrin.h>
35
#endif /* __AVX2__ */
36
37
0
/*
 * Parser state encoding.  A state value is the bitwise OR of three
 * independent fields:
 *
 *   bits 3-4  container kind (DICT, LIST or ITEM)
 *   bit  2    INNER_LIST flag
 *   bits 0-1  operation state (BEFORE, BEFORE_PARAMS, PARAMS, AFTER),
 *             selected by SFPARSE_STATE_OP_MASK
 */

/* Container kind (bits 3-4). */
#define SFPARSE_STATE_DICT 0x08u
#define SFPARSE_STATE_LIST 0x10u
#define SFPARSE_STATE_ITEM 0x18u

/* Inner list flag (bit 2). */
#define SFPARSE_STATE_INNER_LIST 0x04u

/* Operation state (bits 0-1). */
#define SFPARSE_STATE_BEFORE 0x00u
#define SFPARSE_STATE_BEFORE_PARAMS 0x01u
#define SFPARSE_STATE_PARAMS 0x02u
#define SFPARSE_STATE_AFTER 0x03u

/* Mask that isolates the operation state bits. */
#define SFPARSE_STATE_OP_MASK 0x03u

/*
 * Helpers that compose a full state from a container kind NAME (DICT,
 * LIST or ITEM) and a fixed operation state.
 */
#define SFPARSE_SET_STATE_AFTER(NAME)                                          \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)
#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME)                                  \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)
#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME)                              \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)

/* Composite states for DICT. */
#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE                                   \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)

/* Composite states for LIST. */
#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE                                   \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)

/* Composite states for ITEM. */
#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE                                   \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)

/* State value with no field set. */
#define SFPARSE_STATE_INITIAL 0x00u
73
74
/*
 * Character-class case-label lists.
 *
 * Each macro expands to a run of `case X:` labels with the colon of the
 * final label omitted, so that it is written as `NAME:` inside a
 * switch statement, e.g.
 *
 *   switch (c) {
 *   DIGIT_CASES:
 *     ...
 *   }
 *
 * The character literals assume an ASCII-compatible execution
 * character set.
 */

/* '0'-'9' */
#define DIGIT_CASES \
  case '0': case '1': case '2': case '3': case '4': \
  case '5': case '6': case '7': case '8': case '9'

/* 'a'-'z' */
#define LCALPHA_CASES \
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
  case 'v': case 'w': case 'x': case 'y': case 'z'

/* 'A'-'Z' */
#define UCALPHA_CASES \
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
  case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
  case 'V': case 'W': case 'X': case 'Y': case 'Z'

/* 'A'-'Z' and 'a'-'z' */
#define ALPHA_CASES \
  UCALPHA_CASES: \
  LCALPHA_CASES

/*
 * Letters, digits and the punctuation
 * ! # $ % & ' * + - . / : ^ _ ` | ~
 */
#define TOKEN_CASES \
  case '!': case '#': case '$': case '%': case '&': case '\'': \
  case '*': case '+': case '-': case '.': case '/': \
  DIGIT_CASES: \
  case ':': \
  UCALPHA_CASES: \
  case '^': case '_': case '`': \
  LCALPHA_CASES: \
  case '|': case '~'

/* 'a'-'f' (lower-case hexadecimal letters) */
#define LCHEXALPHA_CASES \
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f'

/* Bytes 0x00-0x1f */
#define X00_1F_CASES \
  case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: \
  case 0x08: case 0x09: case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: case 0x0f: \
  case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: \
  case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f

/* Bytes 0x20-0x21 */
#define X20_21_CASES \
  case ' ': case '!'

/* Bytes 0x23-0x5b (everything between '"' and '\\', exclusive) */
#define X23_5B_CASES \
  case '#': case '$': case '%': case '&': case '\'': case '(': case ')': \
  case '*': case '+': case ',': case '-': case '.': case '/': \
  DIGIT_CASES: \
  case ':': case ';': case '<': case '=': case '>': case '?': case '@': \
  UCALPHA_CASES: \
  case '['

/* Bytes 0x5d-0x7e */
#define X5D_7E_CASES \
  case ']': case '^': case '_': case '`': \
  LCALPHA_CASES: \
  case '{': case '|': case '}': case '~'

/* Bytes 0x7f-0xff */
#define X7F_FF_CASES \
  case 0x7f: \
  case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87: \
  case 0x88: case 0x89: case 0x8a: case 0x8b: case 0x8c: case 0x8d: case 0x8e: case 0x8f: \
  case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97: \
  case 0x98: case 0x99: case 0x9a: case 0x9b: case 0x9c: case 0x9d: case 0x9e: case 0x9f: \
  case 0xa0: case 0xa1: case 0xa2: case 0xa3: case 0xa4: case 0xa5: case 0xa6: case 0xa7: \
  case 0xa8: case 0xa9: case 0xaa: case 0xab: case 0xac: case 0xad: case 0xae: case 0xaf: \
  case 0xb0: case 0xb1: case 0xb2: case 0xb3: case 0xb4: case 0xb5: case 0xb6: case 0xb7: \
  case 0xb8: case 0xb9: case 0xba: case 0xbb: case 0xbc: case 0xbd: case 0xbe: case 0xbf: \
  case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc4: case 0xc5: case 0xc6: case 0xc7: \
  case 0xc8: case 0xc9: case 0xca: case 0xcb: case 0xcc: case 0xcd: case 0xce: case 0xcf: \
  case 0xd0: case 0xd1: case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd6: case 0xd7: \
  case 0xd8: case 0xd9: case 0xda: case 0xdb: case 0xdc: case 0xdd: case 0xde: case 0xdf: \
  case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe4: case 0xe5: case 0xe6: case 0xe7: \
  case 0xe8: case 0xe9: case 0xea: case 0xeb: case 0xec: case 0xed: case 0xee: case 0xef: \
  case 0xf0: case 0xf1: case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf6: case 0xf7: \
  case 0xf8: case 0xf9: case 0xfa: case 0xfb: case 0xfc: case 0xfd: case 0xfe: case 0xff
380
381
0
/*
 * is_ws returns 1 if |c| is a space (0x20) or a horizontal tab (0x09),
 * and 0 for every other byte.
 */
static int is_ws(uint8_t c) { return c == ' ' || c == '\t'; }
390
391
#ifdef __AVX2__
#  ifdef _MSC_VER
#    include <intrin.h>

/*
 * ctz returns the zero-based index of the least significant set bit of
 * |v|.  MSVC has no __builtin_ctz, so _BitScanForward is used instead.
 *
 * |v| must not be 0: _BitScanForward leaves its output unspecified in
 * that case, and the result is not checked here.
 */
static int ctz(unsigned int v) {
  unsigned long index;

  _BitScanForward(&index, v);

  return (int)index;
}
#  else /* !_MSC_VER */
/* Other compilers are expected to provide the builtin directly. */
#    define ctz __builtin_ctz
#  endif /* !_MSC_VER */
#endif   /* __AVX2__ */
407
408
0
/*
 * parser_eof returns nonzero if the read position of |sfp| has reached
 * the end of its input (sfp->pos == sfp->end), and 0 otherwise.
 */
static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; }
409
410
0
/*
 * parser_discard_ows advances sfp->pos past any run of space and
 * horizontal tab bytes.  It stops at the first other byte or at the
 * end of input, whichever comes first.
 */
static void parser_discard_ows(sfparse_parser *sfp) {
  while (!parser_eof(sfp) && is_ws(*sfp->pos)) {
    ++sfp->pos;
  }
}
414
415
0
/*
 * parser_discard_sp advances sfp->pos past any run of space (0x20)
 * bytes.  Unlike parser_discard_ows, a horizontal tab is not skipped.
 * It stops at the first non-space byte or at the end of input.
 */
static void parser_discard_sp(sfparse_parser *sfp) {
  while (!parser_eof(sfp) && *sfp->pos == ' ') {
    ++sfp->pos;
  }
}
419
420
0
/*
 * parser_set_op_state replaces the operation-state bits of sfp->state
 * (the bits selected by SFPARSE_STATE_OP_MASK) with |op|.  All other
 * bits of the state are left as they are.
 *
 * |op| is OR-ed in without being masked, so it should be one of the
 * operation state values that fit within SFPARSE_STATE_OP_MASK.
 */
static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) {
  sfp->state = (sfp->state & ~SFPARSE_STATE_OP_MASK) | op;
}
424
425
0
/*
 * parser_unset_inner_list_state clears the SFPARSE_STATE_INNER_LIST
 * flag in sfp->state, leaving every other bit unchanged.
 */
static void parser_unset_inner_list_state(sfparse_parser *sfp) {
  sfp->state &= ~SFPARSE_STATE_INNER_LIST;
}
428
429
#ifdef __AVX2__
/*
 * find_char_key scans [first, last) 32 bytes at a time and returns a
 * pointer to the first byte that is NOT one of '_', '-', '.', '*',
 * '0'-'9' or 'a'-'z'.  If every byte belongs to that set, it returns
 * |last|.
 *
 * The loop advances in 32-byte steps and terminates only when
 * first == last, so last - first must be a multiple of 32.
 */
static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
  const __m256i underscore = _mm256_set1_epi8('_');
  const __m256i dash = _mm256_set1_epi8('-');
  const __m256i period = _mm256_set1_epi8('.');
  const __m256i asterisk = _mm256_set1_epi8('*');
  /* Exclusive bounds for the ranges '0'-'9' and 'a'-'z'. */
  const __m256i digit_lo = _mm256_set1_epi8('0' - 1);
  const __m256i digit_hi = _mm256_set1_epi8('9' + 1);
  const __m256i lower_lo = _mm256_set1_epi8('a' - 1);
  const __m256i lower_hi = _mm256_set1_epi8('z' + 1);

  for (; first != last; first += 32) {
    const __m256i chunk = _mm256_loadu_si256((const __m256i *)first);
    __m256i ok;
    uint32_t bad;

    ok = _mm256_cmpeq_epi8(chunk, underscore);
    ok = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, dash), ok);
    ok = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, period), ok);
    ok = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, asterisk), ok);
    /* The comparisons are signed, so bytes >= 0x80 are negative and
       fall outside both ranges. */
    ok = _mm256_or_si256(_mm256_and_si256(_mm256_cmpgt_epi8(chunk, digit_lo),
                                          _mm256_cmpgt_epi8(digit_hi, chunk)),
                         ok);
    ok = _mm256_or_si256(_mm256_and_si256(_mm256_cmpgt_epi8(chunk, lower_lo),
                                          _mm256_cmpgt_epi8(lower_hi, chunk)),
                         ok);

    /* One bit per byte; after inversion a set bit marks a byte that is
       not a key character. */
    bad = ~(uint32_t)_mm256_movemask_epi8(ok);
    if (bad) {
      return first + ctz(bad);
    }
  }

  return last;
}
#endif /* __AVX2__ */
465
466
0
/*
 * parser_key consumes a key starting at sfp->pos.
 *
 * The first byte must be '*' or a lower-case letter; otherwise
 * SFPARSE_ERR_PARSE is returned and sfp->pos is left unchanged.  The
 * following bytes are consumed for as long as they are one of '_',
 * '-', '.', '*', a digit or a lower-case letter.
 *
 * On success the function returns 0 with sfp->pos pointing just past
 * the key.  If |dest| is not NULL, dest->base and dest->len are set to
 * describe the key bytes inside the input buffer (no copy is made).
 *
 * The first byte is read without an end-of-input check, so the caller
 * must make sure that |sfp| is not at EOF.
 */
static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) {
  const uint8_t *base;
#ifdef __AVX2__
  const uint8_t *last;
#endif /* __AVX2__ */

  switch (*sfp->pos) {
  case '*':
  LCALPHA_CASES:
    break;
  default:
    return SFPARSE_ERR_PARSE;
  }

  base = sfp->pos++;

#ifdef __AVX2__
  if (sfp->end - sfp->pos >= 32) {
    /* Hand the largest multiple of 32 bytes to the vectorized scan.
       The mask is computed in size_t so that no high bits of the
       length are dropped; the remainder is left to the byte loop
       below. */
    last = sfp->pos + ((size_t)(sfp->end - sfp->pos) & ~(size_t)0x1f);

    sfp->pos = find_char_key(sfp->pos, last);
    if (sfp->pos != last) {
      /* A non-key byte was found inside the vectorized range. */
      goto fin;
    }
  }
#endif /* __AVX2__ */

  while (!parser_eof(sfp)) {
    switch (*sfp->pos) {
    case '_':
    case '-':
    case '.':
    case '*':
    DIGIT_CASES:
    LCALPHA_CASES:
      ++sfp->pos;
      continue;
    default:
      break;
    }

    /* First byte that cannot be part of a key. */
    break;
  }

#ifdef __AVX2__
fin:
#endif /* __AVX2__ */
  if (dest) {
    dest->base = (uint8_t *)base;
    dest->len = (size_t)(sfp->pos - base);
  }

  return 0;
}
517
518
0
/*
 * parser_number consumes an integer or a decimal starting at
 * sfp->pos, with an optional leading '-'.  At most 15 digits are
 * accepted in total.  A decimal may have at most 12 digits before the
 * '.' and must have between 1 and 3 digits after it.  If |dest| is
 * not NULL, it receives either SFPARSE_TYPE_INTEGER with the value, or
 * SFPARSE_TYPE_DECIMAL with a numerator and a denominator of 10, 100,
 * or 1000.
 *
 * Returns 0 on success, or SFPARSE_ERR_PARSE on malformed input.
 */
static int parser_number(sfparse_parser *sfp, sfparse_value *dest) {
  int negative = 0;
  int is_digit;
  int64_t acc = 0;
  size_t ndigits = 0;
  size_t int_digits;
  size_t frac_digits;

  if (*sfp->pos == '-') {
    ++sfp->pos;
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    negative = 1;
  }

  assert(!parser_eof(sfp));

  /* Integer part. */
  while (!parser_eof(sfp)) {
    is_digit = 0;

    switch (*sfp->pos) {
    DIGIT_CASES:
      is_digit = 1;

      break;
    }

    if (!is_digit) {
      break;
    }

    if (++ndigits > 15) {
      return SFPARSE_ERR_PARSE;
    }

    acc = acc * 10 + (*sfp->pos - '0');
    ++sfp->pos;
  }

  if (ndigits == 0) {
    return SFPARSE_ERR_PARSE;
  }

  if (parser_eof(sfp) || *sfp->pos != '.') {
    /* No fractional part follows, so this is an integer. */
    if (dest) {
      dest->type = SFPARSE_TYPE_INTEGER;
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
      dest->integer = negative ? -acc : acc;
    }

    return 0;
  }

  /* decimal */

  if (ndigits > 12) {
    return SFPARSE_ERR_PARSE;
  }

  int_digits = ndigits;

  /* Step over '.'. */
  ++sfp->pos;

  /* Fractional part; the 15 digit limit covers both parts. */
  while (!parser_eof(sfp)) {
    is_digit = 0;

    switch (*sfp->pos) {
    DIGIT_CASES:
      is_digit = 1;

      break;
    }

    if (!is_digit) {
      break;
    }

    if (++ndigits > 15) {
      return SFPARSE_ERR_PARSE;
    }

    acc = acc * 10 + (*sfp->pos - '0');
    ++sfp->pos;
  }

  frac_digits = ndigits - int_digits;

  if (frac_digits == 0 || frac_digits > 3) {
    return SFPARSE_ERR_PARSE;
  }

  if (dest) {
    dest->type = SFPARSE_TYPE_DECIMAL;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
    dest->decimal.numer = negative ? -acc : acc;
    /* frac_digits is 1, 2, or 3 at this point. */
    dest->decimal.denom = frac_digits == 1 ? 10 : frac_digits == 2 ? 100 : 1000;
  }

  return 0;
}
618
619
0
/*
 * parser_date consumes a date: '@' followed by a number that must
 * turn out to be an integer.  If |dest| is not NULL, it receives the
 * parsed value with its type set to SFPARSE_TYPE_DATE.
 *
 * Returns 0 on success, SFPARSE_ERR_PARSE if nothing follows '@' or
 * the number is a decimal, or the error returned by parser_number.
 */
static int parser_date(sfparse_parser *sfp, sfparse_value *dest) {
  sfparse_value num;
  int rv;

  /* The first byte has already been validated by the caller. */
  assert('@' == *sfp->pos);

  ++sfp->pos;

  if (parser_eof(sfp)) {
    return SFPARSE_ERR_PARSE;
  }

  rv = parser_number(sfp, &num);
  if (rv != 0) {
    return rv;
  }

  if (num.type != SFPARSE_TYPE_INTEGER) {
    return SFPARSE_ERR_PARSE;
  }

  if (!dest) {
    return 0;
  }

  /* Hand the integer over, relabelled as a date. */
  num.type = SFPARSE_TYPE_DATE;
  *dest = num;

  return 0;
}
648
649
#ifdef __AVX2__
650
/*
 * find_char_string scans [first, last) 32 bytes at a time and returns
 * a pointer to the first byte that is '\\', '"', 0x7f, or compares
 * less than ' ' as a signed 8-bit value.  Returns |last| if no such
 * byte is found.  The loop advances in steps of 32 until it reaches
 * |last| exactly.
 */
static const uint8_t *find_char_string(const uint8_t *first,
                                       const uint8_t *last) {
  const __m256i space = _mm256_set1_epi8(' ');
  const __m256i backslash = _mm256_set1_epi8('\\');
  const __m256i quote = _mm256_set1_epi8('"');
  const __m256i delete_ch = _mm256_set1_epi8(0x7f);
  const uint8_t *p = first;
  __m256i chunk, stop;
  uint32_t mask;

  while (p != last) {
    chunk = _mm256_loadu_si256((void *)p);

    stop = _mm256_or_si256(
      _mm256_or_si256(_mm256_cmpgt_epi8(space, chunk),
                      _mm256_cmpeq_epi8(chunk, backslash)),
      _mm256_or_si256(_mm256_cmpeq_epi8(chunk, quote),
                      _mm256_cmpeq_epi8(chunk, delete_ch)));

    /* One bit per byte; a set bit marks a byte that ends the scan. */
    mask = (uint32_t)_mm256_movemask_epi8(stop);
    if (mask != 0) {
      return p + ctz(mask);
    }

    p += 32;
  }

  return last;
}
675
#endif /* __AVX2__ */
676
677
0
/*
 * parser_string consumes a quoted string starting at the opening '"'
 * at sfp->pos.  Inside the string, a backslash must be followed by
 * '"' or '\\'; any other byte outside X20_21_CASES, X23_5B_CASES, and
 * X5D_7E_CASES is rejected.  If |dest| is not NULL, it receives the
 * bytes between the quotes (base is NULL for an empty string) and
 * SFPARSE_VALUE_FLAG_ESCAPED_STRING is set in flags when at least one
 * escape sequence was seen.  On success sfp->pos is left just after
 * the closing '"'.
 *
 * Returns 0 on success, or SFPARSE_ERR_PARSE on an invalid byte, an
 * invalid escape, or a missing closing quote.
 */
static int parser_string(sfparse_parser *sfp, sfparse_value *dest) {
  const uint8_t *content;
#ifdef __AVX2__
  const uint8_t *simd_end;
#endif /* __AVX2__ */
  uint32_t flags = SFPARSE_VALUE_FLAG_NONE;

  /* The first byte has already been validated by the caller. */
  assert('"' == *sfp->pos);

  content = ++sfp->pos;

#ifdef __AVX2__
  while (sfp->end - sfp->pos >= 32) {
    simd_end = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);

    sfp->pos = find_char_string(sfp->pos, simd_end);
    if (sfp->pos == simd_end) {
      /* Nothing special in the full chunks; finish byte by byte. */
      break;
    }

    if (*sfp->pos == '"') {
      goto done;
    }

    if (*sfp->pos != '\\') {
      return SFPARSE_ERR_PARSE;
    }

    ++sfp->pos;
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    if (*sfp->pos != '"' && *sfp->pos != '\\') {
      return SFPARSE_ERR_PARSE;
    }

    flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;

    ++sfp->pos;
  }
#endif /* __AVX2__ */

  while (!parser_eof(sfp)) {
    switch (*sfp->pos) {
    case '"':
      goto done;
    case '\\':
      ++sfp->pos;
      if (parser_eof(sfp)) {
        return SFPARSE_ERR_PARSE;
      }

      if (*sfp->pos != '"' && *sfp->pos != '\\') {
        return SFPARSE_ERR_PARSE;
      }

      flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;

      break;
    X20_21_CASES:
    X23_5B_CASES:
    X5D_7E_CASES:
      break;
    default:
      return SFPARSE_ERR_PARSE;
    }

    ++sfp->pos;
  }

  /* Input ended before the closing quote. */
  return SFPARSE_ERR_PARSE;

done:
  if (dest) {
    dest->type = SFPARSE_TYPE_STRING;
    dest->flags = flags;
    dest->vec.len = (size_t)(sfp->pos - content);
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)content;
  }

  /* Step over the closing quote. */
  ++sfp->pos;

  return 0;
}
768
769
#ifdef __AVX2__
770
/*
 * find_char_token scans [first, last) 32 bytes at a time and returns
 * a pointer to the first byte that is NOT in any of these sets:
 *
 *   - '!'..':' except '"', '(', ')', and ','
 *   - 'A'..'Z'
 *   - '^'..'~' except '{' and '}'
 *
 * Returns |last| if every byte is in one of the sets.  The loop
 * advances in steps of 32 until it reaches |last| exactly.
 */
static const uint8_t *find_char_token(const uint8_t *first,
                                      const uint8_t *last) {
  const __m256i lo0 = _mm256_set1_epi8('!' - 1);
  const __m256i hi0 = _mm256_set1_epi8(':' + 1);
  const __m256i lo1 = _mm256_set1_epi8('A' - 1);
  const __m256i hi1 = _mm256_set1_epi8('Z' + 1);
  const __m256i lo2 = _mm256_set1_epi8('^' - 1);
  const __m256i hi2 = _mm256_set1_epi8('~' + 1);
  const __m256i quote = _mm256_set1_epi8('"');
  const __m256i lparen = _mm256_set1_epi8('(');
  const __m256i rparen = _mm256_set1_epi8(')');
  const __m256i comma = _mm256_set1_epi8(',');
  const __m256i lbrace = _mm256_set1_epi8('{');
  const __m256i rbrace = _mm256_set1_epi8('}');
  const uint8_t *p = first;
  __m256i chunk, in0, in1, in2, excl0, excl2, ok;
  uint32_t mask;

  while (p != last) {
    chunk = _mm256_loadu_si256((void *)p);

    /* '!'..':' minus the four excluded bytes. */
    in0 = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lo0),
                           _mm256_cmpgt_epi8(hi0, chunk));
    excl0 = _mm256_or_si256(_mm256_or_si256(_mm256_cmpeq_epi8(chunk, quote),
                                            _mm256_cmpeq_epi8(chunk, lparen)),
                            _mm256_or_si256(_mm256_cmpeq_epi8(chunk, rparen),
                                            _mm256_cmpeq_epi8(chunk, comma)));
    ok = _mm256_andnot_si256(excl0, in0);

    /* 'A'..'Z'. */
    in1 = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lo1),
                           _mm256_cmpgt_epi8(hi1, chunk));
    ok = _mm256_or_si256(in1, ok);

    /* '^'..'~' minus the braces. */
    in2 = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lo2),
                           _mm256_cmpgt_epi8(hi2, chunk));
    excl2 = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, lbrace),
                            _mm256_cmpeq_epi8(chunk, rbrace));
    ok = _mm256_or_si256(_mm256_andnot_si256(excl2, in2), ok);

    /* Invert so that a set bit marks a byte outside every set. */
    mask = ~(uint32_t)_mm256_movemask_epi8(ok);
    if (mask != 0) {
      return p + ctz(mask);
    }

    p += 32;
  }

  return last;
}
821
#endif /* __AVX2__ */
822
823
0
/*
 * parser_token consumes a token starting at sfp->pos.  The first byte
 * is taken unconditionally (the caller has validated it); after that,
 * bytes are consumed for as long as they match TOKEN_CASES.  If |dest|
 * is not NULL, it receives SFPARSE_TYPE_TOKEN and the consumed bytes.
 *
 * Always returns 0.
 */
static int parser_token(sfparse_parser *sfp, sfparse_value *dest) {
  const uint8_t *token_start = sfp->pos;
  int is_token_char;
#ifdef __AVX2__
  const uint8_t *simd_end;
#endif /* __AVX2__ */

  /* The first byte has already been validated by the caller. */
  ++sfp->pos;

#ifdef __AVX2__
  /* Scan whole 32-byte chunks first; the scalar loop below handles
     whatever is left over. */
  if (sfp->end - sfp->pos >= 32) {
    simd_end = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);

    sfp->pos = find_char_token(sfp->pos, simd_end);
    if (sfp->pos != simd_end) {
      goto done;
    }
  }
#endif /* __AVX2__ */

  while (!parser_eof(sfp)) {
    is_token_char = 0;

    switch (*sfp->pos) {
    TOKEN_CASES:
      is_token_char = 1;

      break;
    }

    if (!is_token_char) {
      break;
    }

    ++sfp->pos;
  }

#ifdef __AVX2__
done:
#endif /* __AVX2__ */
  if (dest) {
    dest->type = SFPARSE_TYPE_TOKEN;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
    dest->vec.base = (uint8_t *)token_start;
    dest->vec.len = (size_t)(sfp->pos - token_start);
  }

  return 0;
}
864
865
#ifdef __AVX2__
866
/*
 * find_char_byteseq scans [first, last) 32 bytes at a time and
 * returns a pointer to the first byte that is none of '+', '/',
 * '0'..'9', 'A'..'Z', or 'a'..'z'.  Returns |last| if there is no
 * such byte.  The loop advances in steps of 32 until it reaches
 * |last| exactly.
 */
static const uint8_t *find_char_byteseq(const uint8_t *first,
                                        const uint8_t *last) {
  const __m256i plus = _mm256_set1_epi8('+');
  const __m256i slash = _mm256_set1_epi8('/');
  const __m256i digit_lo = _mm256_set1_epi8('0' - 1);
  const __m256i digit_hi = _mm256_set1_epi8('9' + 1);
  const __m256i upper_lo = _mm256_set1_epi8('A' - 1);
  const __m256i upper_hi = _mm256_set1_epi8('Z' + 1);
  const __m256i lower_lo = _mm256_set1_epi8('a' - 1);
  const __m256i lower_hi = _mm256_set1_epi8('z' + 1);
  const uint8_t *p = first;
  __m256i chunk, symbols, digits, uppers, lowers, ok;
  uint32_t mask;

  while (p != last) {
    chunk = _mm256_loadu_si256((void *)p);

    symbols = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, plus),
                              _mm256_cmpeq_epi8(chunk, slash));
    digits = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, digit_lo),
                              _mm256_cmpgt_epi8(digit_hi, chunk));
    uppers = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, upper_lo),
                              _mm256_cmpgt_epi8(upper_hi, chunk));
    lowers = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lower_lo),
                              _mm256_cmpgt_epi8(lower_hi, chunk));

    ok = _mm256_or_si256(_mm256_or_si256(symbols, digits),
                         _mm256_or_si256(uppers, lowers));

    /* Invert so that a set bit marks a byte outside the alphabet. */
    mask = ~(uint32_t)_mm256_movemask_epi8(ok);
    if (mask != 0) {
      return p + ctz(mask);
    }

    p += 32;
  }

  return last;
}
902
#endif /* __AVX2__ */
903
904
0
/*
 * parser_byteseq consumes a byte sequence delimited by ':' on both
 * sides, starting at the opening ':' at sfp->pos.  The content may
 * hold '+', '/', DIGIT_CASES, and ALPHA_CASES bytes, optionally
 * followed by '=' padding.  Padding is only accepted when the number
 * of preceding content bytes modulo 4 is 2 (one '=', optionally a
 * second) or 3 (exactly one '='), and it must be followed directly by
 * the closing ':'.  A content length of 1 modulo 4 is rejected.  If
 * |dest| is not NULL, it receives the bytes between the colons,
 * padding included (base is NULL when empty).  On success sfp->pos is
 * left just after the closing ':'.
 *
 * Returns 0 on success, or SFPARSE_ERR_PARSE otherwise.
 */
static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) {
  const uint8_t *content;
  size_t rem;
#ifdef __AVX2__
  const uint8_t *simd_end;
#endif /* __AVX2__ */

  /* The first byte has already been validated by the caller. */
  assert(':' == *sfp->pos);

  content = ++sfp->pos;

#ifdef __AVX2__
  /* Skip over whole 32-byte chunks of plain alphabet bytes; the
     scalar loop below picks up from wherever the scan stopped. */
  if (sfp->end - sfp->pos >= 32) {
    simd_end = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
    sfp->pos = find_char_byteseq(sfp->pos, simd_end);
  }
#endif /* __AVX2__ */

  while (!parser_eof(sfp)) {
    switch (*sfp->pos) {
    case ':':
      if (((sfp->pos - content) & 0x3) == 1) {
        return SFPARSE_ERR_PARSE;
      }

      goto done;
    case '=':
      rem = (size_t)(sfp->pos - content) & 0x3u;

      if (rem < 2) {
        return SFPARSE_ERR_PARSE;
      }

      /* Step over the first '='. */
      ++sfp->pos;

      if (rem == 2) {
        if (parser_eof(sfp)) {
          return SFPARSE_ERR_PARSE;
        }

        /* A second '=' is optional here. */
        if (*sfp->pos == '=') {
          ++sfp->pos;
        }
      }

      if (parser_eof(sfp) || *sfp->pos != ':') {
        return SFPARSE_ERR_PARSE;
      }

      goto done;
    case '+':
    case '/':
    DIGIT_CASES:
    ALPHA_CASES:
      break;
    default:
      return SFPARSE_ERR_PARSE;
    }

    ++sfp->pos;
  }

  /* Input ended before the closing ':'. */
  return SFPARSE_ERR_PARSE;

done:
  if (dest) {
    dest->type = SFPARSE_TYPE_BYTESEQ;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
    dest->vec.len = (size_t)(sfp->pos - content);
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)content;
  }

  /* Step over the closing ':'. */
  ++sfp->pos;

  return 0;
}
982
983
0
/*
 * parser_boolean consumes a boolean: '?' followed by '0' or '1'.  If
 * |dest| is not NULL, it receives SFPARSE_TYPE_BOOLEAN with boolean
 * set to 0 or 1 accordingly.
 *
 * Returns 0 on success, or SFPARSE_ERR_PARSE if nothing follows '?'
 * or the following byte is neither '0' nor '1'.
 */
static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) {
  uint8_t digit;
  int truth;

  /* The first byte has already been validated by the caller. */
  assert('?' == *sfp->pos);

  ++sfp->pos;

  if (parser_eof(sfp)) {
    return SFPARSE_ERR_PARSE;
  }

  digit = *sfp->pos;

  if (digit != '0' && digit != '1') {
    return SFPARSE_ERR_PARSE;
  }

  truth = digit == '1';

  ++sfp->pos;

  if (dest) {
    dest->type = SFPARSE_TYPE_BOOLEAN;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
    dest->boolean = truth;
  }

  return 0;
}
1018
1019
0
static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
1020
0
  uint8_t c, b = **ppos;
1021
1022
0
  switch (b) {
1023
0
  DIGIT_CASES:
1024
0
    c = (uint8_t)((b - '0') << 4);
1025
1026
0
    break;
1027
0
  LCHEXALPHA_CASES:
1028
0
    c = (uint8_t)((b - 'a' + 10) << 4);
1029
1030
0
    break;
1031
0
  default:
1032
0
    return -1;
1033
0
  }
1034
1035
0
  b = *++*ppos;
1036
1037
0
  switch (b) {
1038
0
  DIGIT_CASES:
1039
0
    c |= (uint8_t)(b - '0');
1040
1041
0
    break;
1042
0
  LCHEXALPHA_CASES:
1043
0
    c |= (uint8_t)(b - 'a' + 10);
1044
1045
0
    break;
1046
0
  default:
1047
0
    return -1;
1048
0
  }
1049
1050
0
  *pc = c;
1051
0
  ++*ppos;
1052
1053
0
  return 0;
1054
0
}
1055
1056
/* Start of utf8 dfa */
1057
/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
1058
 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
1059
 *
1060
 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
1061
 *
1062
 * Permission is hereby granted, free of charge, to any person
1063
 * obtaining a copy of this software and associated documentation
1064
 * files (the "Software"), to deal in the Software without
1065
 * restriction, including without limitation the rights to use, copy,
1066
 * modify, merge, publish, distribute, sublicense, and/or sell copies
1067
 * of the Software, and to permit persons to whom the Software is
1068
 * furnished to do so, subject to the following conditions:
1069
 *
1070
 * The above copyright notice and this permission notice shall be
1071
 * included in all copies or substantial portions of the Software.
1072
 *
1073
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1074
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1075
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1076
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
1077
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1078
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1079
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1080
 * SOFTWARE.
1081
 */
1082
0
#define UTF8_ACCEPT 0
1083
0
#define UTF8_REJECT 12
1084
1085
/* clang-format off */
1086
static const uint8_t utf8d[] = {
1087
  /*
1088
   * The first part of the table maps bytes to character classes that
1089
   * reduce the size of the transition table and create bitmasks.
1090
   */
1091
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1092
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1093
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1094
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1095
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
1096
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
1097
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1098
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
1099
1100
   /*
1101
    * The second part is a transition table that maps a combination
1102
    * of a state of the automaton and a character class to a state.
1103
    */
1104
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
1105
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
1106
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
1107
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
1108
  12,36,12,12,12,12,12,12,12,12,12,12,
1109
};
1110
/* clang-format on */
1111
1112
0
static void utf8_decode(uint32_t *state, uint8_t byte) {
1113
0
  *state = utf8d[256 + *state + utf8d[byte]];
1114
0
}
1115
1116
/* End of utf8 dfa */
1117
1118
0
/*
 * parser_dispstring consumes a display string starting at the '%' at
 * sfp->pos.  The '%' must be followed by '"'; the content then runs
 * up to the closing '"'.  Inside the content:
 *
 *   - bytes matching X00_1F_CASES or X7F_FF_CASES are rejected;
 *   - '%' must be followed by two hex digits accepted by pctdecode,
 *     and the decoded bytes must form valid UTF-8 according to the
 *     DFA driven by utf8_decode;
 *   - any other byte is only accepted while the UTF-8 decoder is not
 *     in the middle of a multi-byte sequence.
 *
 * If |dest| is not NULL, it receives SFPARSE_TYPE_DISPSTRING and the
 * still percent-encoded bytes between the quotes (base is NULL when
 * empty).  On success sfp->pos is left just after the closing '"'.
 *
 * Returns 0 on success, or SFPARSE_ERR_PARSE otherwise, including
 * when the input ends before the closing '"'.
 */
static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) {
  const uint8_t *base;
  uint8_t c;
  uint32_t utf8state = UTF8_ACCEPT;

  assert('%' == *sfp->pos);

  ++sfp->pos;

  if (parser_eof(sfp) || *sfp->pos != '"') {
    return SFPARSE_ERR_PARSE;
  }

  base = ++sfp->pos;

  for (; !parser_eof(sfp);) {
    switch (*sfp->pos) {
    X00_1F_CASES:
    X7F_FF_CASES:
      return SFPARSE_ERR_PARSE;
    case '%':
      ++sfp->pos;

      /* pctdecode reads two bytes.  Compare the number of remaining
         bytes instead of computing sfp->pos + 2: when fewer than two
         bytes are left, that sum would point more than one past the
         end of the buffer, which is undefined behaviour. */
      if (sfp->end - sfp->pos < 2) {
        return SFPARSE_ERR_PARSE;
      }

      if (pctdecode(&c, &sfp->pos) != 0) {
        return SFPARSE_ERR_PARSE;
      }

      utf8_decode(&utf8state, c);
      if (utf8state == UTF8_REJECT) {
        return SFPARSE_ERR_PARSE;
      }

      break;
    case '"':
      /* The string must not end inside a multi-byte sequence. */
      if (utf8state != UTF8_ACCEPT) {
        return SFPARSE_ERR_PARSE;
      }

      if (dest) {
        dest->type = SFPARSE_TYPE_DISPSTRING;
        dest->flags = SFPARSE_VALUE_FLAG_NONE;
        dest->vec.len = (size_t)(sfp->pos - base);
        dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
      }

      /* Step over the closing quote. */
      ++sfp->pos;

      return 0;
    default:
      /* A literal byte cannot interrupt a multi-byte sequence. */
      if (utf8state != UTF8_ACCEPT) {
        return SFPARSE_ERR_PARSE;
      }

      ++sfp->pos;
    }
  }

  return SFPARSE_ERR_PARSE;
}
1181
1182
0
/*
 * parser_bare_item picks the bare item parser that matches the byte
 * at sfp->pos and returns its result:
 *
 *   '"'                     parser_string
 *   '-' or DIGIT_CASES      parser_number
 *   '@'                     parser_date
 *   ':'                     parser_byteseq
 *   '?'                     parser_boolean
 *   '*' or ALPHA_CASES      parser_token
 *   '%'                     parser_dispstring
 *
 * Any other byte yields SFPARSE_ERR_PARSE.  The caller must make sure
 * that sfp->pos is readable.
 */
static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) {
  int (*parse)(sfparse_parser *, sfparse_value *);

  switch (*sfp->pos) {
  case '"':
    parse = parser_string;

    break;
  case '-':
  DIGIT_CASES:
    parse = parser_number;

    break;
  case '@':
    parse = parser_date;

    break;
  case ':':
    parse = parser_byteseq;

    break;
  case '?':
    parse = parser_boolean;

    break;
  case '*':
  ALPHA_CASES:
    parse = parser_token;

    break;
  case '%':
    parse = parser_dispstring;

    break;
  default:
    return SFPARSE_ERR_PARSE;
  }

  return parse(sfp, dest);
}
1204
1205
static int parser_skip_inner_list(sfparse_parser *sfp);
1206
1207
int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
1208
0
                         sfparse_value *dest_value) {
1209
0
  int rv;
1210
1211
0
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
1212
0
  case SFPARSE_STATE_BEFORE:
1213
0
    rv = parser_skip_inner_list(sfp);
1214
0
    if (rv != 0) {
1215
0
      return rv;
1216
0
    }
1217
1218
    /* fall through */
1219
0
  case SFPARSE_STATE_BEFORE_PARAMS:
1220
0
    parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);
1221
1222
0
    break;
1223
0
  case SFPARSE_STATE_PARAMS:
1224
0
    break;
1225
0
  default:
1226
0
    assert(0);
1227
0
    abort();
1228
0
  }
1229
1230
0
  if (parser_eof(sfp) || *sfp->pos != ';') {
1231
0
    parser_set_op_state(sfp, SFPARSE_STATE_AFTER);
1232
1233
0
    return SFPARSE_ERR_EOF;
1234
0
  }
1235
1236
0
  ++sfp->pos;
1237
1238
0
  parser_discard_sp(sfp);
1239
0
  if (parser_eof(sfp)) {
1240
0
    return SFPARSE_ERR_PARSE;
1241
0
  }
1242
1243
0
  rv = parser_key(sfp, dest_key);
1244
0
  if (rv != 0) {
1245
0
    return rv;
1246
0
  }
1247
1248
0
  if (parser_eof(sfp) || *sfp->pos != '=') {
1249
0
    if (dest_value) {
1250
0
      dest_value->type = SFPARSE_TYPE_BOOLEAN;
1251
0
      dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
1252
0
      dest_value->boolean = 1;
1253
0
    }
1254
1255
0
    return 0;
1256
0
  }
1257
1258
0
  ++sfp->pos;
1259
1260
0
  if (parser_eof(sfp)) {
1261
0
    return SFPARSE_ERR_PARSE;
1262
0
  }
1263
1264
0
  return parser_bare_item(sfp, dest_value);
1265
0
}
1266
1267
0
/*
 * parser_skip_params calls sfparse_parser_param repeatedly, throwing
 * away the results, until it stops returning 0.
 *
 * Returns 0 once the parameters are exhausted (SFPARSE_ERR_EOF), or
 * SFPARSE_ERR_PARSE if one of them is malformed.  Any other return
 * value aborts the program.
 */
static int parser_skip_params(sfparse_parser *sfp) {
  int rv;

  do {
    rv = sfparse_parser_param(sfp, NULL, NULL);
  } while (rv == 0);

  switch (rv) {
  case SFPARSE_ERR_EOF:
    return 0;
  case SFPARSE_ERR_PARSE:
    return rv;
  default:
    assert(0);
    abort();
  }
}
1285
1286
0
/*
 * sfparse_parser_inner_list reads the next item of the current inner
 * list.  Depending on the operation bits of sfp->state it first
 * discards leading spaces (SFPARSE_STATE_BEFORE), or skips any unread
 * parameters of the previous item (SFPARSE_STATE_BEFORE_PARAMS) and
 * then requires either spaces or ')' (also SFPARSE_STATE_AFTER).  Any
 * other operation state aborts the program.
 *
 * Returns 0 when an item has been read into |dest| (if not NULL),
 * SFPARSE_ERR_EOF when the closing ')' has been consumed, or
 * SFPARSE_ERR_PARSE on malformed input.  In the first two cases the
 * operation state becomes SFPARSE_STATE_BEFORE_PARAMS.
 */
int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
  case SFPARSE_STATE_BEFORE:
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    break;
  case SFPARSE_STATE_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
       another state without reading the state. */
    /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */

    /* fall through */
  case SFPARSE_STATE_AFTER:
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    /* Items are separated by spaces; only ')' may follow directly. */
    if (*sfp->pos == ' ') {
      parser_discard_sp(sfp);
      if (parser_eof(sfp)) {
        return SFPARSE_ERR_PARSE;
      }
    } else if (*sfp->pos != ')') {
      return SFPARSE_ERR_PARSE;
    }

    break;
  default:
    assert(0);
    abort();
  }

  if (*sfp->pos != ')') {
    rv = parser_bare_item(sfp, dest);
    if (rv != 0) {
      return rv;
    }

    parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);

    return 0;
  }

  /* End of the inner list: step over ')'. */
  ++sfp->pos;

  parser_unset_inner_list_state(sfp);
  parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);

  return SFPARSE_ERR_EOF;
}
1351
1352
0
/*
 * parser_skip_inner_list calls sfparse_parser_inner_list repeatedly,
 * throwing away the items, until it stops returning 0.
 *
 * Returns 0 once the end of the inner list has been reached
 * (SFPARSE_ERR_EOF), or SFPARSE_ERR_PARSE if the list is malformed.
 * Any other return value aborts the program.
 */
static int parser_skip_inner_list(sfparse_parser *sfp) {
  int rv;

  do {
    rv = sfparse_parser_inner_list(sfp, NULL);
  } while (rv == 0);

  switch (rv) {
  case SFPARSE_ERR_EOF:
    return 0;
  case SFPARSE_ERR_PARSE:
    return rv;
  default:
    assert(0);
    abort();
  }
}
1370
1371
0
/*
 * parser_next_key_or_item moves past the ',' that separates two
 * members, discarding optional whitespace on both sides of it.
 *
 * Returns 0 when sfp->pos is at the start of the next member,
 * SFPARSE_ERR_EOF when the input ends before any ',', or
 * SFPARSE_ERR_PARSE when something other than ',' is found or nothing
 * follows the ','.
 */
static int parser_next_key_or_item(sfparse_parser *sfp) {
  parser_discard_ows(sfp);

  if (parser_eof(sfp)) {
    return SFPARSE_ERR_EOF;
  }

  if (*sfp->pos != ',') {
    return SFPARSE_ERR_PARSE;
  }

  /* Step over ','. */
  ++sfp->pos;

  parser_discard_ows(sfp);

  /* A trailing ',' is an error. */
  return parser_eof(sfp) ? SFPARSE_ERR_PARSE : 0;
}
1391
1392
0
/*
 * parser_dict_value reads the value that follows a dictionary key.
 *
 *   - Without '=', the value is boolean true.
 *   - With '=' followed by '(', the value is an inner list; only the
 *     '(' is consumed and sfp->state becomes
 *     SFPARSE_STATE_DICT_INNER_LIST_BEFORE.
 *   - With '=' followed by anything else, the value is a bare item.
 *
 * In the first and last case sfp->state becomes
 * SFPARSE_STATE_DICT_BEFORE_PARAMS.  |dest| receives the value if it
 * is not NULL.
 *
 * Returns 0 on success, or SFPARSE_ERR_PARSE if nothing follows '='
 * or the bare item is malformed.
 */
static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  if (!parser_eof(sfp) && *(sfp->pos) == '=') {
    /* Step over '='. */
    ++sfp->pos;

    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    if (*sfp->pos == '(') {
      if (dest) {
        dest->type = SFPARSE_TYPE_INNER_LIST;
        dest->flags = SFPARSE_VALUE_FLAG_NONE;
      }

      ++sfp->pos;

      sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE;

      return 0;
    }

    rv = parser_bare_item(sfp, dest);
    if (rv != 0) {
      return rv;
    }
  } else if (dest) {
    /* Boolean true */
    dest->type = SFPARSE_TYPE_BOOLEAN;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
    dest->boolean = 1;
  }

  sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;

  return 0;
}
1436
1437
int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
1438
0
                        sfparse_value *dest_value) {
1439
0
  int rv;
1440
1441
0
  switch (sfp->state) {
1442
0
  case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
1443
0
    rv = parser_skip_inner_list(sfp);
1444
0
    if (rv != 0) {
1445
0
      return rv;
1446
0
    }
1447
1448
    /* fall through */
1449
0
  case SFPARSE_STATE_DICT_BEFORE_PARAMS:
1450
0
    rv = parser_skip_params(sfp);
1451
0
    if (rv != 0) {
1452
0
      return rv;
1453
0
    }
1454
1455
    /* fall through */
1456
0
  case SFPARSE_STATE_DICT_AFTER:
1457
0
    rv = parser_next_key_or_item(sfp);
1458
0
    if (rv != 0) {
1459
0
      return rv;
1460
0
    }
1461
1462
0
    break;
1463
0
  case SFPARSE_STATE_INITIAL:
1464
0
    parser_discard_sp(sfp);
1465
1466
0
    if (parser_eof(sfp)) {
1467
0
      return SFPARSE_ERR_EOF;
1468
0
    }
1469
1470
0
    break;
1471
0
  default:
1472
0
    assert(0);
1473
0
    abort();
1474
0
  }
1475
1476
0
  rv = parser_key(sfp, dest_key);
1477
0
  if (rv != 0) {
1478
0
    return rv;
1479
0
  }
1480
1481
0
  return parser_dict_value(sfp, dest_value);
1482
0
}
1483
1484
0
/*
 * sfparse_parser_list reads the next list member.  Depending on
 * sfp->state it first finishes the previous member (skipping the rest
 * of its inner list and/or its parameters) and moves past the
 * separating ',', or, in SFPARSE_STATE_INITIAL, discards leading
 * spaces.  Any other state aborts the program.
 *
 * If the member starts with '(', only the '(' is consumed, |dest| (if
 * not NULL) is marked SFPARSE_TYPE_INNER_LIST, and sfp->state becomes
 * SFPARSE_STATE_LIST_INNER_LIST_BEFORE.  Otherwise a bare item is
 * parsed into |dest| and sfp->state becomes
 * SFPARSE_STATE_LIST_BEFORE_PARAMS.
 *
 * Returns 0 when a member has been read, SFPARSE_ERR_EOF when there
 * are no more members, or SFPARSE_ERR_PARSE on malformed input.
 */
int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_INITIAL:
    parser_discard_sp(sfp);

    if (parser_eof(sfp)) {
      return SFPARSE_ERR_EOF;
    }

    break;
  case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_LIST_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_LIST_AFTER:
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }

    break;
  default:
    assert(0);
    abort();
  }

  if (*sfp->pos != '(') {
    rv = parser_bare_item(sfp, dest);
    if (rv != 0) {
      return rv;
    }

    sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;

    return 0;
  }

  /* Inner list: report it and let the caller iterate its items. */
  if (dest) {
    dest->type = SFPARSE_TYPE_INNER_LIST;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
  }

  ++sfp->pos;

  sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;

  return 0;
}
1544
1545
0
/*
 * sfparse_parser_item reads a single item.
 *
 * In SFPARSE_STATE_INITIAL, leading spaces are discarded and the item
 * is read: if it starts with '(', only the '(' is consumed, |dest|
 * (if not NULL) is marked SFPARSE_TYPE_INNER_LIST, and sfp->state
 * becomes SFPARSE_STATE_ITEM_INNER_LIST_BEFORE; otherwise a bare item
 * is parsed into |dest| and sfp->state becomes
 * SFPARSE_STATE_ITEM_BEFORE_PARAMS.  Returns 0 in both cases.
 *
 * In the ITEM_INNER_LIST_BEFORE, ITEM_BEFORE_PARAMS, and ITEM_AFTER
 * states, whatever is left of the item (inner list, parameters) is
 * skipped and trailing spaces are discarded; SFPARSE_ERR_EOF is
 * returned if the input then ends, SFPARSE_ERR_PARSE otherwise.
 *
 * SFPARSE_ERR_PARSE is also returned on empty or malformed input.
 * Any other state aborts the program.
 */
int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_INITIAL:
    parser_discard_sp(sfp);

    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    break;
  case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_ITEM_AFTER:
    parser_discard_sp(sfp);

    /* Nothing but spaces may follow the item. */
    return parser_eof(sfp) ? SFPARSE_ERR_EOF : SFPARSE_ERR_PARSE;
  default:
    assert(0);
    abort();
  }

  if (*sfp->pos != '(') {
    rv = parser_bare_item(sfp, dest);
    if (rv != 0) {
      return rv;
    }

    sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;

    return 0;
  }

  /* Inner list: report it and let the caller iterate its items. */
  if (dest) {
    dest->type = SFPARSE_TYPE_INNER_LIST;
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
  }

  ++sfp->pos;

  sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;

  return 0;
}
1606
1607
/*
 * sfparse_parser_init prepares |sfp| to parse the |datalen| bytes at
 * |data| and puts it into SFPARSE_STATE_INITIAL.  When |datalen| is
 * 0, |data| is ignored and both sfp->pos and sfp->end are set to
 * NULL.
 */
void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data,
                         size_t datalen) {
  const uint8_t *begin = NULL;
  const uint8_t *end = NULL;

  if (datalen != 0) {
    begin = data;
    end = data + datalen;
  }

  sfp->pos = begin;
  sfp->end = end;
  sfp->state = SFPARSE_STATE_INITIAL;
}
1618
1619
0
/*
 * sfparse_unescape copies |src| to the buffer at dest->base, dropping
 * each backslash and keeping the byte that follows it verbatim, and
 * stores the number of bytes written in dest->len.  When src->len is
 * 0, only dest->len is set (to 0).
 *
 * The byte after every backslash is read without a bounds check, so
 * |src| must not end with an unpaired backslash.  The caller provides
 * the storage behind dest->base; the output is never longer than the
 * input.
 */
void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) {
  const uint8_t *in, *esc;
  uint8_t *out;
  size_t remaining, run;

  if (src->len == 0) {
    dest->len = 0;

    return;
  }

  out = dest->base;
  in = src->base;
  remaining = src->len;

  while ((esc = memchr(in, '\\', remaining)) != NULL) {
    /* Copy the plain run before the backslash, then the escaped
       byte itself. */
    run = (size_t)(esc - in);
    memcpy(out, in, run);
    out[run] = esc[1];
    out += run + 1;

    in = esc + 2;
    remaining -= run + 2;
  }

  /* No more escapes: copy the tail as is. */
  memcpy(out, in, remaining);
  out += remaining;

  dest->len = (size_t)(out - dest->base);
}
1654
1655
0
/*
 * sfparse_base64decode decodes the base64 text in |src| (standard
 * alphabet: A-Z, a-z, 0-9, '+', '/') and writes the resulting bytes
 * to dest->base, storing the number of bytes produced in dest->len.
 *
 * Trailing '=' padding is optional: an input whose length is not a
 * multiple of 4 is accepted as long as 2 or 3 characters remain after
 * the complete 4-character groups.  A single leftover character is a
 * programming error and aborts.
 *
 * No bounds checking is done on dest->base, and characters outside
 * the alphabet are only caught by assert() in the full-group loop.
 */
void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
  /* Maps an input byte to its 6-bit value, or -1 if it is not part of
     the base64 alphabet. */
  static const int b64val[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
    -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1};
  const uint8_t *in, *bulk_end;
  uint8_t *out;
  uint8_t byte;
  uint32_t acc;
  size_t tail;
  int k, idx;

  if (src->len == 0) {
    dest->len = 0;

    return;
  }

  out = dest->base;
  in = src->base;

  /* |tail| is the number of trailing input characters that are NOT
     handled by the full-group loop.  A padded final group ("xx==" or
     "xxx=") is kept out of the loop by treating all 4 of its
     characters as tail. */
  tail = src->len & 0x3;
  if (tail == 0 && src->base[src->len - 1] == '=') {
    tail = 4;
  }

  bulk_end = src->base + src->len - tail;

  /* Full groups: 4 characters -> 24 bits -> 3 bytes. */
  while (in != bulk_end) {
    acc = 0;

    for (k = 0; k < 4; ++k, ++in) {
      idx = b64val[*in];

      assert(idx != -1);

      acc += (uint32_t)(idx << (18 - 6 * k));
    }

    *out++ = (uint8_t)(acc >> 16);
    *out++ = (acc >> 8) & 0xffu;
    *out++ = acc & 0xffu;
  }

  /* Reduce |tail| to the number of significant (non-'=') characters
     left: 0, 2 or 3. */
  if (tail == 1) {
    assert(0);
    abort();
  }

  if (tail == 3) {
    if (src->base[src->len - 1] == '=') {
      tail = 2;
    }
  } else if (tail == 4) {
    assert('=' == src->base[src->len - 1]);

    tail = src->base[src->len - 2] == '=' ? 2 : 3;
  }

  if (tail == 2) {
    /* 2 characters carry 12 bits; only the top 8 form a byte. */
    idx = b64val[in[0]];
    byte = (uint8_t)(idx << 2);
    idx = b64val[in[1]];
    byte |= (uint8_t)(idx >> 4);

    *out++ = byte;
  } else if (tail == 3) {
    /* 3 characters carry 18 bits; only the top 16 form two bytes. */
    acc = (uint32_t)(b64val[in[0]] << 10);
    acc += (uint32_t)(b64val[in[1]] << 4);
    acc += (uint32_t)(b64val[in[2]] >> 2);

    *out++ = (acc >> 8) & 0xffu;
    *out++ = acc & 0xffu;
  }

  dest->len = (size_t)(out - dest->base);
}
1750
1751
0
/*
 * sfparse_pctdecode copies |src| into |dest|, replacing each
 * percent-encoded triplet ('%' followed by two characters) with the
 * single byte produced by pctdecode().  On return dest->len holds the
 * number of bytes written to dest->base.
 *
 * No bounds checking is done on dest->base; the output is never
 * longer than src->len.  Each '%' in |src| is assumed to start a
 * complete 3-character sequence.
 */
void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) {
  const uint8_t *in, *pct;
  uint8_t *out;
  size_t remaining, run;

  if (src->len == 0) {
    dest->len = 0;

    return;
  }

  out = dest->base;
  in = src->base;
  remaining = src->len;

  while ((pct = memchr(in, '%', remaining)) != NULL) {
    /* Copy the literal run that precedes the '%'. */
    run = (size_t)(pct - in);
    memcpy(out, in, run);
    out += run;

    /* Step over '%'; pctdecode() writes one output byte and is
       expected to advance |in| past the two characters it reads. */
    in = pct + 1;

    pctdecode(out, &in);
    ++out;

    remaining -= run + 3;
  }

  /* No more '%': the rest is copied unchanged. */
  memcpy(out, in, remaining);
  out += remaining;

  dest->len = (size_t)(out - dest->base);
}