Coverage Report

Created: 2025-11-11 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/nghttp2/lib/sfparse.c
Line
Count
Source
1
/*
2
 * sfparse
3
 *
4
 * Copyright (c) 2023 sfparse contributors
5
 * Copyright (c) 2019 nghttp3 contributors
6
 * Copyright (c) 2015 nghttp2 contributors
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining
9
 * a copy of this software and associated documentation files (the
10
 * "Software"), to deal in the Software without restriction, including
11
 * without limitation the rights to use, copy, modify, merge, publish,
12
 * distribute, sublicense, and/or sell copies of the Software, and to
13
 * permit persons to whom the Software is furnished to do so, subject to
14
 * the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be
17
 * included in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 */
27
#include "sfparse.h"
28
29
#include <string.h>
30
#include <assert.h>
31
#include <stdlib.h>
32
33
#ifdef __AVX2__
34
#  include <immintrin.h>
35
#endif /* __AVX2__ */
36
37
84.5k
#define SFPARSE_STATE_DICT 0x08u
38
0
#define SFPARSE_STATE_LIST 0x10u
39
0
#define SFPARSE_STATE_ITEM 0x18u
40
41
5.16k
#define SFPARSE_STATE_INNER_LIST 0x04u
42
43
5.51k
#define SFPARSE_STATE_BEFORE 0x00u
44
104k
#define SFPARSE_STATE_BEFORE_PARAMS 0x01u
45
44.8k
#define SFPARSE_STATE_PARAMS 0x02u
46
67.7k
#define SFPARSE_STATE_AFTER 0x03u
47
48
131k
#define SFPARSE_STATE_OP_MASK 0x03u
49
50
#define SFPARSE_SET_STATE_AFTER(NAME)                                          \
51
26.8k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)
52
#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME)                                  \
53
53.9k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)
54
#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME)                              \
55
3.68k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)
56
57
26.8k
#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
58
53.9k
#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
59
#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE                                   \
60
3.68k
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)
61
62
0
#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
63
0
#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
64
#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE                                   \
65
0
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)
66
67
0
#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
68
0
#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
69
#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE                                   \
70
0
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)
71
72
49.3k
#define SFPARSE_STATE_INITIAL 0x00u
73
74
#define DIGIT_CASES                                                            \
75
34.0k
  case '0':                                                                    \
76
40.2k
  case '1':                                                                    \
77
46.6k
  case '2':                                                                    \
78
50.8k
  case '3':                                                                    \
79
54.5k
  case '4':                                                                    \
80
58.0k
  case '5':                                                                    \
81
61.9k
  case '6':                                                                    \
82
65.3k
  case '7':                                                                    \
83
69.4k
  case '8':                                                                    \
84
73.6k
  case '9'
85
86
#define LCALPHA_CASES                                                          \
87
103k
  case 'a':                                                                    \
88
106k
  case 'b':                                                                    \
89
113k
  case 'c':                                                                    \
90
116k
  case 'd':                                                                    \
91
121k
  case 'e':                                                                    \
92
125k
  case 'f':                                                                    \
93
129k
  case 'g':                                                                    \
94
135k
  case 'h':                                                                    \
95
142k
  case 'i':                                                                    \
96
145k
  case 'j':                                                                    \
97
149k
  case 'k':                                                                    \
98
155k
  case 'l':                                                                    \
99
161k
  case 'm':                                                                    \
100
167k
  case 'n':                                                                    \
101
174k
  case 'o':                                                                    \
102
178k
  case 'p':                                                                    \
103
184k
  case 'q':                                                                    \
104
190k
  case 'r':                                                                    \
105
195k
  case 's':                                                                    \
106
202k
  case 't':                                                                    \
107
205k
  case 'u':                                                                    \
108
210k
  case 'v':                                                                    \
109
217k
  case 'w':                                                                    \
110
222k
  case 'x':                                                                    \
111
229k
  case 'y':                                                                    \
112
233k
  case 'z'
113
114
#define UCALPHA_CASES                                                          \
115
34.5k
  case 'A':                                                                    \
116
35.7k
  case 'B':                                                                    \
117
37.2k
  case 'C':                                                                    \
118
38.4k
  case 'D':                                                                    \
119
39.8k
  case 'E':                                                                    \
120
41.2k
  case 'F':                                                                    \
121
42.2k
  case 'G':                                                                    \
122
44.0k
  case 'H':                                                                    \
123
46.2k
  case 'I':                                                                    \
124
47.8k
  case 'J':                                                                    \
125
48.9k
  case 'K':                                                                    \
126
50.4k
  case 'L':                                                                    \
127
51.8k
  case 'M':                                                                    \
128
53.6k
  case 'N':                                                                    \
129
55.0k
  case 'O':                                                                    \
130
57.4k
  case 'P':                                                                    \
131
58.3k
  case 'Q':                                                                    \
132
59.9k
  case 'R':                                                                    \
133
61.1k
  case 'S':                                                                    \
134
63.0k
  case 'T':                                                                    \
135
64.3k
  case 'U':                                                                    \
136
65.4k
  case 'V':                                                                    \
137
66.6k
  case 'W':                                                                    \
138
67.8k
  case 'X':                                                                    \
139
69.0k
  case 'Y':                                                                    \
140
70.9k
  case 'Z'
141
142
#define ALPHA_CASES                                                            \
143
21.7k
  UCALPHA_CASES:                                                               \
144
22.4k
  LCALPHA_CASES
145
146
#define TOKEN_CASES                                                            \
147
428
  case '!':                                                                    \
148
788
  case '#':                                                                    \
149
1.35k
  case '$':                                                                    \
150
2.11k
  case '%':                                                                    \
151
2.57k
  case '&':                                                                    \
152
3.81k
  case '\'':                                                                   \
153
4.67k
  case '*':                                                                    \
154
5.34k
  case '+':                                                                    \
155
6.71k
  case '-':                                                                    \
156
7.89k
  case '.':                                                                    \
157
8.78k
  case '/':                                                                    \
158
16.5k
  DIGIT_CASES:                                                                 \
159
17.4k
  case ':':                                                                    \
160
31.9k
  UCALPHA_CASES:                                                               \
161
32.2k
  case '^':                                                                    \
162
33.8k
  case '_':                                                                    \
163
34.1k
  case '`':                                                                    \
164
55.5k
  LCALPHA_CASES:                                                               \
165
56.5k
  case '|':                                                                    \
166
56.8k
  case '~'
167
168
#define LCHEXALPHA_CASES                                                       \
169
1.35k
  case 'a':                                                                    \
170
2.62k
  case 'b':                                                                    \
171
3.66k
  case 'c':                                                                    \
172
5.12k
  case 'd':                                                                    \
173
5.67k
  case 'e':                                                                    \
174
6.49k
  case 'f'
175
176
#define X00_1F_CASES                                                           \
177
0
  case 0x00:                                                                   \
178
0
  case 0x01:                                                                   \
179
0
  case 0x02:                                                                   \
180
0
  case 0x03:                                                                   \
181
0
  case 0x04:                                                                   \
182
0
  case 0x05:                                                                   \
183
0
  case 0x06:                                                                   \
184
0
  case 0x07:                                                                   \
185
0
  case 0x08:                                                                   \
186
10
  case 0x09:                                                                   \
187
10
  case 0x0a:                                                                   \
188
10
  case 0x0b:                                                                   \
189
10
  case 0x0c:                                                                   \
190
10
  case 0x0d:                                                                   \
191
10
  case 0x0e:                                                                   \
192
10
  case 0x0f:                                                                   \
193
10
  case 0x10:                                                                   \
194
10
  case 0x11:                                                                   \
195
10
  case 0x12:                                                                   \
196
10
  case 0x13:                                                                   \
197
10
  case 0x14:                                                                   \
198
10
  case 0x15:                                                                   \
199
10
  case 0x16:                                                                   \
200
10
  case 0x17:                                                                   \
201
10
  case 0x18:                                                                   \
202
10
  case 0x19:                                                                   \
203
10
  case 0x1a:                                                                   \
204
10
  case 0x1b:                                                                   \
205
10
  case 0x1c:                                                                   \
206
10
  case 0x1d:                                                                   \
207
10
  case 0x1e:                                                                   \
208
10
  case 0x1f
209
210
#define X20_21_CASES                                                           \
211
479
  case ' ':                                                                    \
212
716
  case '!'
213
214
#define X23_5B_CASES                                                           \
215
1.19k
  case '#':                                                                    \
216
1.44k
  case '$':                                                                    \
217
2.06k
  case '%':                                                                    \
218
2.36k
  case '&':                                                                    \
219
2.84k
  case '\'':                                                                   \
220
3.08k
  case '(':                                                                    \
221
3.37k
  case ')':                                                                    \
222
3.76k
  case '*':                                                                    \
223
4.14k
  case '+':                                                                    \
224
4.43k
  case ',':                                                                    \
225
4.70k
  case '-':                                                                    \
226
5.10k
  case '.':                                                                    \
227
5.36k
  case '/':                                                                    \
228
8.55k
  DIGIT_CASES:                                                                 \
229
8.86k
  case ':':                                                                    \
230
9.11k
  case ';':                                                                    \
231
9.32k
  case '<':                                                                    \
232
9.57k
  case '=':                                                                    \
233
9.84k
  case '>':                                                                    \
234
10.1k
  case '?':                                                                    \
235
10.4k
  case '@':                                                                    \
236
17.2k
  UCALPHA_CASES:                                                               \
237
17.4k
  case '['
238
239
#define X5D_7E_CASES                                                           \
240
17.8k
  case ']':                                                                    \
241
18.0k
  case '^':                                                                    \
242
18.2k
  case '_':                                                                    \
243
18.4k
  case '`':                                                                    \
244
26.4k
  LCALPHA_CASES:                                                               \
245
26.6k
  case '{':                                                                    \
246
26.8k
  case '|':                                                                    \
247
27.1k
  case '}':                                                                    \
248
27.3k
  case '~'
249
250
#define X7F_FF_CASES                                                           \
251
10
  case 0x7f:                                                                   \
252
20
  case 0x80:                                                                   \
253
30
  case 0x81:                                                                   \
254
40
  case 0x82:                                                                   \
255
50
  case 0x83:                                                                   \
256
60
  case 0x84:                                                                   \
257
70
  case 0x85:                                                                   \
258
80
  case 0x86:                                                                   \
259
90
  case 0x87:                                                                   \
260
100
  case 0x88:                                                                   \
261
110
  case 0x89:                                                                   \
262
120
  case 0x8a:                                                                   \
263
130
  case 0x8b:                                                                   \
264
140
  case 0x8c:                                                                   \
265
150
  case 0x8d:                                                                   \
266
160
  case 0x8e:                                                                   \
267
170
  case 0x8f:                                                                   \
268
180
  case 0x90:                                                                   \
269
190
  case 0x91:                                                                   \
270
201
  case 0x92:                                                                   \
271
211
  case 0x93:                                                                   \
272
221
  case 0x94:                                                                   \
273
231
  case 0x95:                                                                   \
274
241
  case 0x96:                                                                   \
275
251
  case 0x97:                                                                   \
276
261
  case 0x98:                                                                   \
277
271
  case 0x99:                                                                   \
278
281
  case 0x9a:                                                                   \
279
291
  case 0x9b:                                                                   \
280
301
  case 0x9c:                                                                   \
281
311
  case 0x9d:                                                                   \
282
321
  case 0x9e:                                                                   \
283
331
  case 0x9f:                                                                   \
284
341
  case 0xa0:                                                                   \
285
351
  case 0xa1:                                                                   \
286
361
  case 0xa2:                                                                   \
287
371
  case 0xa3:                                                                   \
288
381
  case 0xa4:                                                                   \
289
391
  case 0xa5:                                                                   \
290
401
  case 0xa6:                                                                   \
291
411
  case 0xa7:                                                                   \
292
421
  case 0xa8:                                                                   \
293
431
  case 0xa9:                                                                   \
294
441
  case 0xaa:                                                                   \
295
451
  case 0xab:                                                                   \
296
461
  case 0xac:                                                                   \
297
471
  case 0xad:                                                                   \
298
481
  case 0xae:                                                                   \
299
491
  case 0xaf:                                                                   \
300
501
  case 0xb0:                                                                   \
301
511
  case 0xb1:                                                                   \
302
521
  case 0xb2:                                                                   \
303
531
  case 0xb3:                                                                   \
304
541
  case 0xb4:                                                                   \
305
551
  case 0xb5:                                                                   \
306
561
  case 0xb6:                                                                   \
307
571
  case 0xb7:                                                                   \
308
581
  case 0xb8:                                                                   \
309
591
  case 0xb9:                                                                   \
310
601
  case 0xba:                                                                   \
311
611
  case 0xbb:                                                                   \
312
621
  case 0xbc:                                                                   \
313
631
  case 0xbd:                                                                   \
314
643
  case 0xbe:                                                                   \
315
653
  case 0xbf:                                                                   \
316
663
  case 0xc0:                                                                   \
317
673
  case 0xc1:                                                                   \
318
683
  case 0xc2:                                                                   \
319
693
  case 0xc3:                                                                   \
320
703
  case 0xc4:                                                                   \
321
713
  case 0xc5:                                                                   \
322
723
  case 0xc6:                                                                   \
323
733
  case 0xc7:                                                                   \
324
743
  case 0xc8:                                                                   \
325
753
  case 0xc9:                                                                   \
326
763
  case 0xca:                                                                   \
327
773
  case 0xcb:                                                                   \
328
783
  case 0xcc:                                                                   \
329
793
  case 0xcd:                                                                   \
330
803
  case 0xce:                                                                   \
331
813
  case 0xcf:                                                                   \
332
823
  case 0xd0:                                                                   \
333
833
  case 0xd1:                                                                   \
334
843
  case 0xd2:                                                                   \
335
853
  case 0xd3:                                                                   \
336
863
  case 0xd4:                                                                   \
337
873
  case 0xd5:                                                                   \
338
883
  case 0xd6:                                                                   \
339
893
  case 0xd7:                                                                   \
340
903
  case 0xd8:                                                                   \
341
913
  case 0xd9:                                                                   \
342
923
  case 0xda:                                                                   \
343
933
  case 0xdb:                                                                   \
344
943
  case 0xdc:                                                                   \
345
953
  case 0xdd:                                                                   \
346
963
  case 0xde:                                                                   \
347
973
  case 0xdf:                                                                   \
348
983
  case 0xe0:                                                                   \
349
993
  case 0xe1:                                                                   \
350
1.00k
  case 0xe2:                                                                   \
351
1.01k
  case 0xe3:                                                                   \
352
1.02k
  case 0xe4:                                                                   \
353
1.03k
  case 0xe5:                                                                   \
354
1.04k
  case 0xe6:                                                                   \
355
1.05k
  case 0xe7:                                                                   \
356
1.06k
  case 0xe8:                                                                   \
357
1.07k
  case 0xe9:                                                                   \
358
1.08k
  case 0xea:                                                                   \
359
1.09k
  case 0xeb:                                                                   \
360
1.10k
  case 0xec:                                                                   \
361
1.11k
  case 0xed:                                                                   \
362
1.12k
  case 0xee:                                                                   \
363
1.13k
  case 0xef:                                                                   \
364
1.14k
  case 0xf0:                                                                   \
365
1.15k
  case 0xf1:                                                                   \
366
1.16k
  case 0xf2:                                                                   \
367
1.17k
  case 0xf3:                                                                   \
368
1.18k
  case 0xf4:                                                                   \
369
1.19k
  case 0xf5:                                                                   \
370
1.20k
  case 0xf6:                                                                   \
371
1.21k
  case 0xf7:                                                                   \
372
1.22k
  case 0xf8:                                                                   \
373
1.23k
  case 0xf9:                                                                   \
374
1.24k
  case 0xfa:                                                                   \
375
1.25k
  case 0xfb:                                                                   \
376
1.26k
  case 0xfc:                                                                   \
377
1.27k
  case 0xfd:                                                                   \
378
1.28k
  case 0xfe:                                                                   \
379
1.29k
  case 0xff
380
381
10.9k
static int is_ws(uint8_t c) {
382
10.9k
  switch (c) {
383
234
  case ' ':
384
515
  case '\t':
385
515
    return 1;
386
10.4k
  default:
387
10.4k
    return 0;
388
10.9k
  }
389
10.9k
}
390
391
#ifdef __AVX2__
392
#  ifdef _MSC_VER
393
#    include <intrin.h>
394
395
static int ctz(unsigned int v) {
396
  unsigned long n;
397
398
  /* Assume that v is not 0. */
399
  _BitScanForward(&n, v);
400
401
  return (int)n;
402
}
403
#  else /* !_MSC_VER */
404
#    define ctz __builtin_ctz
405
#  endif /* !_MSC_VER */
406
#endif   /* __AVX2__ */
407
408
598k
static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; }
409
410
32.0k
static void parser_discard_ows(sfparse_parser *sfp) {
411
32.5k
  for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
412
515
    ;
413
32.0k
}
414
415
43.0k
static void parser_discard_sp(sfparse_parser *sfp) {
416
50.0k
  for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
417
6.93k
    ;
418
43.0k
}
419
420
77.5k
static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) {
421
77.5k
  sfp->state &= ~SFPARSE_STATE_OP_MASK;
422
77.5k
  sfp->state |= op;
423
77.5k
}
424
425
1.48k
static void parser_unset_inner_list_state(sfparse_parser *sfp) {
426
1.48k
  sfp->state &= ~SFPARSE_STATE_INNER_LIST;
427
1.48k
}
428
429
#ifdef __AVX2__
430
static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
431
  const __m256i us = _mm256_set1_epi8('_');
432
  const __m256i ds = _mm256_set1_epi8('-');
433
  const __m256i dot = _mm256_set1_epi8('.');
434
  const __m256i ast = _mm256_set1_epi8('*');
435
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
436
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
437
  const __m256i r1l = _mm256_set1_epi8('a' - 1);
438
  const __m256i r1r = _mm256_set1_epi8('z' + 1);
439
  __m256i s, x;
440
  uint32_t m;
441
442
  for (; first != last; first += 32) {
443
    s = _mm256_loadu_si256((void *)first);
444
445
    x = _mm256_cmpeq_epi8(s, us);
446
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x);
447
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x);
448
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x);
449
    x = _mm256_or_si256(
450
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
451
      x);
452
    x = _mm256_or_si256(
453
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
454
      x);
455
456
    m = ~(uint32_t)_mm256_movemask_epi8(x);
457
    if (m) {
458
      return first + ctz(m);
459
    }
460
  }
461
462
  return last;
463
}
464
#endif /* __AVX2__ */
465
466
40.5k
static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) {
467
40.5k
  const uint8_t *base;
468
#ifdef __AVX2__
469
  const uint8_t *last;
470
#endif /* __AVX2__ */
471
472
40.5k
  switch (*sfp->pos) {
473
3.45k
  case '*':
474
40.4k
  LCALPHA_CASES:
475
40.4k
    break;
476
56
  default:
477
56
    return SFPARSE_ERR_PARSE;
478
40.5k
  }
479
480
40.4k
  base = sfp->pos++;
481
482
#ifdef __AVX2__
483
  if (sfp->end - sfp->pos >= 32) {
484
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
485
486
    sfp->pos = find_char_key(sfp->pos, last);
487
    if (sfp->pos != last) {
488
      goto fin;
489
    }
490
  }
491
#endif /* __AVX2__ */
492
493
112k
  for (; !parser_eof(sfp); ++sfp->pos) {
494
107k
    switch (*sfp->pos) {
495
476
    case '_':
496
5.10k
    case '-':
497
7.46k
    case '.':
498
9.12k
    case '*':
499
149k
    DIGIT_CASES:
500
1.15M
    LCALPHA_CASES:
501
1.15M
      continue;
502
107k
    }
503
504
36.1k
    break;
505
107k
  }
506
507
#ifdef __AVX2__
508
fin:
509
#endif /* __AVX2__ */
510
40.4k
  if (dest) {
511
29.5k
    dest->base = (uint8_t *)base;
512
29.5k
    dest->len = (size_t)(sfp->pos - dest->base);
513
29.5k
  }
514
515
40.4k
  return 0;
516
40.4k
}
517
518
5.34k
static int parser_number(sfparse_parser *sfp, sfparse_value *dest) {
519
5.34k
  int sign = 1;
520
5.34k
  int64_t value = 0;
521
5.34k
  size_t len = 0;
522
5.34k
  size_t fpos = 0;
523
524
5.34k
  if (*sfp->pos == '-') {
525
342
    ++sfp->pos;
526
342
    if (parser_eof(sfp)) {
527
38
      return SFPARSE_ERR_PARSE;
528
38
    }
529
530
304
    sign = -1;
531
304
  }
532
533
5.34k
  assert(!parser_eof(sfp));
534
535
13.6k
  for (; !parser_eof(sfp); ++sfp->pos) {
536
12.1k
    switch (*sfp->pos) {
537
56.7k
    DIGIT_CASES:
538
56.7k
      if (++len > 15) {
539
10
        return SFPARSE_ERR_PARSE;
540
10
      }
541
542
8.37k
      value *= 10;
543
8.37k
      value += *sfp->pos - '0';
544
545
8.37k
      continue;
546
12.1k
    }
547
548
3.78k
    break;
549
12.1k
  }
550
551
5.29k
  if (len == 0) {
552
18
    return SFPARSE_ERR_PARSE;
553
18
  }
554
555
5.27k
  if (parser_eof(sfp) || *sfp->pos != '.') {
556
4.19k
    if (dest) {
557
2.32k
      dest->type = SFPARSE_TYPE_INTEGER;
558
2.32k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
559
2.32k
      dest->integer = value * sign;
560
2.32k
    }
561
562
4.19k
    return 0;
563
4.19k
  }
564
565
  /* decimal */
566
567
1.08k
  if (len > 12) {
568
10
    return SFPARSE_ERR_PARSE;
569
10
  }
570
571
1.07k
  fpos = len;
572
573
1.07k
  ++sfp->pos;
574
575
2.53k
  for (; !parser_eof(sfp); ++sfp->pos) {
576
2.08k
    switch (*sfp->pos) {
577
12.0k
    DIGIT_CASES:
578
12.0k
      if (++len > 15) {
579
10
        return SFPARSE_ERR_PARSE;
580
10
      }
581
582
1.46k
      value *= 10;
583
1.46k
      value += *sfp->pos - '0';
584
585
1.46k
      continue;
586
2.08k
    }
587
588
613
    break;
589
2.08k
  }
590
591
1.06k
  if (fpos == len || len - fpos > 3) {
592
32
    return SFPARSE_ERR_PARSE;
593
32
  }
594
595
1.02k
  if (dest) {
596
699
    dest->type = SFPARSE_TYPE_DECIMAL;
597
699
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
598
699
    dest->decimal.numer = value * sign;
599
600
699
    switch (len - fpos) {
601
548
    case 1:
602
548
      dest->decimal.denom = 10;
603
604
548
      break;
605
64
    case 2:
606
64
      dest->decimal.denom = 100;
607
608
64
      break;
609
87
    case 3:
610
87
      dest->decimal.denom = 1000;
611
612
87
      break;
613
699
    }
614
699
  }
615
616
1.02k
  return 0;
617
1.02k
}
618
619
587
static int parser_date(sfparse_parser *sfp, sfparse_value *dest) {
620
587
  int rv;
621
587
  sfparse_value val;
622
623
  /* The first byte has already been validated by the caller. */
624
587
  assert('@' == *sfp->pos);
625
626
587
  ++sfp->pos;
627
628
587
  if (parser_eof(sfp)) {
629
18
    return SFPARSE_ERR_PARSE;
630
18
  }
631
632
569
  rv = parser_number(sfp, &val);
633
569
  if (rv != 0) {
634
19
    return rv;
635
19
  }
636
637
550
  if (val.type != SFPARSE_TYPE_INTEGER) {
638
10
    return SFPARSE_ERR_PARSE;
639
10
  }
640
641
540
  if (dest) {
642
194
    *dest = val;
643
194
    dest->type = SFPARSE_TYPE_DATE;
644
194
  }
645
646
540
  return 0;
647
550
}
648
649
#ifdef __AVX2__
650
static const uint8_t *find_char_string(const uint8_t *first,
651
                                       const uint8_t *last) {
652
  const __m256i bs = _mm256_set1_epi8('\\');
653
  const __m256i dq = _mm256_set1_epi8('"');
654
  const __m256i del = _mm256_set1_epi8(0x7f);
655
  const __m256i sp = _mm256_set1_epi8(' ');
656
  __m256i s, x;
657
  uint32_t m;
658
659
  for (; first != last; first += 32) {
660
    s = _mm256_loadu_si256((void *)first);
661
662
    x = _mm256_cmpgt_epi8(sp, s);
663
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x);
664
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x);
665
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x);
666
667
    m = (uint32_t)_mm256_movemask_epi8(x);
668
    if (m) {
669
      return first + ctz(m);
670
    }
671
  }
672
673
  return last;
674
}
675
#endif /* __AVX2__ */
676
677
3.26k
static int parser_string(sfparse_parser *sfp, sfparse_value *dest) {
678
3.26k
  const uint8_t *base;
679
#ifdef __AVX2__
680
  const uint8_t *last;
681
#endif /* __AVX2__ */
682
3.26k
  uint32_t flags = SFPARSE_VALUE_FLAG_NONE;
683
684
  /* The first byte has already been validated by the caller. */
685
3.26k
  assert('"' == *sfp->pos);
686
687
3.26k
  base = ++sfp->pos;
688
689
#ifdef __AVX2__
690
  for (; sfp->end - sfp->pos >= 32; ++sfp->pos) {
691
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
692
693
    sfp->pos = find_char_string(sfp->pos, last);
694
    if (sfp->pos == last) {
695
      break;
696
    }
697
698
    switch (*sfp->pos) {
699
    case '\\':
700
      ++sfp->pos;
701
      if (parser_eof(sfp)) {
702
        return SFPARSE_ERR_PARSE;
703
      }
704
705
      switch (*sfp->pos) {
706
      case '"':
707
      case '\\':
708
        flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
709
710
        break;
711
      default:
712
        return SFPARSE_ERR_PARSE;
713
      }
714
715
      break;
716
    case '"':
717
      goto fin;
718
    default:
719
      return SFPARSE_ERR_PARSE;
720
    }
721
  }
722
#endif /* __AVX2__ */
723
724
31.0k
  for (; !parser_eof(sfp); ++sfp->pos) {
725
30.7k
    switch (*sfp->pos) {
726
1.19k
    X20_21_CASES:
727
563k
    X23_5B_CASES:
728
563k
    X5D_7E_CASES:
729
27.3k
      break;
730
459
    case '\\':
731
459
      ++sfp->pos;
732
459
      if (parser_eof(sfp)) {
733
14
        return SFPARSE_ERR_PARSE;
734
14
      }
735
736
445
      switch (*sfp->pos) {
737
194
      case '"':
738
435
      case '\\':
739
435
        flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
740
741
435
        break;
742
10
      default:
743
10
        return SFPARSE_ERR_PARSE;
744
445
      }
745
746
435
      break;
747
2.83k
    case '"':
748
2.83k
      goto fin;
749
49
    default:
750
49
      return SFPARSE_ERR_PARSE;
751
30.7k
    }
752
30.7k
  }
753
754
362
  return SFPARSE_ERR_PARSE;
755
756
2.83k
fin:
757
2.83k
  if (dest) {
758
2.50k
    dest->type = SFPARSE_TYPE_STRING;
759
2.50k
    dest->flags = flags;
760
2.50k
    dest->vec.len = (size_t)(sfp->pos - base);
761
2.50k
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
762
2.50k
  }
763
764
2.83k
  ++sfp->pos;
765
766
2.83k
  return 0;
767
3.26k
}
768
769
#ifdef __AVX2__
770
static const uint8_t *find_char_token(const uint8_t *first,
771
                                      const uint8_t *last) {
772
  /* r0: !..:, excluding "(),
773
     r1: A..Z
774
     r2: ^..~, excluding {} */
775
  const __m256i r0l = _mm256_set1_epi8('!' - 1);
776
  const __m256i r0r = _mm256_set1_epi8(':' + 1);
777
  const __m256i dq = _mm256_set1_epi8('"');
778
  const __m256i prl = _mm256_set1_epi8('(');
779
  const __m256i prr = _mm256_set1_epi8(')');
780
  const __m256i comma = _mm256_set1_epi8(',');
781
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
782
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
783
  const __m256i r2l = _mm256_set1_epi8('^' - 1);
784
  const __m256i r2r = _mm256_set1_epi8('~' + 1);
785
  const __m256i cbl = _mm256_set1_epi8('{');
786
  const __m256i cbr = _mm256_set1_epi8('}');
787
  __m256i s, x;
788
  uint32_t m;
789
790
  for (; first != last; first += 32) {
791
    s = _mm256_loadu_si256((void *)first);
792
793
    x = _mm256_andnot_si256(
794
      _mm256_cmpeq_epi8(s, comma),
795
      _mm256_andnot_si256(
796
        _mm256_cmpeq_epi8(s, prr),
797
        _mm256_andnot_si256(
798
          _mm256_cmpeq_epi8(s, prl),
799
          _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq),
800
                              _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l),
801
                                               _mm256_cmpgt_epi8(r0r, s))))));
802
    x = _mm256_or_si256(
803
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
804
      x);
805
    x = _mm256_or_si256(
806
      _mm256_andnot_si256(
807
        _mm256_cmpeq_epi8(s, cbr),
808
        _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl),
809
                            _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l),
810
                                             _mm256_cmpgt_epi8(r2r, s)))),
811
      x);
812
813
    m = ~(uint32_t)_mm256_movemask_epi8(x);
814
    if (m) {
815
      return first + ctz(m);
816
    }
817
  }
818
819
  return last;
820
}
821
#endif /* __AVX2__ */
822
823
17.1k
static int parser_token(sfparse_parser *sfp, sfparse_value *dest) {
824
17.1k
  const uint8_t *base;
825
#ifdef __AVX2__
826
  const uint8_t *last;
827
#endif /* __AVX2__ */
828
829
  /* The first byte has already been validated by the caller. */
830
17.1k
  base = sfp->pos++;
831
832
#ifdef __AVX2__
833
  if (sfp->end - sfp->pos >= 32) {
834
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
835
836
    sfp->pos = find_char_token(sfp->pos, last);
837
    if (sfp->pos != last) {
838
      goto fin;
839
    }
840
  }
841
#endif /* __AVX2__ */
842
843
73.9k
  for (; !parser_eof(sfp); ++sfp->pos) {
844
67.1k
    switch (*sfp->pos) {
845
2.23M
    TOKEN_CASES:
846
2.23M
      continue;
847
67.1k
    }
848
849
10.3k
    break;
850
67.1k
  }
851
852
#ifdef __AVX2__
853
fin:
854
#endif /* __AVX2__ */
855
17.1k
  if (dest) {
856
8.27k
    dest->type = SFPARSE_TYPE_TOKEN;
857
8.27k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
858
8.27k
    dest->vec.base = (uint8_t *)base;
859
8.27k
    dest->vec.len = (size_t)(sfp->pos - base);
860
8.27k
  }
861
862
17.1k
  return 0;
863
17.1k
}
864
865
#ifdef __AVX2__
866
static const uint8_t *find_char_byteseq(const uint8_t *first,
867
                                        const uint8_t *last) {
868
  const __m256i pls = _mm256_set1_epi8('+');
869
  const __m256i fs = _mm256_set1_epi8('/');
870
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
871
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
872
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
873
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
874
  const __m256i r2l = _mm256_set1_epi8('a' - 1);
875
  const __m256i r2r = _mm256_set1_epi8('z' + 1);
876
  __m256i s, x;
877
  uint32_t m;
878
879
  for (; first != last; first += 32) {
880
    s = _mm256_loadu_si256((void *)first);
881
882
    x = _mm256_cmpeq_epi8(s, pls);
883
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x);
884
    x = _mm256_or_si256(
885
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
886
      x);
887
    x = _mm256_or_si256(
888
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
889
      x);
890
    x = _mm256_or_si256(
891
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)),
892
      x);
893
894
    m = ~(uint32_t)_mm256_movemask_epi8(x);
895
    if (m) {
896
      return first + ctz(m);
897
    }
898
  }
899
900
  return last;
901
}
902
#endif /* __AVX2__ */
903
904
3.25k
static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) {
905
3.25k
  const uint8_t *base;
906
#ifdef __AVX2__
907
  const uint8_t *last;
908
#endif /* __AVX2__ */
909
910
  /* The first byte has already been validated by the caller. */
911
3.25k
  assert(':' == *sfp->pos);
912
913
3.25k
  base = ++sfp->pos;
914
915
#ifdef __AVX2__
916
  if (sfp->end - sfp->pos >= 32) {
917
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
918
    sfp->pos = find_char_byteseq(sfp->pos, last);
919
  }
920
#endif /* __AVX2__ */
921
922
25.5k
  for (; !parser_eof(sfp); ++sfp->pos) {
923
25.3k
    switch (*sfp->pos) {
924
257
    case '+':
925
680
    case '/':
926
30.6k
    DIGIT_CASES:
927
30.6k
    ALPHA_CASES:
928
22.3k
      continue;
929
901
    case '=':
930
901
      switch ((sfp->pos - base) & 0x3) {
931
11
      case 0:
932
21
      case 1:
933
21
        return SFPARSE_ERR_PARSE;
934
578
      case 2:
935
578
        ++sfp->pos;
936
937
578
        if (parser_eof(sfp)) {
938
11
          return SFPARSE_ERR_PARSE;
939
11
        }
940
941
567
        if (*sfp->pos == '=') {
942
226
          ++sfp->pos;
943
226
        }
944
945
567
        break;
946
302
      case 3:
947
302
        ++sfp->pos;
948
949
302
        break;
950
901
      }
951
952
869
      if (parser_eof(sfp) || *sfp->pos != ':') {
953
36
        return SFPARSE_ERR_PARSE;
954
36
      }
955
956
833
      goto fin;
957
2.06k
    case ':':
958
2.06k
      if (((sfp->pos - base) & 0x3) == 1) {
959
10
        return SFPARSE_ERR_PARSE;
960
10
      }
961
962
2.05k
      goto fin;
963
2.05k
    default:
964
23
      return SFPARSE_ERR_PARSE;
965
25.3k
    }
966
25.3k
  }
967
968
271
  return SFPARSE_ERR_PARSE;
969
970
2.88k
fin:
971
2.88k
  if (dest) {
972
2.52k
    dest->type = SFPARSE_TYPE_BYTESEQ;
973
2.52k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
974
2.52k
    dest->vec.len = (size_t)(sfp->pos - base);
975
2.52k
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
976
2.52k
  }
977
978
2.88k
  ++sfp->pos;
979
980
2.88k
  return 0;
981
3.25k
}
982
983
664
static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) {
984
664
  int b;
985
986
  /* The first byte has already been validated by the caller. */
987
664
  assert('?' == *sfp->pos);
988
989
664
  ++sfp->pos;
990
991
664
  if (parser_eof(sfp)) {
992
18
    return SFPARSE_ERR_PARSE;
993
18
  }
994
995
646
  switch (*sfp->pos) {
996
411
  case '0':
997
411
    b = 0;
998
999
411
    break;
1000
225
  case '1':
1001
225
    b = 1;
1002
1003
225
    break;
1004
10
  default:
1005
10
    return SFPARSE_ERR_PARSE;
1006
646
  }
1007
1008
636
  ++sfp->pos;
1009
1010
636
  if (dest) {
1011
212
    dest->type = SFPARSE_TYPE_BOOLEAN;
1012
212
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
1013
212
    dest->boolean = b;
1014
212
  }
1015
1016
636
  return 0;
1017
646
}
1018
1019
8.58k
static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
1020
8.58k
  uint8_t c, b = **ppos;
1021
1022
8.58k
  switch (b) {
1023
5.06k
  DIGIT_CASES:
1024
5.06k
    c = (uint8_t)((b - '0') << 4);
1025
1026
5.06k
    break;
1027
3.50k
  LCHEXALPHA_CASES:
1028
3.50k
    c = (uint8_t)((b - 'a' + 10) << 4);
1029
1030
3.50k
    break;
1031
10
  default:
1032
10
    return -1;
1033
8.58k
  }
1034
1035
8.57k
  b = *++*ppos;
1036
1037
8.57k
  switch (b) {
1038
5.57k
  DIGIT_CASES:
1039
5.57k
    c |= (uint8_t)(b - '0');
1040
1041
5.57k
    break;
1042
2.98k
  LCHEXALPHA_CASES:
1043
2.98k
    c |= (uint8_t)(b - 'a' + 10);
1044
1045
2.98k
    break;
1046
13
  default:
1047
13
    return -1;
1048
8.57k
  }
1049
1050
8.56k
  *pc = c;
1051
8.56k
  ++*ppos;
1052
1053
8.56k
  return 0;
1054
8.57k
}
1055
1056
/* Start of utf8 dfa */
1057
/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
1058
 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
1059
 *
1060
 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
1061
 *
1062
 * Permission is hereby granted, free of charge, to any person
1063
 * obtaining a copy of this software and associated documentation
1064
 * files (the "Software"), to deal in the Software without
1065
 * restriction, including without limitation the rights to use, copy,
1066
 * modify, merge, publish, distribute, sublicense, and/or sell copies
1067
 * of the Software, and to permit persons to whom the Software is
1068
 * furnished to do so, subject to the following conditions:
1069
 *
1070
 * The above copyright notice and this permission notice shall be
1071
 * included in all copies or substantial portions of the Software.
1072
 *
1073
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1074
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1075
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1076
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
1077
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1078
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1079
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1080
 * SOFTWARE.
1081
 */
1082
36.9k
#define UTF8_ACCEPT 0
1083
8.56k
#define UTF8_REJECT 12
1084
1085
/* clang-format off */
1086
static const uint8_t utf8d[] = {
1087
  /*
1088
   * The first part of the table maps bytes to character classes that
1089
   * to reduce the size of the transition table and create bitmasks.
1090
   */
1091
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1092
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1093
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1094
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1095
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
1096
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
1097
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1098
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
1099
1100
   /*
1101
    * The second part is a transition table that maps a combination
1102
    * of a state of the automaton and a character class to a state.
1103
    */
1104
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
1105
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
1106
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
1107
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
1108
  12,36,12,12,12,12,12,12,12,12,12,12,
1109
};
1110
/* clang-format on */
1111
1112
8.56k
static void utf8_decode(uint32_t *state, uint8_t byte) {
1113
8.56k
  *state = utf8d[256 + *state + utf8d[byte]];
1114
8.56k
}
1115
1116
/* End of utf8 dfa */
1117
1118
4.15k
static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) {
1119
4.15k
  const uint8_t *base;
1120
4.15k
  uint8_t c;
1121
4.15k
  uint32_t utf8state = UTF8_ACCEPT;
1122
1123
4.15k
  assert('%' == *sfp->pos);
1124
1125
4.15k
  ++sfp->pos;
1126
1127
4.15k
  if (parser_eof(sfp) || *sfp->pos != '"') {
1128
28
    return SFPARSE_ERR_PARSE;
1129
28
  }
1130
1131
4.12k
  base = ++sfp->pos;
1132
1133
42.7k
  for (; !parser_eof(sfp);) {
1134
42.7k
    switch (*sfp->pos) {
1135
230
    X00_1F_CASES:
1136
1.29k
    X7F_FF_CASES:
1137
1.29k
      return SFPARSE_ERR_PARSE;
1138
8.59k
    case '%':
1139
8.59k
      ++sfp->pos;
1140
1141
8.59k
      if (sfp->pos + 2 > sfp->end) {
1142
10
        return SFPARSE_ERR_PARSE;
1143
10
      }
1144
1145
8.58k
      if (pctdecode(&c, &sfp->pos) != 0) {
1146
23
        return SFPARSE_ERR_PARSE;
1147
23
      }
1148
1149
8.56k
      utf8_decode(&utf8state, c);
1150
8.56k
      if (utf8state == UTF8_REJECT) {
1151
16
        return SFPARSE_ERR_PARSE;
1152
16
      }
1153
1154
8.54k
      break;
1155
8.54k
    case '"':
1156
2.74k
      if (utf8state != UTF8_ACCEPT) {
1157
11
        return SFPARSE_ERR_PARSE;
1158
11
      }
1159
1160
2.72k
      if (dest) {
1161
1.87k
        dest->type = SFPARSE_TYPE_DISPSTRING;
1162
1.87k
        dest->flags = SFPARSE_VALUE_FLAG_NONE;
1163
1.87k
        dest->vec.len = (size_t)(sfp->pos - base);
1164
1.87k
        dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
1165
1.87k
      }
1166
1167
2.72k
      ++sfp->pos;
1168
1169
2.72k
      return 0;
1170
30.1k
    default:
1171
30.1k
      if (utf8state != UTF8_ACCEPT) {
1172
12
        return SFPARSE_ERR_PARSE;
1173
12
      }
1174
1175
30.0k
      ++sfp->pos;
1176
42.7k
    }
1177
42.7k
  }
1178
1179
26
  return SFPARSE_ERR_PARSE;
1180
4.12k
}
1181
1182
33.8k
static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) {
1183
33.8k
  switch (*sfp->pos) {
1184
3.26k
  case '"':
1185
3.26k
    return parser_string(sfp, dest);
1186
342
  case '-':
1187
4.77k
  DIGIT_CASES:
1188
4.77k
    return parser_number(sfp, dest);
1189
587
  case '@':
1190
587
    return parser_date(sfp, dest);
1191
3.25k
  case ':':
1192
3.25k
    return parser_byteseq(sfp, dest);
1193
664
  case '?':
1194
664
    return parser_boolean(sfp, dest);
1195
790
  case '*':
1196
17.1k
  ALPHA_CASES:
1197
17.1k
    return parser_token(sfp, dest);
1198
4.15k
  case '%':
1199
4.15k
    return parser_dispstring(sfp, dest);
1200
30
  default:
1201
30
    return SFPARSE_ERR_PARSE;
1202
33.8k
  }
1203
33.8k
}
1204
1205
static int parser_skip_inner_list(sfparse_parser *sfp);
1206
1207
int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
1208
44.8k
                         sfparse_value *dest_value) {
1209
44.8k
  int rv;
1210
1211
44.8k
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
1212
0
  case SFPARSE_STATE_BEFORE:
1213
0
    rv = parser_skip_inner_list(sfp);
1214
0
    if (rv != 0) {
1215
0
      return rv;
1216
0
    }
1217
1218
    /* fall through */
1219
34.9k
  case SFPARSE_STATE_BEFORE_PARAMS:
1220
34.9k
    parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);
1221
1222
34.9k
    break;
1223
9.92k
  case SFPARSE_STATE_PARAMS:
1224
9.92k
    break;
1225
0
  default:
1226
0
    assert(0);
1227
0
    abort();
1228
44.8k
  }
1229
1230
44.8k
  if (parser_eof(sfp) || *sfp->pos != ';') {
1231
33.8k
    parser_set_op_state(sfp, SFPARSE_STATE_AFTER);
1232
1233
33.8k
    return SFPARSE_ERR_EOF;
1234
33.8k
  }
1235
1236
10.9k
  ++sfp->pos;
1237
1238
10.9k
  parser_discard_sp(sfp);
1239
10.9k
  if (parser_eof(sfp)) {
1240
19
    return SFPARSE_ERR_PARSE;
1241
19
  }
1242
1243
10.9k
  rv = parser_key(sfp, dest_key);
1244
10.9k
  if (rv != 0) {
1245
21
    return rv;
1246
21
  }
1247
1248
10.9k
  if (parser_eof(sfp) || *sfp->pos != '=') {
1249
3.80k
    if (dest_value) {
1250
0
      dest_value->type = SFPARSE_TYPE_BOOLEAN;
1251
0
      dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
1252
0
      dest_value->boolean = 1;
1253
0
    }
1254
1255
3.80k
    return 0;
1256
3.80k
  }
1257
1258
7.11k
  ++sfp->pos;
1259
1260
7.11k
  if (parser_eof(sfp)) {
1261
10
    return SFPARSE_ERR_PARSE;
1262
10
  }
1263
1264
7.10k
  return parser_bare_item(sfp, dest_value);
1265
7.11k
}
1266
1267
34.9k
static int parser_skip_params(sfparse_parser *sfp) {
1268
34.9k
  int rv;
1269
1270
44.8k
  for (;;) {
1271
44.8k
    rv = sfparse_parser_param(sfp, NULL, NULL);
1272
44.8k
    switch (rv) {
1273
9.92k
    case 0:
1274
9.92k
      break;
1275
33.8k
    case SFPARSE_ERR_EOF:
1276
33.8k
      return 0;
1277
1.02k
    case SFPARSE_ERR_PARSE:
1278
1.02k
      return rv;
1279
0
    default:
1280
0
      assert(0);
1281
0
      abort();
1282
44.8k
    }
1283
44.8k
  }
1284
34.9k
}
1285
1286
9.06k
int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
1287
9.06k
  int rv;
1288
1289
9.06k
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
1290
1.83k
  case SFPARSE_STATE_BEFORE:
1291
1.83k
    parser_discard_sp(sfp);
1292
1.83k
    if (parser_eof(sfp)) {
1293
18
      return SFPARSE_ERR_PARSE;
1294
18
    }
1295
1296
1.81k
    break;
1297
7.23k
  case SFPARSE_STATE_BEFORE_PARAMS:
1298
7.23k
    rv = parser_skip_params(sfp);
1299
7.23k
    if (rv != 0) {
1300
220
      return rv;
1301
220
    }
1302
1303
    /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
1304
       another state without reading the state. */
1305
    /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */
1306
1307
    /* fall through */
1308
7.01k
  case SFPARSE_STATE_AFTER:
1309
7.01k
    if (parser_eof(sfp)) {
1310
23
      return SFPARSE_ERR_PARSE;
1311
23
    }
1312
1313
6.99k
    switch (*sfp->pos) {
1314
5.62k
    case ' ':
1315
5.62k
      parser_discard_sp(sfp);
1316
5.62k
      if (parser_eof(sfp)) {
1317
0
        return SFPARSE_ERR_PARSE;
1318
0
      }
1319
1320
5.62k
      break;
1321
5.62k
    case ')':
1322
1.35k
      break;
1323
10
    default:
1324
10
      return SFPARSE_ERR_PARSE;
1325
6.99k
    }
1326
1327
6.98k
    break;
1328
6.98k
  default:
1329
0
    assert(0);
1330
0
    abort();
1331
9.06k
  }
1332
1333
8.79k
  if (*sfp->pos == ')') {
1334
1.48k
    ++sfp->pos;
1335
1336
1.48k
    parser_unset_inner_list_state(sfp);
1337
1.48k
    parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
1338
1339
1.48k
    return SFPARSE_ERR_EOF;
1340
1.48k
  }
1341
1342
7.31k
  rv = parser_bare_item(sfp, dest);
1343
7.31k
  if (rv != 0) {
1344
80
    return rv;
1345
80
  }
1346
1347
7.23k
  parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
1348
1349
7.23k
  return 0;
1350
7.31k
}
1351
1352
1.83k
static int parser_skip_inner_list(sfparse_parser *sfp) {
1353
1.83k
  int rv;
1354
1355
9.06k
  for (;;) {
1356
9.06k
    rv = sfparse_parser_inner_list(sfp, NULL);
1357
9.06k
    switch (rv) {
1358
7.23k
    case 0:
1359
7.23k
      break;
1360
1.48k
    case SFPARSE_ERR_EOF:
1361
1.48k
      return 0;
1362
351
    case SFPARSE_ERR_PARSE:
1363
351
      return rv;
1364
0
    default:
1365
0
      assert(0);
1366
0
      abort();
1367
9.06k
    }
1368
9.06k
  }
1369
1.83k
}
1370
1371
26.8k
static int parser_next_key_or_item(sfparse_parser *sfp) {
1372
26.8k
  parser_discard_ows(sfp);
1373
1374
26.8k
  if (parser_eof(sfp)) {
1375
21.6k
    return SFPARSE_ERR_EOF;
1376
21.6k
  }
1377
1378
5.24k
  if (*sfp->pos != ',') {
1379
57
    return SFPARSE_ERR_PARSE;
1380
57
  }
1381
1382
5.19k
  ++sfp->pos;
1383
1384
5.19k
  parser_discard_ows(sfp);
1385
5.19k
  if (parser_eof(sfp)) {
1386
18
    return SFPARSE_ERR_PARSE;
1387
18
  }
1388
1389
5.17k
  return 0;
1390
5.19k
}
1391
1392
29.5k
static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) {
1393
29.5k
  int rv;
1394
1395
29.5k
  if (parser_eof(sfp) || *(sfp->pos) != '=') {
1396
    /* Boolean true */
1397
8.25k
    if (dest) {
1398
8.25k
      dest->type = SFPARSE_TYPE_BOOLEAN;
1399
8.25k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1400
8.25k
      dest->boolean = 1;
1401
8.25k
    }
1402
1403
8.25k
    sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
1404
1405
8.25k
    return 0;
1406
8.25k
  }
1407
1408
21.3k
  ++sfp->pos;
1409
1410
21.3k
  if (parser_eof(sfp)) {
1411
20
    return SFPARSE_ERR_PARSE;
1412
20
  }
1413
1414
21.2k
  if (*sfp->pos == '(') {
1415
1.85k
    if (dest) {
1416
1.85k
      dest->type = SFPARSE_TYPE_INNER_LIST;
1417
1.85k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1418
1.85k
    }
1419
1420
1.85k
    ++sfp->pos;
1421
1422
1.85k
    sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE;
1423
1424
1.85k
    return 0;
1425
1.85k
  }
1426
1427
19.4k
  rv = parser_bare_item(sfp, dest);
1428
19.4k
  if (rv != 0) {
1429
1.37k
    return rv;
1430
1.37k
  }
1431
1432
18.0k
  sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
1433
1434
18.0k
  return 0;
1435
19.4k
}
1436
1437
int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
1438
52.7k
                        sfparse_value *dest_value) {
1439
52.7k
  int rv;
1440
1441
52.7k
  switch (sfp->state) {
1442
1.83k
  case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
1443
1.83k
    rv = parser_skip_inner_list(sfp);
1444
1.83k
    if (rv != 0) {
1445
351
      return rv;
1446
351
    }
1447
1448
    /* fall through */
1449
27.6k
  case SFPARSE_STATE_DICT_BEFORE_PARAMS:
1450
27.6k
    rv = parser_skip_params(sfp);
1451
27.6k
    if (rv != 0) {
1452
803
      return rv;
1453
803
    }
1454
1455
    /* fall through */
1456
26.8k
  case SFPARSE_STATE_DICT_AFTER:
1457
26.8k
    rv = parser_next_key_or_item(sfp);
1458
26.8k
    if (rv != 0) {
1459
21.7k
      return rv;
1460
21.7k
    }
1461
1462
5.17k
    break;
1463
24.6k
  case SFPARSE_STATE_INITIAL:
1464
24.6k
    parser_discard_sp(sfp);
1465
1466
24.6k
    if (parser_eof(sfp)) {
1467
260
      return SFPARSE_ERR_EOF;
1468
260
    }
1469
1470
24.4k
    break;
1471
24.4k
  default:
1472
0
    assert(0);
1473
0
    abort();
1474
52.7k
  }
1475
1476
29.5k
  rv = parser_key(sfp, dest_key);
1477
29.5k
  if (rv != 0) {
1478
35
    return rv;
1479
35
  }
1480
1481
29.5k
  return parser_dict_value(sfp, dest_value);
1482
29.5k
}
1483
1484
0
int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
1485
0
  int rv;
1486
1487
0
  switch (sfp->state) {
1488
0
  case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
1489
0
    rv = parser_skip_inner_list(sfp);
1490
0
    if (rv != 0) {
1491
0
      return rv;
1492
0
    }
1493
1494
    /* fall through */
1495
0
  case SFPARSE_STATE_LIST_BEFORE_PARAMS:
1496
0
    rv = parser_skip_params(sfp);
1497
0
    if (rv != 0) {
1498
0
      return rv;
1499
0
    }
1500
1501
    /* fall through */
1502
0
  case SFPARSE_STATE_LIST_AFTER:
1503
0
    rv = parser_next_key_or_item(sfp);
1504
0
    if (rv != 0) {
1505
0
      return rv;
1506
0
    }
1507
1508
0
    break;
1509
0
  case SFPARSE_STATE_INITIAL:
1510
0
    parser_discard_sp(sfp);
1511
1512
0
    if (parser_eof(sfp)) {
1513
0
      return SFPARSE_ERR_EOF;
1514
0
    }
1515
1516
0
    break;
1517
0
  default:
1518
0
    assert(0);
1519
0
    abort();
1520
0
  }
1521
1522
0
  if (*sfp->pos == '(') {
1523
0
    if (dest) {
1524
0
      dest->type = SFPARSE_TYPE_INNER_LIST;
1525
0
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1526
0
    }
1527
1528
0
    ++sfp->pos;
1529
1530
0
    sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;
1531
1532
0
    return 0;
1533
0
  }
1534
1535
0
  rv = parser_bare_item(sfp, dest);
1536
0
  if (rv != 0) {
1537
0
    return rv;
1538
0
  }
1539
1540
0
  sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;
1541
1542
0
  return 0;
1543
0
}
1544
1545
0
int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
1546
0
  int rv;
1547
1548
0
  switch (sfp->state) {
1549
0
  case SFPARSE_STATE_INITIAL:
1550
0
    parser_discard_sp(sfp);
1551
1552
0
    if (parser_eof(sfp)) {
1553
0
      return SFPARSE_ERR_PARSE;
1554
0
    }
1555
1556
0
    break;
1557
0
  case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
1558
0
    rv = parser_skip_inner_list(sfp);
1559
0
    if (rv != 0) {
1560
0
      return rv;
1561
0
    }
1562
1563
    /* fall through */
1564
0
  case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
1565
0
    rv = parser_skip_params(sfp);
1566
0
    if (rv != 0) {
1567
0
      return rv;
1568
0
    }
1569
1570
    /* fall through */
1571
0
  case SFPARSE_STATE_ITEM_AFTER:
1572
0
    parser_discard_sp(sfp);
1573
1574
0
    if (!parser_eof(sfp)) {
1575
0
      return SFPARSE_ERR_PARSE;
1576
0
    }
1577
1578
0
    return SFPARSE_ERR_EOF;
1579
0
  default:
1580
0
    assert(0);
1581
0
    abort();
1582
0
  }
1583
1584
0
  if (*sfp->pos == '(') {
1585
0
    if (dest) {
1586
0
      dest->type = SFPARSE_TYPE_INNER_LIST;
1587
0
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1588
0
    }
1589
1590
0
    ++sfp->pos;
1591
1592
0
    sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;
1593
1594
0
    return 0;
1595
0
  }
1596
1597
0
  rv = parser_bare_item(sfp, dest);
1598
0
  if (rv != 0) {
1599
0
    return rv;
1600
0
  }
1601
1602
0
  sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;
1603
1604
0
  return 0;
1605
0
}
1606
1607
void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data,
1608
24.6k
                         size_t datalen) {
1609
24.6k
  if (datalen == 0) {
1610
260
    sfp->pos = sfp->end = NULL;
1611
24.4k
  } else {
1612
24.4k
    sfp->pos = data;
1613
24.4k
    sfp->end = data + datalen;
1614
24.4k
  }
1615
1616
24.6k
  sfp->state = SFPARSE_STATE_INITIAL;
1617
24.6k
}
1618
1619
0
void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) {
1620
0
  const uint8_t *p, *q;
1621
0
  uint8_t *o;
1622
0
  size_t len, slen;
1623
1624
0
  if (src->len == 0) {
1625
0
    dest->len = 0;
1626
1627
0
    return;
1628
0
  }
1629
1630
0
  o = dest->base;
1631
0
  p = src->base;
1632
0
  len = src->len;
1633
1634
0
  for (;;) {
1635
0
    q = memchr(p, '\\', len);
1636
0
    if (q == NULL) {
1637
0
      memcpy(o, p, len);
1638
0
      o += len;
1639
1640
0
      dest->len = (size_t)(o - dest->base);
1641
1642
0
      return;
1643
0
    }
1644
1645
0
    slen = (size_t)(q - p);
1646
0
    memcpy(o, p, slen);
1647
0
    o += slen;
1648
1649
0
    p = q + 1;
1650
0
    *o++ = *p++;
1651
0
    len -= slen + 2;
1652
0
  }
1653
0
}
1654
1655
0
void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
1656
0
  static const int index_tbl[] = {
1657
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1658
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1659
0
    -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
1660
0
    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
1661
0
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
1662
0
    -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
1663
0
    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1664
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1665
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1666
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1667
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1668
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1669
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1670
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1};
1671
0
  uint8_t *o;
1672
0
  const uint8_t *p, *end;
1673
0
  uint32_t n;
1674
0
  size_t i, left;
1675
0
  int idx;
1676
1677
0
  if (src->len == 0) {
1678
0
    dest->len = 0;
1679
1680
0
    return;
1681
0
  }
1682
1683
0
  o = dest->base;
1684
0
  p = src->base;
1685
0
  left = src->len & 0x3;
1686
0
  if (left == 0 && src->base[src->len - 1] == '=') {
1687
0
    left = 4;
1688
0
  }
1689
0
  end = src->base + src->len - left;
1690
1691
0
  for (; p != end;) {
1692
0
    n = 0;
1693
1694
0
    for (i = 1; i <= 4; ++i, ++p) {
1695
0
      idx = index_tbl[*p];
1696
1697
0
      assert(idx != -1);
1698
1699
0
      n += (uint32_t)(idx << (24 - i * 6));
1700
0
    }
1701
1702
0
    *o++ = (uint8_t)(n >> 16);
1703
0
    *o++ = (n >> 8) & 0xffu;
1704
0
    *o++ = n & 0xffu;
1705
0
  }
1706
1707
0
  switch (left) {
1708
0
  case 0:
1709
0
    goto fin;
1710
0
  case 1:
1711
0
    assert(0);
1712
0
    abort();
1713
0
  case 3:
1714
0
    if (src->base[src->len - 1] == '=') {
1715
0
      left = 2;
1716
0
    }
1717
1718
0
    break;
1719
0
  case 4:
1720
0
    assert('=' == src->base[src->len - 1]);
1721
1722
0
    if (src->base[src->len - 2] == '=') {
1723
0
      left = 2;
1724
0
    } else {
1725
0
      left = 3;
1726
0
    }
1727
1728
0
    break;
1729
0
  }
1730
1731
0
  switch (left) {
1732
0
  case 2:
1733
0
    *o = (uint8_t)(index_tbl[*p++] << 2);
1734
0
    *o++ |= (uint8_t)(index_tbl[*p++] >> 4);
1735
1736
0
    break;
1737
0
  case 3:
1738
0
    n = (uint32_t)(index_tbl[*p++] << 10);
1739
0
    n += (uint32_t)(index_tbl[*p++] << 4);
1740
0
    n += (uint32_t)(index_tbl[*p++] >> 2);
1741
0
    *o++ = (n >> 8) & 0xffu;
1742
0
    *o++ = n & 0xffu;
1743
1744
0
    break;
1745
0
  }
1746
1747
0
fin:
1748
0
  dest->len = (size_t)(o - dest->base);
1749
0
}
1750
1751
0
void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) {
1752
0
  const uint8_t *p, *q;
1753
0
  uint8_t *o;
1754
0
  size_t len, slen;
1755
1756
0
  if (src->len == 0) {
1757
0
    dest->len = 0;
1758
1759
0
    return;
1760
0
  }
1761
1762
0
  o = dest->base;
1763
0
  p = src->base;
1764
0
  len = src->len;
1765
1766
0
  for (;;) {
1767
0
    q = memchr(p, '%', len);
1768
0
    if (q == NULL) {
1769
0
      memcpy(o, p, len);
1770
0
      o += len;
1771
1772
0
      dest->len = (size_t)(o - dest->base);
1773
1774
0
      return;
1775
0
    }
1776
1777
0
    slen = (size_t)(q - p);
1778
0
    memcpy(o, p, slen);
1779
0
    o += slen;
1780
1781
0
    p = q + 1;
1782
1783
0
    pctdecode(o++, &p);
1784
1785
0
    len -= slen + 3;
1786
0
  }
1787
0
}