Coverage Report

Created: 2025-08-29 06:03

/src/nghttp2/lib/sfparse.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * sfparse
3
 *
4
 * Copyright (c) 2023 sfparse contributors
5
 * Copyright (c) 2019 nghttp3 contributors
6
 * Copyright (c) 2015 nghttp2 contributors
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining
9
 * a copy of this software and associated documentation files (the
10
 * "Software"), to deal in the Software without restriction, including
11
 * without limitation the rights to use, copy, modify, merge, publish,
12
 * distribute, sublicense, and/or sell copies of the Software, and to
13
 * permit persons to whom the Software is furnished to do so, subject to
14
 * the following conditions:
15
 *
16
 * The above copyright notice and this permission notice shall be
17
 * included in all copies or substantial portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 */
27
#include "sfparse.h"
28
29
#include <string.h>
30
#include <assert.h>
31
#include <stdlib.h>
32
33
#ifdef __AVX2__
34
#  include <immintrin.h>
35
#endif /* __AVX2__ */
36
37
172k
#define SFPARSE_STATE_DICT 0x08u
38
0
#define SFPARSE_STATE_LIST 0x10u
39
0
#define SFPARSE_STATE_ITEM 0x18u
40
41
16.1k
#define SFPARSE_STATE_INNER_LIST 0x04u
42
43
16.2k
#define SFPARSE_STATE_BEFORE 0x00u
44
218k
#define SFPARSE_STATE_BEFORE_PARAMS 0x01u
45
89.5k
#define SFPARSE_STATE_PARAMS 0x02u
46
144k
#define SFPARSE_STATE_AFTER 0x03u
47
48
279k
#define SFPARSE_STATE_OP_MASK 0x03u
49
50
#define SFPARSE_SET_STATE_AFTER(NAME)                                          \
51
55.3k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)
52
#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME)                                  \
53
106k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)
54
#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME)                              \
55
10.8k
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)
56
57
55.3k
#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
58
106k
#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
59
#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE                                   \
60
10.8k
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)
61
62
0
#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
63
0
#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
64
#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE                                   \
65
0
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)
66
67
0
#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
68
0
#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
69
#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE                                   \
70
0
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)
71
72
102k
#define SFPARSE_STATE_INITIAL 0x00u
73
74
#define DIGIT_CASES                                                            \
75
48.3k
  case '0':                                                                    \
76
61.5k
  case '1':                                                                    \
77
72.4k
  case '2':                                                                    \
78
79.7k
  case '3':                                                                    \
79
86.5k
  case '4':                                                                    \
80
92.5k
  case '5':                                                                    \
81
99.7k
  case '6':                                                                    \
82
106k
  case '7':                                                                    \
83
114k
  case '8':                                                                    \
84
121k
  case '9'
85
86
#define LCALPHA_CASES                                                          \
87
170k
  case 'a':                                                                    \
88
175k
  case 'b':                                                                    \
89
185k
  case 'c':                                                                    \
90
191k
  case 'd':                                                                    \
91
201k
  case 'e':                                                                    \
92
208k
  case 'f':                                                                    \
93
215k
  case 'g':                                                                    \
94
223k
  case 'h':                                                                    \
95
234k
  case 'i':                                                                    \
96
240k
  case 'j':                                                                    \
97
246k
  case 'k':                                                                    \
98
256k
  case 'l':                                                                    \
99
285k
  case 'm':                                                                    \
100
293k
  case 'n':                                                                    \
101
306k
  case 'o':                                                                    \
102
313k
  case 'p':                                                                    \
103
320k
  case 'q':                                                                    \
104
329k
  case 'r':                                                                    \
105
338k
  case 's':                                                                    \
106
351k
  case 't':                                                                    \
107
359k
  case 'u':                                                                    \
108
366k
  case 'v':                                                                    \
109
373k
  case 'w':                                                                    \
110
382k
  case 'x':                                                                    \
111
395k
  case 'y':                                                                    \
112
402k
  case 'z'
113
114
#define UCALPHA_CASES                                                          \
115
57.5k
  case 'A':                                                                    \
116
59.9k
  case 'B':                                                                    \
117
62.2k
  case 'C':                                                                    \
118
64.9k
  case 'D':                                                                    \
119
67.3k
  case 'E':                                                                    \
120
69.6k
  case 'F':                                                                    \
121
71.7k
  case 'G':                                                                    \
122
74.4k
  case 'H':                                                                    \
123
76.9k
  case 'I':                                                                    \
124
79.6k
  case 'J':                                                                    \
125
81.7k
  case 'K':                                                                    \
126
84.4k
  case 'L':                                                                    \
127
86.9k
  case 'M':                                                                    \
128
89.0k
  case 'N':                                                                    \
129
91.5k
  case 'O':                                                                    \
130
94.9k
  case 'P':                                                                    \
131
97.1k
  case 'Q':                                                                    \
132
99.8k
  case 'R':                                                                    \
133
102k
  case 'S':                                                                    \
134
105k
  case 'T':                                                                    \
135
107k
  case 'U':                                                                    \
136
109k
  case 'V':                                                                    \
137
112k
  case 'W':                                                                    \
138
115k
  case 'X':                                                                    \
139
117k
  case 'Y':                                                                    \
140
119k
  case 'Z'
141
142
#define ALPHA_CASES                                                            \
143
39.5k
  UCALPHA_CASES:                                                               \
144
40.9k
  LCALPHA_CASES
145
146
#define TOKEN_CASES                                                            \
147
777
  case '!':                                                                    \
148
1.31k
  case '#':                                                                    \
149
2.03k
  case '$':                                                                    \
150
3.10k
  case '%':                                                                    \
151
3.80k
  case '&':                                                                    \
152
4.49k
  case '\'':                                                                   \
153
5.73k
  case '*':                                                                    \
154
6.55k
  case '+':                                                                    \
155
7.34k
  case '-':                                                                    \
156
8.87k
  case '.':                                                                    \
157
9.94k
  case '/':                                                                    \
158
22.7k
  DIGIT_CASES:                                                                 \
159
24.2k
  case ':':                                                                    \
160
46.4k
  UCALPHA_CASES:                                                               \
161
46.9k
  case '^':                                                                    \
162
47.5k
  case '_':                                                                    \
163
48.2k
  case '`':                                                                    \
164
77.4k
  LCALPHA_CASES:                                                               \
165
78.0k
  case '|':                                                                    \
166
78.5k
  case '~'
167
168
#define LCHEXALPHA_CASES                                                       \
169
3.24k
  case 'a':                                                                    \
170
5.51k
  case 'b':                                                                    \
171
7.70k
  case 'c':                                                                    \
172
10.0k
  case 'd':                                                                    \
173
11.5k
  case 'e':                                                                    \
174
12.6k
  case 'f'
175
176
#define X00_1F_CASES                                                           \
177
0
  case 0x00:                                                                   \
178
0
  case 0x01:                                                                   \
179
0
  case 0x02:                                                                   \
180
0
  case 0x03:                                                                   \
181
0
  case 0x04:                                                                   \
182
0
  case 0x05:                                                                   \
183
0
  case 0x06:                                                                   \
184
0
  case 0x07:                                                                   \
185
0
  case 0x08:                                                                   \
186
11
  case 0x09:                                                                   \
187
11
  case 0x0a:                                                                   \
188
11
  case 0x0b:                                                                   \
189
11
  case 0x0c:                                                                   \
190
11
  case 0x0d:                                                                   \
191
11
  case 0x0e:                                                                   \
192
11
  case 0x0f:                                                                   \
193
11
  case 0x10:                                                                   \
194
11
  case 0x11:                                                                   \
195
11
  case 0x12:                                                                   \
196
11
  case 0x13:                                                                   \
197
11
  case 0x14:                                                                   \
198
11
  case 0x15:                                                                   \
199
11
  case 0x16:                                                                   \
200
11
  case 0x17:                                                                   \
201
11
  case 0x18:                                                                   \
202
11
  case 0x19:                                                                   \
203
11
  case 0x1a:                                                                   \
204
11
  case 0x1b:                                                                   \
205
11
  case 0x1c:                                                                   \
206
11
  case 0x1d:                                                                   \
207
11
  case 0x1e:                                                                   \
208
11
  case 0x1f
209
210
#define X20_21_CASES                                                           \
211
722
  case ' ':                                                                    \
212
1.27k
  case '!'
213
214
#define X23_5B_CASES                                                           \
215
2.03k
  case '#':                                                                    \
216
2.55k
  case '$':                                                                    \
217
3.85k
  case '%':                                                                    \
218
4.35k
  case '&':                                                                    \
219
4.97k
  case '\'':                                                                   \
220
5.54k
  case '(':                                                                    \
221
6.07k
  case ')':                                                                    \
222
6.78k
  case '*':                                                                    \
223
7.36k
  case '+':                                                                    \
224
7.99k
  case ',':                                                                    \
225
8.56k
  case '-':                                                                    \
226
9.11k
  case '.':                                                                    \
227
9.84k
  case '/':                                                                    \
228
16.3k
  DIGIT_CASES:                                                                 \
229
16.9k
  case ':':                                                                    \
230
17.5k
  case ';':                                                                    \
231
18.1k
  case '<':                                                                    \
232
18.6k
  case '=':                                                                    \
233
19.2k
  case '>':                                                                    \
234
19.8k
  case '?':                                                                    \
235
20.3k
  case '@':                                                                    \
236
33.6k
  UCALPHA_CASES:                                                               \
237
34.1k
  case '['
238
239
#define X5D_7E_CASES                                                           \
240
34.7k
  case ']':                                                                    \
241
35.3k
  case '^':                                                                    \
242
35.9k
  case '_':                                                                    \
243
36.3k
  case '`':                                                                    \
244
52.1k
  LCALPHA_CASES:                                                               \
245
52.6k
  case '{':                                                                    \
246
53.1k
  case '|':                                                                    \
247
53.5k
  case '}':                                                                    \
248
54.1k
  case '~'
249
250
#define X7F_FF_CASES                                                           \
251
11
  case 0x7f:                                                                   \
252
27
  case 0x80:                                                                   \
253
43
  case 0x81:                                                                   \
254
59
  case 0x82:                                                                   \
255
79
  case 0x83:                                                                   \
256
95
  case 0x84:                                                                   \
257
111
  case 0x85:                                                                   \
258
128
  case 0x86:                                                                   \
259
144
  case 0x87:                                                                   \
260
160
  case 0x88:                                                                   \
261
176
  case 0x89:                                                                   \
262
196
  case 0x8a:                                                                   \
263
212
  case 0x8b:                                                                   \
264
228
  case 0x8c:                                                                   \
265
244
  case 0x8d:                                                                   \
266
260
  case 0x8e:                                                                   \
267
276
  case 0x8f:                                                                   \
268
292
  case 0x90:                                                                   \
269
308
  case 0x91:                                                                   \
270
324
  case 0x92:                                                                   \
271
340
  case 0x93:                                                                   \
272
356
  case 0x94:                                                                   \
273
372
  case 0x95:                                                                   \
274
388
  case 0x96:                                                                   \
275
404
  case 0x97:                                                                   \
276
420
  case 0x98:                                                                   \
277
436
  case 0x99:                                                                   \
278
452
  case 0x9a:                                                                   \
279
468
  case 0x9b:                                                                   \
280
484
  case 0x9c:                                                                   \
281
500
  case 0x9d:                                                                   \
282
516
  case 0x9e:                                                                   \
283
532
  case 0x9f:                                                                   \
284
548
  case 0xa0:                                                                   \
285
568
  case 0xa1:                                                                   \
286
584
  case 0xa2:                                                                   \
287
600
  case 0xa3:                                                                   \
288
616
  case 0xa4:                                                                   \
289
632
  case 0xa5:                                                                   \
290
648
  case 0xa6:                                                                   \
291
664
  case 0xa7:                                                                   \
292
680
  case 0xa8:                                                                   \
293
696
  case 0xa9:                                                                   \
294
712
  case 0xaa:                                                                   \
295
728
  case 0xab:                                                                   \
296
748
  case 0xac:                                                                   \
297
764
  case 0xad:                                                                   \
298
780
  case 0xae:                                                                   \
299
796
  case 0xaf:                                                                   \
300
812
  case 0xb0:                                                                   \
301
828
  case 0xb1:                                                                   \
302
844
  case 0xb2:                                                                   \
303
860
  case 0xb3:                                                                   \
304
876
  case 0xb4:                                                                   \
305
892
  case 0xb5:                                                                   \
306
908
  case 0xb6:                                                                   \
307
924
  case 0xb7:                                                                   \
308
940
  case 0xb8:                                                                   \
309
956
  case 0xb9:                                                                   \
310
972
  case 0xba:                                                                   \
311
989
  case 0xbb:                                                                   \
312
1.00k
  case 0xbc:                                                                   \
313
1.02k
  case 0xbd:                                                                   \
314
1.04k
  case 0xbe:                                                                   \
315
1.05k
  case 0xbf:                                                                   \
316
1.07k
  case 0xc0:                                                                   \
317
1.09k
  case 0xc1:                                                                   \
318
1.11k
  case 0xc2:                                                                   \
319
1.12k
  case 0xc3:                                                                   \
320
1.14k
  case 0xc4:                                                                   \
321
1.16k
  case 0xc5:                                                                   \
322
1.17k
  case 0xc6:                                                                   \
323
1.19k
  case 0xc7:                                                                   \
324
1.21k
  case 0xc8:                                                                   \
325
1.22k
  case 0xc9:                                                                   \
326
1.24k
  case 0xca:                                                                   \
327
1.26k
  case 0xcb:                                                                   \
328
1.27k
  case 0xcc:                                                                   \
329
1.29k
  case 0xcd:                                                                   \
330
1.30k
  case 0xce:                                                                   \
331
1.32k
  case 0xcf:                                                                   \
332
1.34k
  case 0xd0:                                                                   \
333
1.35k
  case 0xd1:                                                                   \
334
1.37k
  case 0xd2:                                                                   \
335
1.39k
  case 0xd3:                                                                   \
336
1.40k
  case 0xd4:                                                                   \
337
1.42k
  case 0xd5:                                                                   \
338
1.44k
  case 0xd6:                                                                   \
339
1.45k
  case 0xd7:                                                                   \
340
1.47k
  case 0xd8:                                                                   \
341
1.49k
  case 0xd9:                                                                   \
342
1.50k
  case 0xda:                                                                   \
343
1.52k
  case 0xdb:                                                                   \
344
1.53k
  case 0xdc:                                                                   \
345
1.55k
  case 0xdd:                                                                   \
346
1.57k
  case 0xde:                                                                   \
347
1.59k
  case 0xdf:                                                                   \
348
1.60k
  case 0xe0:                                                                   \
349
1.62k
  case 0xe1:                                                                   \
350
1.63k
  case 0xe2:                                                                   \
351
1.65k
  case 0xe3:                                                                   \
352
1.67k
  case 0xe4:                                                                   \
353
1.69k
  case 0xe5:                                                                   \
354
1.70k
  case 0xe6:                                                                   \
355
1.72k
  case 0xe7:                                                                   \
356
1.73k
  case 0xe8:                                                                   \
357
1.75k
  case 0xe9:                                                                   \
358
1.77k
  case 0xea:                                                                   \
359
1.79k
  case 0xeb:                                                                   \
360
1.80k
  case 0xec:                                                                   \
361
1.82k
  case 0xed:                                                                   \
362
1.83k
  case 0xee:                                                                   \
363
1.85k
  case 0xef:                                                                   \
364
1.87k
  case 0xf0:                                                                   \
365
1.88k
  case 0xf1:                                                                   \
366
1.90k
  case 0xf2:                                                                   \
367
1.91k
  case 0xf3:                                                                   \
368
1.93k
  case 0xf4:                                                                   \
369
1.95k
  case 0xf5:                                                                   \
370
1.96k
  case 0xf6:                                                                   \
371
1.98k
  case 0xf7:                                                                   \
372
1.99k
  case 0xf8:                                                                   \
373
2.01k
  case 0xf9:                                                                   \
374
2.03k
  case 0xfa:                                                                   \
375
2.05k
  case 0xfb:                                                                   \
376
2.06k
  case 0xfc:                                                                   \
377
2.08k
  case 0xfd:                                                                   \
378
2.09k
  case 0xfe:                                                                   \
379
2.11k
  case 0xff
380
381
19.8k
static int is_ws(uint8_t c) {
382
19.8k
  switch (c) {
383
740
  case ' ':
384
1.45k
  case '\t':
385
1.45k
    return 1;
386
18.3k
  default:
387
18.3k
    return 0;
388
19.8k
  }
389
19.8k
}
390
391
#ifdef __AVX2__
392
#  ifdef _MSC_VER
393
#    include <intrin.h>
394
395
static int ctz(unsigned int v) {
396
  unsigned long n;
397
398
  /* Assume that v is not 0. */
399
  _BitScanForward(&n, v);
400
401
  return (int)n;
402
}
403
#  else /* !_MSC_VER */
404
#    define ctz __builtin_ctz
405
#  endif /* !_MSC_VER */
406
#endif   /* __AVX2__ */
407
408
1.09M
static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; }
409
410
64.4k
static void parser_discard_ows(sfparse_parser *sfp) {
411
65.9k
  for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
412
1.45k
    ;
413
64.4k
}
414
415
85.9k
static void parser_discard_sp(sfparse_parser *sfp) {
416
101k
  for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
417
15.1k
    ;
418
85.9k
}
419
420
167k
static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) {
421
167k
  sfp->state &= ~SFPARSE_STATE_OP_MASK;
422
167k
  sfp->state |= op;
423
167k
}
424
425
5.25k
static void parser_unset_inner_list_state(sfparse_parser *sfp) {
426
5.25k
  sfp->state &= ~SFPARSE_STATE_INNER_LIST;
427
5.25k
}
428
429
#ifdef __AVX2__
430
static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
431
  const __m256i us = _mm256_set1_epi8('_');
432
  const __m256i ds = _mm256_set1_epi8('-');
433
  const __m256i dot = _mm256_set1_epi8('.');
434
  const __m256i ast = _mm256_set1_epi8('*');
435
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
436
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
437
  const __m256i r1l = _mm256_set1_epi8('a' - 1);
438
  const __m256i r1r = _mm256_set1_epi8('z' + 1);
439
  __m256i s, x;
440
  uint32_t m;
441
442
  for (; first != last; first += 32) {
443
    s = _mm256_loadu_si256((void *)first);
444
445
    x = _mm256_cmpeq_epi8(s, us);
446
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x);
447
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x);
448
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x);
449
    x = _mm256_or_si256(
450
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
451
      x);
452
    x = _mm256_or_si256(
453
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
454
      x);
455
456
    m = ~(uint32_t)_mm256_movemask_epi8(x);
457
    if (m) {
458
      return first + ctz(m);
459
    }
460
  }
461
462
  return last;
463
}
464
#endif /* __AVX2__ */
465
466
77.1k
static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) {
467
77.1k
  const uint8_t *base;
468
#ifdef __AVX2__
469
  const uint8_t *last;
470
#endif /* __AVX2__ */
471
472
77.1k
  switch (*sfp->pos) {
473
5.76k
  case '*':
474
77.0k
  LCALPHA_CASES:
475
77.0k
    break;
476
114
  default:
477
114
    return SFPARSE_ERR_PARSE;
478
77.1k
  }
479
480
77.0k
  base = sfp->pos++;
481
482
#ifdef __AVX2__
483
  if (sfp->end - sfp->pos >= 32) {
484
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
485
486
    sfp->pos = find_char_key(sfp->pos, last);
487
    if (sfp->pos != last) {
488
      goto fin;
489
    }
490
  }
491
#endif /* __AVX2__ */
492
493
199k
  for (; !parser_eof(sfp); ++sfp->pos) {
494
191k
    switch (*sfp->pos) {
495
1.62k
    case '_':
496
6.22k
    case '-':
497
7.63k
    case '.':
498
9.04k
    case '*':
499
190k
    DIGIT_CASES:
500
1.90M
    LCALPHA_CASES:
501
1.90M
      continue;
502
191k
    }
503
504
68.8k
    break;
505
191k
  }
506
507
#ifdef __AVX2__
508
fin:
509
#endif /* __AVX2__ */
510
77.0k
  if (dest) {
511
59.7k
    dest->base = (uint8_t *)base;
512
59.7k
    dest->len = (size_t)(sfp->pos - dest->base);
513
59.7k
  }
514
515
77.0k
  return 0;
516
77.0k
}
517
518
11.3k
static int parser_number(sfparse_parser *sfp, sfparse_value *dest) {
519
11.3k
  int sign = 1;
520
11.3k
  int64_t value = 0;
521
11.3k
  size_t len = 0;
522
11.3k
  size_t fpos = 0;
523
524
11.3k
  if (*sfp->pos == '-') {
525
568
    ++sfp->pos;
526
568
    if (parser_eof(sfp)) {
527
36
      return SFPARSE_ERR_PARSE;
528
36
    }
529
530
532
    sign = -1;
531
532
  }
532
533
11.3k
  assert(!parser_eof(sfp));
534
535
27.9k
  for (; !parser_eof(sfp); ++sfp->pos) {
536
24.8k
    switch (*sfp->pos) {
537
110k
    DIGIT_CASES:
538
110k
      if (++len > 15) {
539
11
        return SFPARSE_ERR_PARSE;
540
11
      }
541
542
16.5k
      value *= 10;
543
16.5k
      value += *sfp->pos - '0';
544
545
16.5k
      continue;
546
24.8k
    }
547
548
8.26k
    break;
549
24.8k
  }
550
551
11.2k
  if (len == 0) {
552
32
    return SFPARSE_ERR_PARSE;
553
32
  }
554
555
11.2k
  if (parser_eof(sfp) || *sfp->pos != '.') {
556
8.64k
    if (dest) {
557
4.36k
      dest->type = SFPARSE_TYPE_INTEGER;
558
4.36k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
559
4.36k
      dest->integer = value * sign;
560
4.36k
    }
561
562
8.64k
    return 0;
563
8.64k
  }
564
565
  /* decimal */
566
567
2.62k
  if (len > 12) {
568
11
    return SFPARSE_ERR_PARSE;
569
11
  }
570
571
2.61k
  fpos = len;
572
573
2.61k
  ++sfp->pos;
574
575
6.85k
  for (; !parser_eof(sfp); ++sfp->pos) {
576
5.65k
    switch (*sfp->pos) {
577
32.4k
    DIGIT_CASES:
578
32.4k
      if (++len > 15) {
579
11
        return SFPARSE_ERR_PARSE;
580
11
      }
581
582
4.24k
      value *= 10;
583
4.24k
      value += *sfp->pos - '0';
584
585
4.24k
      continue;
586
5.65k
    }
587
588
1.39k
    break;
589
5.65k
  }
590
591
2.59k
  if (fpos == len || len - fpos > 3) {
592
55
    return SFPARSE_ERR_PARSE;
593
55
  }
594
595
2.54k
  if (dest) {
596
1.69k
    dest->type = SFPARSE_TYPE_DECIMAL;
597
1.69k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
598
1.69k
    dest->decimal.numer = value * sign;
599
600
1.69k
    switch (len - fpos) {
601
832
    case 1:
602
832
      dest->decimal.denom = 10;
603
604
832
      break;
605
419
    case 2:
606
419
      dest->decimal.denom = 100;
607
608
419
      break;
609
444
    case 3:
610
444
      dest->decimal.denom = 1000;
611
612
444
      break;
613
1.69k
    }
614
1.69k
  }
615
616
2.54k
  return 0;
617
2.54k
}
618
619
1.18k
static int parser_date(sfparse_parser *sfp, sfparse_value *dest) {
620
1.18k
  int rv;
621
1.18k
  sfparse_value val;
622
623
  /* The first byte has already been validated by the caller. */
624
1.18k
  assert('@' == *sfp->pos);
625
626
1.18k
  ++sfp->pos;
627
628
1.18k
  if (parser_eof(sfp)) {
629
31
    return SFPARSE_ERR_PARSE;
630
31
  }
631
632
1.15k
  rv = parser_number(sfp, &val);
633
1.15k
  if (rv != 0) {
634
30
    return rv;
635
30
  }
636
637
1.12k
  if (val.type != SFPARSE_TYPE_INTEGER) {
638
11
    return SFPARSE_ERR_PARSE;
639
11
  }
640
641
1.10k
  if (dest) {
642
466
    *dest = val;
643
466
    dest->type = SFPARSE_TYPE_DATE;
644
466
  }
645
646
1.10k
  return 0;
647
1.12k
}
648
649
#ifdef __AVX2__
650
static const uint8_t *find_char_string(const uint8_t *first,
651
                                       const uint8_t *last) {
652
  const __m256i bs = _mm256_set1_epi8('\\');
653
  const __m256i dq = _mm256_set1_epi8('"');
654
  const __m256i del = _mm256_set1_epi8(0x7f);
655
  const __m256i sp = _mm256_set1_epi8(' ');
656
  __m256i s, x;
657
  uint32_t m;
658
659
  for (; first != last; first += 32) {
660
    s = _mm256_loadu_si256((void *)first);
661
662
    x = _mm256_cmpgt_epi8(sp, s);
663
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x);
664
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x);
665
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x);
666
667
    m = (uint32_t)_mm256_movemask_epi8(x);
668
    if (m) {
669
      return first + ctz(m);
670
    }
671
  }
672
673
  return last;
674
}
675
#endif /* __AVX2__ */
676
677
6.74k
static int parser_string(sfparse_parser *sfp, sfparse_value *dest) {
678
6.74k
  const uint8_t *base;
679
#ifdef __AVX2__
680
  const uint8_t *last;
681
#endif /* __AVX2__ */
682
6.74k
  uint32_t flags = SFPARSE_VALUE_FLAG_NONE;
683
684
  /* The first byte has already been validated by the caller. */
685
6.74k
  assert('"' == *sfp->pos);
686
687
6.74k
  base = ++sfp->pos;
688
689
#ifdef __AVX2__
690
  for (; sfp->end - sfp->pos >= 32; ++sfp->pos) {
691
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
692
693
    sfp->pos = find_char_string(sfp->pos, last);
694
    if (sfp->pos == last) {
695
      break;
696
    }
697
698
    switch (*sfp->pos) {
699
    case '\\':
700
      ++sfp->pos;
701
      if (parser_eof(sfp)) {
702
        return SFPARSE_ERR_PARSE;
703
      }
704
705
      switch (*sfp->pos) {
706
      case '"':
707
      case '\\':
708
        flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
709
710
        break;
711
      default:
712
        return SFPARSE_ERR_PARSE;
713
      }
714
715
      break;
716
    case '"':
717
      goto fin;
718
    default:
719
      return SFPARSE_ERR_PARSE;
720
    }
721
  }
722
#endif /* __AVX2__ */
723
724
61.7k
  for (; !parser_eof(sfp); ++sfp->pos) {
725
61.0k
    switch (*sfp->pos) {
726
1.99k
    X20_21_CASES:
727
1.09M
    X23_5B_CASES:
728
1.09M
    X5D_7E_CASES:
729
54.1k
      break;
730
860
    case '\\':
731
860
      ++sfp->pos;
732
860
      if (parser_eof(sfp)) {
733
17
        return SFPARSE_ERR_PARSE;
734
17
      }
735
736
843
      switch (*sfp->pos) {
737
388
      case '"':
738
827
      case '\\':
739
827
        flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
740
741
827
        break;
742
16
      default:
743
16
        return SFPARSE_ERR_PARSE;
744
843
      }
745
746
827
      break;
747
5.93k
    case '"':
748
5.93k
      goto fin;
749
65
    default:
750
65
      return SFPARSE_ERR_PARSE;
751
61.0k
    }
752
61.0k
  }
753
754
713
  return SFPARSE_ERR_PARSE;
755
756
5.93k
fin:
757
5.93k
  if (dest) {
758
5.30k
    dest->type = SFPARSE_TYPE_STRING;
759
5.30k
    dest->flags = flags;
760
5.30k
    dest->vec.len = (size_t)(sfp->pos - base);
761
5.30k
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
762
5.30k
  }
763
764
5.93k
  ++sfp->pos;
765
766
5.93k
  return 0;
767
6.74k
}
768
769
#ifdef __AVX2__
770
static const uint8_t *find_char_token(const uint8_t *first,
771
                                      const uint8_t *last) {
772
  /* r0: !..:, excluding "(),
773
     r1: A..Z
774
     r2: ^..~, excluding {} */
775
  const __m256i r0l = _mm256_set1_epi8('!' - 1);
776
  const __m256i r0r = _mm256_set1_epi8(':' + 1);
777
  const __m256i dq = _mm256_set1_epi8('"');
778
  const __m256i prl = _mm256_set1_epi8('(');
779
  const __m256i prr = _mm256_set1_epi8(')');
780
  const __m256i comma = _mm256_set1_epi8(',');
781
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
782
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
783
  const __m256i r2l = _mm256_set1_epi8('^' - 1);
784
  const __m256i r2r = _mm256_set1_epi8('~' + 1);
785
  const __m256i cbl = _mm256_set1_epi8('{');
786
  const __m256i cbr = _mm256_set1_epi8('}');
787
  __m256i s, x;
788
  uint32_t m;
789
790
  for (; first != last; first += 32) {
791
    s = _mm256_loadu_si256((void *)first);
792
793
    x = _mm256_andnot_si256(
794
      _mm256_cmpeq_epi8(s, comma),
795
      _mm256_andnot_si256(
796
        _mm256_cmpeq_epi8(s, prr),
797
        _mm256_andnot_si256(
798
          _mm256_cmpeq_epi8(s, prl),
799
          _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq),
800
                              _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l),
801
                                               _mm256_cmpgt_epi8(r0r, s))))));
802
    x = _mm256_or_si256(
803
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
804
      x);
805
    x = _mm256_or_si256(
806
      _mm256_andnot_si256(
807
        _mm256_cmpeq_epi8(s, cbr),
808
        _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl),
809
                            _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l),
810
                                             _mm256_cmpgt_epi8(r2r, s)))),
811
      x);
812
813
    m = ~(uint32_t)_mm256_movemask_epi8(x);
814
    if (m) {
815
      return first + ctz(m);
816
    }
817
  }
818
819
  return last;
820
}
821
#endif /* __AVX2__ */
822
823
31.4k
static int parser_token(sfparse_parser *sfp, sfparse_value *dest) {
824
31.4k
  const uint8_t *base;
825
#ifdef __AVX2__
826
  const uint8_t *last;
827
#endif /* __AVX2__ */
828
829
  /* The first byte has already been validated by the caller. */
830
31.4k
  base = sfp->pos++;
831
832
#ifdef __AVX2__
833
  if (sfp->end - sfp->pos >= 32) {
834
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
835
836
    sfp->pos = find_char_token(sfp->pos, last);
837
    if (sfp->pos != last) {
838
      goto fin;
839
    }
840
  }
841
#endif /* __AVX2__ */
842
843
110k
  for (; !parser_eof(sfp); ++sfp->pos) {
844
97.6k
    switch (*sfp->pos) {
845
3.14M
    TOKEN_CASES:
846
3.14M
      continue;
847
97.6k
    }
848
849
19.0k
    break;
850
97.6k
  }
851
852
#ifdef __AVX2__
853
fin:
854
#endif /* __AVX2__ */
855
31.4k
  if (dest) {
856
14.0k
    dest->type = SFPARSE_TYPE_TOKEN;
857
14.0k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
858
14.0k
    dest->vec.base = (uint8_t *)base;
859
14.0k
    dest->vec.len = (size_t)(sfp->pos - base);
860
14.0k
  }
861
862
31.4k
  return 0;
863
31.4k
}
864
865
#ifdef __AVX2__
866
static const uint8_t *find_char_byteseq(const uint8_t *first,
867
                                        const uint8_t *last) {
868
  const __m256i pls = _mm256_set1_epi8('+');
869
  const __m256i fs = _mm256_set1_epi8('/');
870
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
871
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
872
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
873
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
874
  const __m256i r2l = _mm256_set1_epi8('a' - 1);
875
  const __m256i r2r = _mm256_set1_epi8('z' + 1);
876
  __m256i s, x;
877
  uint32_t m;
878
879
  for (; first != last; first += 32) {
880
    s = _mm256_loadu_si256((void *)first);
881
882
    x = _mm256_cmpeq_epi8(s, pls);
883
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x);
884
    x = _mm256_or_si256(
885
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
886
      x);
887
    x = _mm256_or_si256(
888
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
889
      x);
890
    x = _mm256_or_si256(
891
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)),
892
      x);
893
894
    m = ~(uint32_t)_mm256_movemask_epi8(x);
895
    if (m) {
896
      return first + ctz(m);
897
    }
898
  }
899
900
  return last;
901
}
902
#endif /* __AVX2__ */
903
904
6.42k
static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) {
905
6.42k
  const uint8_t *base;
906
#ifdef __AVX2__
907
  const uint8_t *last;
908
#endif /* __AVX2__ */
909
910
  /* The first byte has already been validated by the caller. */
911
6.42k
  assert(':' == *sfp->pos);
912
913
6.42k
  base = ++sfp->pos;
914
915
#ifdef __AVX2__
916
  if (sfp->end - sfp->pos >= 32) {
917
    last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
918
    sfp->pos = find_char_byteseq(sfp->pos, last);
919
  }
920
#endif /* __AVX2__ */
921
922
47.5k
  for (; !parser_eof(sfp); ++sfp->pos) {
923
47.1k
    switch (*sfp->pos) {
924
550
    case '+':
925
1.24k
    case '/':
926
60.1k
    DIGIT_CASES:
927
60.1k
    ALPHA_CASES:
928
41.1k
      continue;
929
1.49k
    case '=':
930
1.49k
      switch ((sfp->pos - base) & 0x3) {
931
19
      case 0:
932
32
      case 1:
933
32
        return SFPARSE_ERR_PARSE;
934
883
      case 2:
935
883
        ++sfp->pos;
936
937
883
        if (parser_eof(sfp)) {
938
12
          return SFPARSE_ERR_PARSE;
939
12
        }
940
941
871
        if (*sfp->pos == '=') {
942
398
          ++sfp->pos;
943
398
        }
944
945
871
        break;
946
582
      case 3:
947
582
        ++sfp->pos;
948
949
582
        break;
950
1.49k
      }
951
952
1.45k
      if (parser_eof(sfp) || *sfp->pos != ':') {
953
31
        return SFPARSE_ERR_PARSE;
954
31
      }
955
956
1.42k
      goto fin;
957
4.48k
    case ':':
958
4.48k
      if (((sfp->pos - base) & 0x3) == 1) {
959
13
        return SFPARSE_ERR_PARSE;
960
13
      }
961
962
4.47k
      goto fin;
963
4.47k
    default:
964
46
      return SFPARSE_ERR_PARSE;
965
47.1k
    }
966
47.1k
  }
967
968
399
  return SFPARSE_ERR_PARSE;
969
970
5.89k
fin:
971
5.89k
  if (dest) {
972
5.27k
    dest->type = SFPARSE_TYPE_BYTESEQ;
973
5.27k
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
974
5.27k
    dest->vec.len = (size_t)(sfp->pos - base);
975
5.27k
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
976
5.27k
  }
977
978
5.89k
  ++sfp->pos;
979
980
5.89k
  return 0;
981
6.42k
}
982
983
1.58k
static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) {
984
1.58k
  int b;
985
986
  /* The first byte has already been validated by the caller. */
987
1.58k
  assert('?' == *sfp->pos);
988
989
1.58k
  ++sfp->pos;
990
991
1.58k
  if (parser_eof(sfp)) {
992
24
    return SFPARSE_ERR_PARSE;
993
24
  }
994
995
1.56k
  switch (*sfp->pos) {
996
798
  case '0':
997
798
    b = 0;
998
999
798
    break;
1000
748
  case '1':
1001
748
    b = 1;
1002
1003
748
    break;
1004
16
  default:
1005
16
    return SFPARSE_ERR_PARSE;
1006
1.56k
  }
1007
1008
1.54k
  ++sfp->pos;
1009
1010
1.54k
  if (dest) {
1011
676
    dest->type = SFPARSE_TYPE_BOOLEAN;
1012
676
    dest->flags = SFPARSE_VALUE_FLAG_NONE;
1013
676
    dest->boolean = b;
1014
676
  }
1015
1016
1.54k
  return 0;
1017
1.56k
}
1018
1019
15.1k
static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
1020
15.1k
  uint8_t c, b = **ppos;
1021
1022
15.1k
  switch (b) {
1023
8.76k
  DIGIT_CASES:
1024
8.76k
    c = (uint8_t)((b - '0') << 4);
1025
1026
8.76k
    break;
1027
6.32k
  LCHEXALPHA_CASES:
1028
6.32k
    c = (uint8_t)((b - 'a' + 10) << 4);
1029
1030
6.32k
    break;
1031
11
  default:
1032
11
    return -1;
1033
15.1k
  }
1034
1035
15.0k
  b = *++*ppos;
1036
1037
15.0k
  switch (b) {
1038
8.71k
  DIGIT_CASES:
1039
8.71k
    c |= (uint8_t)(b - '0');
1040
1041
8.71k
    break;
1042
6.36k
  LCHEXALPHA_CASES:
1043
6.36k
    c |= (uint8_t)(b - 'a' + 10);
1044
1045
6.36k
    break;
1046
14
  default:
1047
14
    return -1;
1048
15.0k
  }
1049
1050
15.0k
  *pc = c;
1051
15.0k
  ++*ppos;
1052
1053
15.0k
  return 0;
1054
15.0k
}
1055
1056
/* Start of utf8 dfa */
1057
/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
1058
 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
1059
 *
1060
 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
1061
 *
1062
 * Permission is hereby granted, free of charge, to any person
1063
 * obtaining a copy of this software and associated documentation
1064
 * files (the "Software"), to deal in the Software without
1065
 * restriction, including without limitation the rights to use, copy,
1066
 * modify, merge, publish, distribute, sublicense, and/or sell copies
1067
 * of the Software, and to permit persons to whom the Software is
1068
 * furnished to do so, subject to the following conditions:
1069
 *
1070
 * The above copyright notice and this permission notice shall be
1071
 * included in all copies or substantial portions of the Software.
1072
 *
1073
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1074
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1075
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1076
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
1077
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1078
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1079
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1080
 * SOFTWARE.
1081
 */
1082
42.8k
#define UTF8_ACCEPT 0
1083
15.0k
#define UTF8_REJECT 12
1084
1085
/* clang-format off */
1086
static const uint8_t utf8d[] = {
1087
  /*
1088
   * The first part of the table maps bytes to character classes that
1089
   * to reduce the size of the transition table and create bitmasks.
1090
   */
1091
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1092
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1093
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1094
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1095
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
1096
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
1097
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1098
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
1099
1100
   /*
1101
    * The second part is a transition table that maps a combination
1102
    * of a state of the automaton and a character class to a state.
1103
    */
1104
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
1105
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
1106
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
1107
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
1108
  12,36,12,12,12,12,12,12,12,12,12,12,
1109
};
1110
/* clang-format on */
1111
1112
15.0k
static void utf8_decode(uint32_t *state, uint8_t byte) {
1113
15.0k
  *state = utf8d[256 + *state + utf8d[byte]];
1114
15.0k
}
1115
1116
/* End of utf8 dfa */
1117
1118
8.41k
static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) {
1119
8.41k
  const uint8_t *base;
1120
8.41k
  uint8_t c;
1121
8.41k
  uint32_t utf8state = UTF8_ACCEPT;
1122
1123
8.41k
  assert('%' == *sfp->pos);
1124
1125
8.41k
  ++sfp->pos;
1126
1127
8.41k
  if (parser_eof(sfp) || *sfp->pos != '"') {
1128
60
    return SFPARSE_ERR_PARSE;
1129
60
  }
1130
1131
8.35k
  base = ++sfp->pos;
1132
1133
51.6k
  for (; !parser_eof(sfp);) {
1134
51.6k
    switch (*sfp->pos) {
1135
253
    X00_1F_CASES:
1136
2.11k
    X7F_FF_CASES:
1137
2.11k
      return SFPARSE_ERR_PARSE;
1138
15.1k
    case '%':
1139
15.1k
      ++sfp->pos;
1140
1141
15.1k
      if (sfp->pos + 2 > sfp->end) {
1142
16
        return SFPARSE_ERR_PARSE;
1143
16
      }
1144
1145
15.1k
      if (pctdecode(&c, &sfp->pos) != 0) {
1146
25
        return SFPARSE_ERR_PARSE;
1147
25
      }
1148
1149
15.0k
      utf8_decode(&utf8state, c);
1150
15.0k
      if (utf8state == UTF8_REJECT) {
1151
28
        return SFPARSE_ERR_PARSE;
1152
28
      }
1153
1154
15.0k
      break;
1155
15.0k
    case '"':
1156
6.10k
      if (utf8state != UTF8_ACCEPT) {
1157
12
        return SFPARSE_ERR_PARSE;
1158
12
      }
1159
1160
6.09k
      if (dest) {
1161
5.11k
        dest->type = SFPARSE_TYPE_DISPSTRING;
1162
5.11k
        dest->flags = SFPARSE_VALUE_FLAG_NONE;
1163
5.11k
        dest->vec.len = (size_t)(sfp->pos - base);
1164
5.11k
        dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
1165
5.11k
      }
1166
1167
6.09k
      ++sfp->pos;
1168
1169
6.09k
      return 0;
1170
28.3k
    default:
1171
28.3k
      if (utf8state != UTF8_ACCEPT) {
1172
13
        return SFPARSE_ERR_PARSE;
1173
13
      }
1174
1175
28.2k
      ++sfp->pos;
1176
51.6k
    }
1177
51.6k
  }
1178
1179
53
  return SFPARSE_ERR_PARSE;
1180
8.35k
}
1181
1182
66.0k
static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) {
1183
66.0k
  switch (*sfp->pos) {
1184
6.74k
  case '"':
1185
6.74k
    return parser_string(sfp, dest);
1186
568
  case '-':
1187
10.1k
  DIGIT_CASES:
1188
10.1k
    return parser_number(sfp, dest);
1189
1.18k
  case '@':
1190
1.18k
    return parser_date(sfp, dest);
1191
6.42k
  case ':':
1192
6.42k
    return parser_byteseq(sfp, dest);
1193
1.58k
  case '?':
1194
1.58k
    return parser_boolean(sfp, dest);
1195
869
  case '*':
1196
31.4k
  ALPHA_CASES:
1197
31.4k
    return parser_token(sfp, dest);
1198
8.41k
  case '%':
1199
8.41k
    return parser_dispstring(sfp, dest);
1200
44
  default:
1201
44
    return SFPARSE_ERR_PARSE;
1202
66.0k
  }
1203
66.0k
}
1204
1205
static int parser_skip_inner_list(sfparse_parser *sfp);
1206
1207
int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
1208
89.5k
                         sfparse_value *dest_value) {
1209
89.5k
  int rv;
1210
1211
89.5k
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
1212
0
  case SFPARSE_STATE_BEFORE:
1213
0
    rv = parser_skip_inner_list(sfp);
1214
0
    if (rv != 0) {
1215
0
      return rv;
1216
0
    }
1217
1218
    /* fall through */
1219
72.8k
  case SFPARSE_STATE_BEFORE_PARAMS:
1220
72.8k
    parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);
1221
1222
72.8k
    break;
1223
16.7k
  case SFPARSE_STATE_PARAMS:
1224
16.7k
    break;
1225
0
  default:
1226
0
    assert(0);
1227
0
    abort();
1228
89.5k
  }
1229
1230
89.5k
  if (parser_eof(sfp) || *sfp->pos != ';') {
1231
72.1k
    parser_set_op_state(sfp, SFPARSE_STATE_AFTER);
1232
1233
72.1k
    return SFPARSE_ERR_EOF;
1234
72.1k
  }
1235
1236
17.4k
  ++sfp->pos;
1237
1238
17.4k
  parser_discard_sp(sfp);
1239
17.4k
  if (parser_eof(sfp)) {
1240
29
    return SFPARSE_ERR_PARSE;
1241
29
  }
1242
1243
17.3k
  rv = parser_key(sfp, dest_key);
1244
17.3k
  if (rv != 0) {
1245
44
    return rv;
1246
44
  }
1247
1248
17.3k
  if (parser_eof(sfp) || *sfp->pos != '=') {
1249
7.40k
    if (dest_value) {
1250
0
      dest_value->type = SFPARSE_TYPE_BOOLEAN;
1251
0
      dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
1252
0
      dest_value->boolean = 1;
1253
0
    }
1254
1255
7.40k
    return 0;
1256
7.40k
  }
1257
1258
9.92k
  ++sfp->pos;
1259
1260
9.92k
  if (parser_eof(sfp)) {
1261
11
    return SFPARSE_ERR_PARSE;
1262
11
  }
1263
1264
9.91k
  return parser_bare_item(sfp, dest_value);
1265
9.92k
}
1266
1267
72.8k
static int parser_skip_params(sfparse_parser *sfp) {
1268
72.8k
  int rv;
1269
1270
89.5k
  for (;;) {
1271
89.5k
    rv = sfparse_parser_param(sfp, NULL, NULL);
1272
89.5k
    switch (rv) {
1273
16.7k
    case 0:
1274
16.7k
      break;
1275
72.1k
    case SFPARSE_ERR_EOF:
1276
72.1k
      return 0;
1277
618
    case SFPARSE_ERR_PARSE:
1278
618
      return rv;
1279
0
    default:
1280
0
      assert(0);
1281
0
      abort();
1282
89.5k
    }
1283
89.5k
  }
1284
72.8k
}
1285
1286
22.3k
int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
1287
22.3k
  int rv;
1288
1289
22.3k
  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
1290
5.42k
  case SFPARSE_STATE_BEFORE:
1291
5.42k
    parser_discard_sp(sfp);
1292
5.42k
    if (parser_eof(sfp)) {
1293
25
      return SFPARSE_ERR_PARSE;
1294
25
    }
1295
1296
5.40k
    break;
1297
16.8k
  case SFPARSE_STATE_BEFORE_PARAMS:
1298
16.8k
    rv = parser_skip_params(sfp);
1299
16.8k
    if (rv != 0) {
1300
28
      return rv;
1301
28
    }
1302
1303
    /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
1304
       another state without reading the state. */
1305
    /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */
1306
1307
    /* fall through */
1308
16.8k
  case SFPARSE_STATE_AFTER:
1309
16.8k
    if (parser_eof(sfp)) {
1310
39
      return SFPARSE_ERR_PARSE;
1311
39
    }
1312
1313
16.8k
    switch (*sfp->pos) {
1314
11.9k
    case ' ':
1315
11.9k
      parser_discard_sp(sfp);
1316
11.9k
      if (parser_eof(sfp)) {
1317
0
        return SFPARSE_ERR_PARSE;
1318
0
      }
1319
1320
11.9k
      break;
1321
11.9k
    case ')':
1322
4.90k
      break;
1323
18
    default:
1324
18
      return SFPARSE_ERR_PARSE;
1325
16.8k
    }
1326
1327
16.8k
    break;
1328
16.8k
  default:
1329
0
    assert(0);
1330
0
    abort();
1331
22.3k
  }
1332
1333
22.2k
  if (*sfp->pos == ')') {
1334
5.25k
    ++sfp->pos;
1335
1336
5.25k
    parser_unset_inner_list_state(sfp);
1337
5.25k
    parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
1338
1339
5.25k
    return SFPARSE_ERR_EOF;
1340
5.25k
  }
1341
1342
16.9k
  rv = parser_bare_item(sfp, dest);
1343
16.9k
  if (rv != 0) {
1344
65
    return rv;
1345
65
  }
1346
1347
16.8k
  parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
1348
1349
16.8k
  return 0;
1350
16.9k
}
1351
1352
5.42k
static int parser_skip_inner_list(sfparse_parser *sfp) {
1353
5.42k
  int rv;
1354
1355
22.3k
  for (;;) {
1356
22.3k
    rv = sfparse_parser_inner_list(sfp, NULL);
1357
22.3k
    switch (rv) {
1358
16.8k
    case 0:
1359
16.8k
      break;
1360
5.25k
    case SFPARSE_ERR_EOF:
1361
5.25k
      return 0;
1362
175
    case SFPARSE_ERR_PARSE:
1363
175
      return rv;
1364
0
    default:
1365
0
      assert(0);
1366
0
      abort();
1367
22.3k
    }
1368
22.3k
  }
1369
5.42k
}
1370
1371
55.3k
static int parser_next_key_or_item(sfparse_parser *sfp) {
1372
55.3k
  parser_discard_ows(sfp);
1373
1374
55.3k
  if (parser_eof(sfp)) {
1375
46.0k
    return SFPARSE_ERR_EOF;
1376
46.0k
  }
1377
1378
9.26k
  if (*sfp->pos != ',') {
1379
123
    return SFPARSE_ERR_PARSE;
1380
123
  }
1381
1382
9.14k
  ++sfp->pos;
1383
1384
9.14k
  parser_discard_ows(sfp);
1385
9.14k
  if (parser_eof(sfp)) {
1386
25
    return SFPARSE_ERR_PARSE;
1387
25
  }
1388
1389
9.11k
  return 0;
1390
9.14k
}
1391
1392
59.7k
static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) {
1393
59.7k
  int rv;
1394
1395
59.7k
  if (parser_eof(sfp) || *(sfp->pos) != '=') {
1396
    /* Boolean true */
1397
15.0k
    if (dest) {
1398
15.0k
      dest->type = SFPARSE_TYPE_BOOLEAN;
1399
15.0k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1400
15.0k
      dest->boolean = 1;
1401
15.0k
    }
1402
1403
15.0k
    sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
1404
1405
15.0k
    return 0;
1406
15.0k
  }
1407
1408
44.6k
  ++sfp->pos;
1409
1410
44.6k
  if (parser_eof(sfp)) {
1411
31
    return SFPARSE_ERR_PARSE;
1412
31
  }
1413
1414
44.6k
  if (*sfp->pos == '(') {
1415
5.44k
    if (dest) {
1416
5.44k
      dest->type = SFPARSE_TYPE_INNER_LIST;
1417
5.44k
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1418
5.44k
    }
1419
1420
5.44k
    ++sfp->pos;
1421
1422
5.44k
    sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE;
1423
1424
5.44k
    return 0;
1425
5.44k
  }
1426
1427
39.2k
  rv = parser_bare_item(sfp, dest);
1428
39.2k
  if (rv != 0) {
1429
3.34k
    return rv;
1430
3.34k
  }
1431
1432
35.8k
  sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
1433
1434
35.8k
  return 0;
1435
39.2k
}
1436
1437
int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
1438
107k
                        sfparse_value *dest_value) {
1439
107k
  int rv;
1440
1441
107k
  switch (sfp->state) {
1442
5.42k
  case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
1443
5.42k
    rv = parser_skip_inner_list(sfp);
1444
5.42k
    if (rv != 0) {
1445
175
      return rv;
1446
175
    }
1447
1448
    /* fall through */
1449
55.9k
  case SFPARSE_STATE_DICT_BEFORE_PARAMS:
1450
55.9k
    rv = parser_skip_params(sfp);
1451
55.9k
    if (rv != 0) {
1452
590
      return rv;
1453
590
    }
1454
1455
    /* fall through */
1456
55.3k
  case SFPARSE_STATE_DICT_AFTER:
1457
55.3k
    rv = parser_next_key_or_item(sfp);
1458
55.3k
    if (rv != 0) {
1459
46.2k
      return rv;
1460
46.2k
    }
1461
1462
9.11k
    break;
1463
51.2k
  case SFPARSE_STATE_INITIAL:
1464
51.2k
    parser_discard_sp(sfp);
1465
1466
51.2k
    if (parser_eof(sfp)) {
1467
579
      return SFPARSE_ERR_EOF;
1468
579
    }
1469
1470
50.6k
    break;
1471
50.6k
  default:
1472
0
    assert(0);
1473
0
    abort();
1474
107k
  }
1475
1476
59.7k
  rv = parser_key(sfp, dest_key);
1477
59.7k
  if (rv != 0) {
1478
70
    return rv;
1479
70
  }
1480
1481
59.7k
  return parser_dict_value(sfp, dest_value);
1482
59.7k
}
1483
1484
0
int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
1485
0
  int rv;
1486
1487
0
  switch (sfp->state) {
1488
0
  case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
1489
0
    rv = parser_skip_inner_list(sfp);
1490
0
    if (rv != 0) {
1491
0
      return rv;
1492
0
    }
1493
1494
    /* fall through */
1495
0
  case SFPARSE_STATE_LIST_BEFORE_PARAMS:
1496
0
    rv = parser_skip_params(sfp);
1497
0
    if (rv != 0) {
1498
0
      return rv;
1499
0
    }
1500
1501
    /* fall through */
1502
0
  case SFPARSE_STATE_LIST_AFTER:
1503
0
    rv = parser_next_key_or_item(sfp);
1504
0
    if (rv != 0) {
1505
0
      return rv;
1506
0
    }
1507
1508
0
    break;
1509
0
  case SFPARSE_STATE_INITIAL:
1510
0
    parser_discard_sp(sfp);
1511
1512
0
    if (parser_eof(sfp)) {
1513
0
      return SFPARSE_ERR_EOF;
1514
0
    }
1515
1516
0
    break;
1517
0
  default:
1518
0
    assert(0);
1519
0
    abort();
1520
0
  }
1521
1522
0
  if (*sfp->pos == '(') {
1523
0
    if (dest) {
1524
0
      dest->type = SFPARSE_TYPE_INNER_LIST;
1525
0
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1526
0
    }
1527
1528
0
    ++sfp->pos;
1529
1530
0
    sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;
1531
1532
0
    return 0;
1533
0
  }
1534
1535
0
  rv = parser_bare_item(sfp, dest);
1536
0
  if (rv != 0) {
1537
0
    return rv;
1538
0
  }
1539
1540
0
  sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;
1541
1542
0
  return 0;
1543
0
}
1544
1545
0
int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
1546
0
  int rv;
1547
1548
0
  switch (sfp->state) {
1549
0
  case SFPARSE_STATE_INITIAL:
1550
0
    parser_discard_sp(sfp);
1551
1552
0
    if (parser_eof(sfp)) {
1553
0
      return SFPARSE_ERR_PARSE;
1554
0
    }
1555
1556
0
    break;
1557
0
  case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
1558
0
    rv = parser_skip_inner_list(sfp);
1559
0
    if (rv != 0) {
1560
0
      return rv;
1561
0
    }
1562
1563
    /* fall through */
1564
0
  case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
1565
0
    rv = parser_skip_params(sfp);
1566
0
    if (rv != 0) {
1567
0
      return rv;
1568
0
    }
1569
1570
    /* fall through */
1571
0
  case SFPARSE_STATE_ITEM_AFTER:
1572
0
    parser_discard_sp(sfp);
1573
1574
0
    if (!parser_eof(sfp)) {
1575
0
      return SFPARSE_ERR_PARSE;
1576
0
    }
1577
1578
0
    return SFPARSE_ERR_EOF;
1579
0
  default:
1580
0
    assert(0);
1581
0
    abort();
1582
0
  }
1583
1584
0
  if (*sfp->pos == '(') {
1585
0
    if (dest) {
1586
0
      dest->type = SFPARSE_TYPE_INNER_LIST;
1587
0
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
1588
0
    }
1589
1590
0
    ++sfp->pos;
1591
1592
0
    sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;
1593
1594
0
    return 0;
1595
0
  }
1596
1597
0
  rv = parser_bare_item(sfp, dest);
1598
0
  if (rv != 0) {
1599
0
    return rv;
1600
0
  }
1601
1602
0
  sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;
1603
1604
0
  return 0;
1605
0
}
1606
1607
void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data,
1608
51.2k
                         size_t datalen) {
1609
51.2k
  if (datalen == 0) {
1610
579
    sfp->pos = sfp->end = NULL;
1611
50.6k
  } else {
1612
50.6k
    sfp->pos = data;
1613
50.6k
    sfp->end = data + datalen;
1614
50.6k
  }
1615
1616
51.2k
  sfp->state = SFPARSE_STATE_INITIAL;
1617
51.2k
}
1618
1619
0
void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) {
1620
0
  const uint8_t *p, *q;
1621
0
  uint8_t *o;
1622
0
  size_t len, slen;
1623
1624
0
  if (src->len == 0) {
1625
0
    dest->len = 0;
1626
1627
0
    return;
1628
0
  }
1629
1630
0
  o = dest->base;
1631
0
  p = src->base;
1632
0
  len = src->len;
1633
1634
0
  for (;;) {
1635
0
    q = memchr(p, '\\', len);
1636
0
    if (q == NULL) {
1637
0
      memcpy(o, p, len);
1638
0
      o += len;
1639
1640
0
      dest->len = (size_t)(o - dest->base);
1641
1642
0
      return;
1643
0
    }
1644
1645
0
    slen = (size_t)(q - p);
1646
0
    memcpy(o, p, slen);
1647
0
    o += slen;
1648
1649
0
    p = q + 1;
1650
0
    *o++ = *p++;
1651
0
    len -= slen + 2;
1652
0
  }
1653
0
}
1654
1655
0
void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
1656
0
  static const int index_tbl[] = {
1657
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1658
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1659
0
    -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
1660
0
    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
1661
0
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
1662
0
    -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
1663
0
    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1664
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1665
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1666
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1667
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1668
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1669
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1670
0
    -1, -1, -1, -1, -1, -1, -1, -1, -1};
1671
0
  uint8_t *o;
1672
0
  const uint8_t *p, *end;
1673
0
  uint32_t n;
1674
0
  size_t i, left;
1675
0
  int idx;
1676
1677
0
  if (src->len == 0) {
1678
0
    dest->len = 0;
1679
1680
0
    return;
1681
0
  }
1682
1683
0
  o = dest->base;
1684
0
  p = src->base;
1685
0
  left = src->len & 0x3;
1686
0
  if (left == 0 && src->base[src->len - 1] == '=') {
1687
0
    left = 4;
1688
0
  }
1689
0
  end = src->base + src->len - left;
1690
1691
0
  for (; p != end;) {
1692
0
    n = 0;
1693
1694
0
    for (i = 1; i <= 4; ++i, ++p) {
1695
0
      idx = index_tbl[*p];
1696
1697
0
      assert(idx != -1);
1698
1699
0
      n += (uint32_t)(idx << (24 - i * 6));
1700
0
    }
1701
1702
0
    *o++ = (uint8_t)(n >> 16);
1703
0
    *o++ = (n >> 8) & 0xffu;
1704
0
    *o++ = n & 0xffu;
1705
0
  }
1706
1707
0
  switch (left) {
1708
0
  case 0:
1709
0
    goto fin;
1710
0
  case 1:
1711
0
    assert(0);
1712
0
    abort();
1713
0
  case 3:
1714
0
    if (src->base[src->len - 1] == '=') {
1715
0
      left = 2;
1716
0
    }
1717
1718
0
    break;
1719
0
  case 4:
1720
0
    assert('=' == src->base[src->len - 1]);
1721
1722
0
    if (src->base[src->len - 2] == '=') {
1723
0
      left = 2;
1724
0
    } else {
1725
0
      left = 3;
1726
0
    }
1727
1728
0
    break;
1729
0
  }
1730
1731
0
  switch (left) {
1732
0
  case 2:
1733
0
    *o = (uint8_t)(index_tbl[*p++] << 2);
1734
0
    *o++ |= (uint8_t)(index_tbl[*p++] >> 4);
1735
1736
0
    break;
1737
0
  case 3:
1738
0
    n = (uint32_t)(index_tbl[*p++] << 10);
1739
0
    n += (uint32_t)(index_tbl[*p++] << 4);
1740
0
    n += (uint32_t)(index_tbl[*p++] >> 2);
1741
0
    *o++ = (n >> 8) & 0xffu;
1742
0
    *o++ = n & 0xffu;
1743
1744
0
    break;
1745
0
  }
1746
1747
0
fin:
1748
0
  dest->len = (size_t)(o - dest->base);
1749
0
}
1750
1751
0
void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) {
1752
0
  const uint8_t *p, *q;
1753
0
  uint8_t *o;
1754
0
  size_t len, slen;
1755
1756
0
  if (src->len == 0) {
1757
0
    dest->len = 0;
1758
1759
0
    return;
1760
0
  }
1761
1762
0
  o = dest->base;
1763
0
  p = src->base;
1764
0
  len = src->len;
1765
1766
0
  for (;;) {
1767
0
    q = memchr(p, '%', len);
1768
0
    if (q == NULL) {
1769
0
      memcpy(o, p, len);
1770
0
      o += len;
1771
1772
0
      dest->len = (size_t)(o - dest->base);
1773
1774
0
      return;
1775
0
    }
1776
1777
0
    slen = (size_t)(q - p);
1778
0
    memcpy(o, p, slen);
1779
0
    o += slen;
1780
1781
0
    p = q + 1;
1782
1783
0
    pctdecode(o++, &p);
1784
1785
0
    len -= slen + 3;
1786
0
  }
1787
0
}