/src/nghttp2/lib/sfparse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * sfparse |
3 | | * |
4 | | * Copyright (c) 2023 sfparse contributors |
5 | | * Copyright (c) 2019 nghttp3 contributors |
6 | | * Copyright (c) 2015 nghttp2 contributors |
7 | | * |
8 | | * Permission is hereby granted, free of charge, to any person obtaining |
9 | | * a copy of this software and associated documentation files (the |
10 | | * "Software"), to deal in the Software without restriction, including |
11 | | * without limitation the rights to use, copy, modify, merge, publish, |
12 | | * distribute, sublicense, and/or sell copies of the Software, and to |
13 | | * permit persons to whom the Software is furnished to do so, subject to |
14 | | * the following conditions: |
15 | | * |
16 | | * The above copyright notice and this permission notice shall be |
17 | | * included in all copies or substantial portions of the Software. |
18 | | * |
19 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
20 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
21 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
22 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
23 | | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
24 | | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
25 | | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
26 | | */ |
27 | | #include "sfparse.h" |
28 | | |
29 | | #include <string.h> |
30 | | #include <assert.h> |
31 | | #include <stdlib.h> |
32 | | |
33 | | #ifdef __AVX2__ |
34 | | # include <immintrin.h> |
35 | | #endif /* __AVX2__ */ |
36 | | |
/* Parser state is a small bit field.  These three values (0x08, 0x10,
   0x18) occupy the bits above the inner-list flag and identify the
   kind of top-level structure being parsed. */
37 | 187k | #define SFPARSE_STATE_DICT 0x08u |
38 | 0 | #define SFPARSE_STATE_LIST 0x10u |
39 | 0 | #define SFPARSE_STATE_ITEM 0x18u |
40 | | |
/* Single flag bit that is OR-ed into the state; it is cleared by
   parser_unset_inner_list_state(). */
41 | 17.1k | #define SFPARSE_STATE_INNER_LIST 0x04u |
42 | | |
/* Operation sub-state stored in the two lowest bits (see
   SFPARSE_STATE_OP_MASK). */
43 | 18.6k | #define SFPARSE_STATE_BEFORE 0x00u |
44 | 230k | #define SFPARSE_STATE_BEFORE_PARAMS 0x01u |
45 | 95.6k | #define SFPARSE_STATE_PARAMS 0x02u |
46 | 152k | #define SFPARSE_STATE_AFTER 0x03u |
47 | | |
/* Mask that selects the operation sub-state bits. */
48 | 293k | #define SFPARSE_STATE_OP_MASK 0x03u |
49 | | |
/* Helpers that build a combined state value: NAME is pasted onto
   SFPARSE_STATE_ (DICT, LIST or ITEM) and OR-ed with the requested
   sub-state and, for the last helper, the inner-list flag. */
50 | | #define SFPARSE_SET_STATE_AFTER(NAME) \ |
51 | 59.5k | (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER) |
52 | | #define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME) \ |
53 | 115k | (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS) |
54 | | #define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME) \ |
55 | 12.4k | (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE) |
56 | | |
/* Pre-combined states for dictionaries. */
57 | 59.5k | #define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT) |
58 | 115k | #define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT) |
59 | | #define SFPARSE_STATE_DICT_INNER_LIST_BEFORE \ |
60 | 12.4k | SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT) |
61 | | |
/* Pre-combined states for lists. */
62 | 0 | #define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST) |
63 | 0 | #define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST) |
64 | | #define SFPARSE_STATE_LIST_INNER_LIST_BEFORE \ |
65 | 0 | SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST) |
66 | | |
/* Pre-combined states for items. */
67 | 0 | #define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM) |
68 | 0 | #define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM) |
69 | | #define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE \ |
70 | 0 | SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM) |
71 | | |
/* State value with no bits set. */
72 | 151k | #define SFPARSE_STATE_INITIAL 0x00u |
73 | | |
/* Expands to the case labels '0'..'9'.  The final label carries no
   colon so that the use site is written as "DIGIT_CASES:". */
74 | | #define DIGIT_CASES \ |
75 | 94.8k | case '0': \ |
76 | 112k | case '1': \ |
77 | 128k | case '2': \ |
78 | 138k | case '3': \ |
79 | 150k | case '4': \ |
80 | 160k | case '5': \ |
81 | 170k | case '6': \ |
82 | 180k | case '7': \ |
83 | 192k | case '8': \ |
84 | 202k | case '9' |
85 | | |
/* Expands to the case labels 'a'..'z' (lowercase ASCII letters); the
   trailing colon is supplied at the use site. */
86 | | #define LCALPHA_CASES \ |
87 | 257k | case 'a': \ |
88 | 269k | case 'b': \ |
89 | 285k | case 'c': \ |
90 | 292k | case 'd': \ |
91 | 311k | case 'e': \ |
92 | 321k | case 'f': \ |
93 | 332k | case 'g': \ |
94 | 345k | case 'h': \ |
95 | 363k | case 'i': \ |
96 | 372k | case 'j': \ |
97 | 380k | case 'k': \ |
98 | 390k | case 'l': \ |
99 | 397k | case 'm': \ |
100 | 408k | case 'n': \ |
101 | 420k | case 'o': \ |
102 | 429k | case 'p': \ |
103 | 438k | case 'q': \ |
104 | 449k | case 'r': \ |
105 | 471k | case 's': \ |
106 | 483k | case 't': \ |
107 | 492k | case 'u': \ |
108 | 501k | case 'v': \ |
109 | 510k | case 'w': \ |
110 | 519k | case 'x': \ |
111 | 532k | case 'y': \ |
112 | 540k | case 'z' |
113 | | |
/* Expands to the case labels 'A'..'Z' (uppercase ASCII letters); the
   trailing colon is supplied at the use site. */
114 | | #define UCALPHA_CASES \ |
115 | 107k | case 'A': \ |
116 | 111k | case 'B': \ |
117 | 114k | case 'C': \ |
118 | 117k | case 'D': \ |
119 | 120k | case 'E': \ |
120 | 123k | case 'F': \ |
121 | 125k | case 'G': \ |
122 | 129k | case 'H': \ |
123 | 132k | case 'I': \ |
124 | 135k | case 'J': \ |
125 | 138k | case 'K': \ |
126 | 141k | case 'L': \ |
127 | 144k | case 'M': \ |
128 | 147k | case 'N': \ |
129 | 150k | case 'O': \ |
130 | 155k | case 'P': \ |
131 | 158k | case 'Q': \ |
132 | 162k | case 'R': \ |
133 | 165k | case 'S': \ |
134 | 169k | case 'T': \ |
135 | 172k | case 'U': \ |
136 | 175k | case 'V': \ |
137 | 179k | case 'W': \ |
138 | 182k | case 'X': \ |
139 | 186k | case 'Y': \ |
140 | 188k | case 'Z' |
141 | | |
/* Case labels for all ASCII letters: uppercase followed by
   lowercase. */
142 | | #define ALPHA_CASES \ |
143 | 59.6k | UCALPHA_CASES: \ |
144 | 61.9k | LCALPHA_CASES |
145 | | |
/* Case labels for every byte that parser_token() accepts after the
   first byte of a token: the listed punctuation, digits, ':' , '/'
   and ASCII letters. */
146 | | #define TOKEN_CASES \ |
147 | 883 | case '!': \ |
148 | 1.88k | case '#': \ |
149 | 2.57k | case '$': \ |
150 | 4.92k | case '%': \ |
151 | 6.26k | case '&': \ |
152 | 7.41k | case '\'': \ |
153 | 9.03k | case '*': \ |
154 | 10.3k | case '+': \ |
155 | 11.6k | case '-': \ |
156 | 14.0k | case '.': \ |
157 | 16.0k | case '/': \ |
158 | 48.6k | DIGIT_CASES: \ |
159 | 50.8k | case ':': \ |
160 | 83.5k | UCALPHA_CASES: \ |
161 | 84.0k | case '^': \ |
162 | 85.0k | case '_': \ |
163 | 85.7k | case '`': \ |
164 | 136k | LCALPHA_CASES: \ |
165 | 137k | case '|': \ |
166 | 137k | case '~' |
167 | | |
/* Case labels 'a'..'f': the lowercase hexadecimal letters accepted by
   pctdecode(). */
168 | | #define LCHEXALPHA_CASES \ |
169 | 2.50k | case 'a': \ |
170 | 5.23k | case 'b': \ |
171 | 8.12k | case 'c': \ |
172 | 11.5k | case 'd': \ |
173 | 13.3k | case 'e': \ |
174 | 14.8k | case 'f' |
175 | | |
/* Case labels for the byte values 0x00..0x1f. */
176 | | #define X00_1F_CASES \ |
177 | 0 | case 0x00: \ |
178 | 0 | case 0x01: \ |
179 | 0 | case 0x02: \ |
180 | 0 | case 0x03: \ |
181 | 0 | case 0x04: \ |
182 | 0 | case 0x05: \ |
183 | 0 | case 0x06: \ |
184 | 0 | case 0x07: \ |
185 | 0 | case 0x08: \ |
186 | 132 | case 0x09: \ |
187 | 132 | case 0x0a: \ |
188 | 132 | case 0x0b: \ |
189 | 132 | case 0x0c: \ |
190 | 132 | case 0x0d: \ |
191 | 132 | case 0x0e: \ |
192 | 132 | case 0x0f: \ |
193 | 132 | case 0x10: \ |
194 | 132 | case 0x11: \ |
195 | 132 | case 0x12: \ |
196 | 132 | case 0x13: \ |
197 | 132 | case 0x14: \ |
198 | 132 | case 0x15: \ |
199 | 132 | case 0x16: \ |
200 | 132 | case 0x17: \ |
201 | 132 | case 0x18: \ |
202 | 132 | case 0x19: \ |
203 | 132 | case 0x1a: \ |
204 | 132 | case 0x1b: \ |
205 | 132 | case 0x1c: \ |
206 | 132 | case 0x1d: \ |
207 | 132 | case 0x1e: \ |
208 | 132 | case 0x1f |
209 | | |
/* Case labels for 0x20 (' ') and 0x21 ('!'), i.e. the printable bytes
   below the double quote. */
210 | | #define X20_21_CASES \ |
211 | 909 | case ' ': \ |
212 | 1.52k | case '!' |
213 | | |
/* Case labels for 0x23 ('#') through 0x5b ('['), i.e. the bytes
   between the double quote and the backslash. */
214 | | #define X23_5B_CASES \ |
215 | 2.26k | case '#': \ |
216 | 3.04k | case '$': \ |
217 | 5.71k | case '%': \ |
218 | 6.36k | case '&': \ |
219 | 7.02k | case '\'': \ |
220 | 7.76k | case '(': \ |
221 | 8.39k | case ')': \ |
222 | 9.10k | case '*': \ |
223 | 9.93k | case '+': \ |
224 | 10.7k | case ',': \ |
225 | 11.3k | case '-': \ |
226 | 12.2k | case '.': \ |
227 | 13.0k | case '/': \ |
228 | 24.1k | DIGIT_CASES: \ |
229 | 24.9k | case ':': \ |
230 | 25.6k | case ';': \ |
231 | 26.2k | case '<': \ |
232 | 27.0k | case '=': \ |
233 | 27.6k | case '>': \ |
234 | 28.4k | case '?': \ |
235 | 29.2k | case '@': \ |
236 | 45.7k | UCALPHA_CASES: \ |
237 | 46.3k | case '[' |
238 | | |
/* Case labels for 0x5d (']') through 0x7e ('~'), i.e. the printable
   bytes above the backslash. */
239 | | #define X5D_7E_CASES \ |
240 | 46.9k | case ']': \ |
241 | 47.7k | case '^': \ |
242 | 48.2k | case '_': \ |
243 | 48.7k | case '`': \ |
244 | 68.4k | LCALPHA_CASES: \ |
245 | 69.1k | case '{': \ |
246 | 69.6k | case '|': \ |
247 | 70.1k | case '}': \ |
248 | 70.7k | case '~' |
249 | | |
/* Case labels for the byte values 0x7f..0xff (DEL and every byte with
   the high bit set). */
250 | | #define X7F_FF_CASES \ |
251 | 132 | case 0x7f: \ |
252 | 264 | case 0x80: \ |
253 | 396 | case 0x81: \ |
254 | 528 | case 0x82: \ |
255 | 660 | case 0x83: \ |
256 | 792 | case 0x84: \ |
257 | 924 | case 0x85: \ |
258 | 1.05k | case 0x86: \ |
259 | 1.18k | case 0x87: \ |
260 | 1.32k | case 0x88: \ |
261 | 1.45k | case 0x89: \ |
262 | 1.58k | case 0x8a: \ |
263 | 1.71k | case 0x8b: \ |
264 | 1.84k | case 0x8c: \ |
265 | 1.98k | case 0x8d: \ |
266 | 2.11k | case 0x8e: \ |
267 | 2.25k | case 0x8f: \ |
268 | 2.39k | case 0x90: \ |
269 | 2.52k | case 0x91: \ |
270 | 2.65k | case 0x92: \ |
271 | 2.78k | case 0x93: \ |
272 | 2.91k | case 0x94: \ |
273 | 3.05k | case 0x95: \ |
274 | 3.18k | case 0x96: \ |
275 | 3.31k | case 0x97: \ |
276 | 3.44k | case 0x98: \ |
277 | 3.58k | case 0x99: \ |
278 | 3.71k | case 0x9a: \ |
279 | 3.84k | case 0x9b: \ |
280 | 3.98k | case 0x9c: \ |
281 | 4.11k | case 0x9d: \ |
282 | 4.24k | case 0x9e: \ |
283 | 4.37k | case 0x9f: \ |
284 | 4.50k | case 0xa0: \ |
285 | 4.64k | case 0xa1: \ |
286 | 4.77k | case 0xa2: \ |
287 | 4.90k | case 0xa3: \ |
288 | 5.06k | case 0xa4: \ |
289 | 5.19k | case 0xa5: \ |
290 | 5.32k | case 0xa6: \ |
291 | 5.46k | case 0xa7: \ |
292 | 5.59k | case 0xa8: \ |
293 | 5.72k | case 0xa9: \ |
294 | 5.85k | case 0xaa: \ |
295 | 5.98k | case 0xab: \ |
296 | 6.12k | case 0xac: \ |
297 | 6.25k | case 0xad: \ |
298 | 6.38k | case 0xae: \ |
299 | 6.51k | case 0xaf: \ |
300 | 6.64k | case 0xb0: \ |
301 | 6.78k | case 0xb1: \ |
302 | 6.91k | case 0xb2: \ |
303 | 7.04k | case 0xb3: \ |
304 | 7.17k | case 0xb4: \ |
305 | 7.30k | case 0xb5: \ |
306 | 7.44k | case 0xb6: \ |
307 | 7.57k | case 0xb7: \ |
308 | 7.70k | case 0xb8: \ |
309 | 7.83k | case 0xb9: \ |
310 | 7.97k | case 0xba: \ |
311 | 8.11k | case 0xbb: \ |
312 | 8.24k | case 0xbc: \ |
313 | 8.37k | case 0xbd: \ |
314 | 8.50k | case 0xbe: \ |
315 | 8.66k | case 0xbf: \ |
316 | 8.79k | case 0xc0: \ |
317 | 8.93k | case 0xc1: \ |
318 | 9.09k | case 0xc2: \ |
319 | 9.22k | case 0xc3: \ |
320 | 9.36k | case 0xc4: \ |
321 | 9.49k | case 0xc5: \ |
322 | 9.63k | case 0xc6: \ |
323 | 9.76k | case 0xc7: \ |
324 | 9.89k | case 0xc8: \ |
325 | 10.0k | case 0xc9: \ |
326 | 10.1k | case 0xca: \ |
327 | 10.3k | case 0xcb: \ |
328 | 10.4k | case 0xcc: \ |
329 | 10.5k | case 0xcd: \ |
330 | 10.7k | case 0xce: \ |
331 | 10.8k | case 0xcf: \ |
332 | 10.9k | case 0xd0: \ |
333 | 11.0k | case 0xd1: \ |
334 | 11.2k | case 0xd2: \ |
335 | 11.3k | case 0xd3: \ |
336 | 11.4k | case 0xd4: \ |
337 | 11.6k | case 0xd5: \ |
338 | 11.7k | case 0xd6: \ |
339 | 11.8k | case 0xd7: \ |
340 | 12.0k | case 0xd8: \ |
341 | 12.1k | case 0xd9: \ |
342 | 12.3k | case 0xda: \ |
343 | 12.4k | case 0xdb: \ |
344 | 12.5k | case 0xdc: \ |
345 | 12.7k | case 0xdd: \ |
346 | 12.8k | case 0xde: \ |
347 | 12.9k | case 0xdf: \ |
348 | 13.1k | case 0xe0: \ |
349 | 13.2k | case 0xe1: \ |
350 | 13.3k | case 0xe2: \ |
351 | 13.5k | case 0xe3: \ |
352 | 13.6k | case 0xe4: \ |
353 | 13.7k | case 0xe5: \ |
354 | 13.8k | case 0xe6: \ |
355 | 14.0k | case 0xe7: \ |
356 | 14.1k | case 0xe8: \ |
357 | 14.3k | case 0xe9: \ |
358 | 14.4k | case 0xea: \ |
359 | 14.5k | case 0xeb: \ |
360 | 14.7k | case 0xec: \ |
361 | 14.8k | case 0xed: \ |
362 | 14.9k | case 0xee: \ |
363 | 15.1k | case 0xef: \ |
364 | 15.2k | case 0xf0: \ |
365 | 15.3k | case 0xf1: \ |
366 | 15.5k | case 0xf2: \ |
367 | 15.6k | case 0xf3: \ |
368 | 15.7k | case 0xf4: \ |
369 | 15.8k | case 0xf5: \ |
370 | 16.0k | case 0xf6: \ |
371 | 16.1k | case 0xf7: \ |
372 | 16.2k | case 0xf8: \ |
373 | 16.4k | case 0xf9: \ |
374 | 16.5k | case 0xfa: \ |
375 | 16.6k | case 0xfb: \ |
376 | 16.8k | case 0xfc: \ |
377 | 16.9k | case 0xfd: \ |
378 | 17.0k | case 0xfe: \ |
379 | 17.2k | case 0xff |
380 | | |
/* Returns 1 if |c| is a space or a horizontal tab, and 0 for every
   other byte. */
381 | 21.1k | static int is_ws(uint8_t c) { |
382 | 21.1k | switch (c) { |
383 | 598 | case ' ': |
384 | 1.40k | case '\t': |
385 | 1.40k | return 1; |
386 | 19.7k | default: |
387 | 19.7k | return 0; |
388 | 21.1k | } |
389 | 21.1k | } |
390 | | |
/* ctz(v) is only provided for AVX2 builds.  With MSVC it is a function
   wrapping _BitScanForward and returning the bit index that intrinsic
   reports; with other compilers it is an alias for __builtin_ctz. */
391 | | #ifdef __AVX2__ |
392 | | # ifdef _MSC_VER |
393 | | # include <intrin.h> |
394 | | |
395 | | static int ctz(unsigned int v) { |
396 | | unsigned long n; |
397 | | |
398 | | /* Assume that v is not 0. */ |
399 | | _BitScanForward(&n, v); |
400 | | |
401 | | return (int)n; |
402 | | } |
403 | | # else /* !_MSC_VER */ |
404 | | # define ctz __builtin_ctz |
405 | | # endif /* !_MSC_VER */ |
406 | | #endif /* __AVX2__ */ |
407 | | |
/* Returns nonzero when the read position has reached the end of the
   input (sfp->pos == sfp->end). */
408 | 1.53M | static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; } |
409 | | |
/* Advances sfp->pos past any run of spaces and horizontal tabs,
   stopping at the first other byte or at the end of input. */
410 | 69.3k | static void parser_discard_ows(sfparse_parser *sfp) { |
411 | 70.7k | for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos) |
412 | 1.40k | ; |
413 | 69.3k | } |
414 | | |
/* Advances sfp->pos past any run of space characters only (tabs are
   not skipped), stopping at the end of input. */
415 | 113k | static void parser_discard_sp(sfparse_parser *sfp) { |
416 | 128k | for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos) |
417 | 14.9k | ; |
418 | 113k | } |
419 | | |
/* Replaces the operation sub-state (the bits covered by
   SFPARSE_STATE_OP_MASK) of sfp->state with |op|, leaving the other
   state bits untouched.  |op| is OR-ed in unmasked, so it should only
   use the masked bits. */
420 | 174k | static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) { |
421 | 174k | sfp->state &= ~SFPARSE_STATE_OP_MASK; |
422 | 174k | sfp->state |= op; |
423 | 174k | } |
424 | | |
/* Clears the SFPARSE_STATE_INNER_LIST flag in sfp->state. */
425 | 4.76k | static void parser_unset_inner_list_state(sfparse_parser *sfp) { |
426 | 4.76k | sfp->state &= ~SFPARSE_STATE_INNER_LIST; |
427 | 4.76k | } |
428 | | |
429 | | #ifdef __AVX2__ |
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that is NOT one of '_', '-', '.', '*', '0'..'9' or
   'a'..'z'.  Returns |last| if every byte is in that set.  The loop
   steps by 32 and terminates on first == last, so (last - first) must
   be a multiple of 32. */
430 | | static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) { |
431 | | const __m256i us = _mm256_set1_epi8('_'); |
432 | | const __m256i ds = _mm256_set1_epi8('-'); |
433 | | const __m256i dot = _mm256_set1_epi8('.'); |
434 | | const __m256i ast = _mm256_set1_epi8('*'); |
435 | | const __m256i r0l = _mm256_set1_epi8('0' - 1); |
436 | | const __m256i r0r = _mm256_set1_epi8('9' + 1); |
437 | | const __m256i r1l = _mm256_set1_epi8('a' - 1); |
438 | | const __m256i r1r = _mm256_set1_epi8('z' + 1); |
439 | | __m256i s, x; |
440 | | uint32_t m; |
441 | | |
442 | | for (; first != last; first += 32) { |
443 | | s = _mm256_loadu_si256((void *)first); |
444 | | |
/* x accumulates a per-byte mask of the bytes that belong to the
   accepted set. */
445 | | x = _mm256_cmpeq_epi8(s, us); |
446 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x); |
447 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x); |
448 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x); |
449 | | x = _mm256_or_si256( |
450 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)), |
451 | | x); |
452 | | x = _mm256_or_si256( |
453 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), |
454 | | x); |
455 | | |
/* Invert the mask so that set bits mark rejected bytes; the lowest
   set bit is the first such byte in this chunk. */
456 | | m = ~(uint32_t)_mm256_movemask_epi8(x); |
457 | | if (m) { |
458 | | return first + ctz(m); |
459 | | } |
460 | | } |
461 | | |
462 | | return last; |
463 | | } |
464 | | #endif /* __AVX2__ */ |
465 | | |
/* Parses a key starting at sfp->pos.  The first byte must be '*' or a
   lowercase letter; subsequent bytes may be '_', '-', '.', '*', a
   digit or a lowercase letter.  On success sfp->pos is left on the
   first byte that is not part of the key and, if |dest| is not NULL,
   |dest| receives the start and length of the key.

   Returns 0 on success, or SFPARSE_ERR_PARSE if the first byte is not
   a valid key start.  The first byte is read without an end-of-input
   check, so the caller presumably guarantees that at least one byte is
   available. */
466 | 104k | static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) { |
467 | 104k | const uint8_t *base; |
468 | | #ifdef __AVX2__ |
469 | | const uint8_t *last; |
470 | | #endif /* __AVX2__ */ |
471 | | |
472 | 104k | switch (*sfp->pos) { |
473 | 5.28k | case '*': |
474 | 103k | LCALPHA_CASES: |
475 | 103k | break; |
476 | 300 | default: |
477 | 300 | return SFPARSE_ERR_PARSE; |
478 | 104k | } |
479 | | |
480 | 103k | base = sfp->pos++; |
481 | | |
482 | | #ifdef __AVX2__ |
/* Fast path: scan the largest multiple-of-32 prefix of the remaining
   input with AVX2.  If it finds the end of the key we are done;
   otherwise the scalar loop below handles the tail. */
483 | | if (sfp->end - sfp->pos >= 32) { |
484 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
485 | | |
486 | | sfp->pos = find_char_key(sfp->pos, last); |
487 | | if (sfp->pos != last) { |
488 | | goto fin; |
489 | | } |
490 | | } |
491 | | #endif /* __AVX2__ */ |
492 | | |
493 | 229k | for (; !parser_eof(sfp); ++sfp->pos) { |
494 | 222k | switch (*sfp->pos) { |
495 | 777 | case '_': |
496 | 3.98k | case '-': |
497 | 6.11k | case '.': |
498 | 9.66k | case '*': |
499 | 313k | DIGIT_CASES: |
500 | 2.17M | LCALPHA_CASES: |
501 | 2.17M | continue; |
502 | 222k | } |
503 | | |
/* Any other byte terminates the key. */
504 | 96.6k | break; |
505 | 222k | } |
506 | | |
507 | | #ifdef __AVX2__ |
508 | | fin: |
509 | | #endif /* __AVX2__ */ |
510 | 103k | if (dest) { |
511 | 84.6k | dest->base = (uint8_t *)base; |
512 | 84.6k | dest->len = (size_t)(sfp->pos - dest->base); |
513 | 84.6k | } |
514 | | |
515 | 103k | return 0; |
516 | 103k | } |
517 | | |
/* Parses an integer or a decimal starting at sfp->pos.

   An optional leading '-' is followed by one or more digits.  Without
   a following '.', the value is an integer of at most 15 digits.
   With a '.', the value is a decimal: at most 12 integer digits
   followed by 1 to 3 fractional digits.

   If |dest| is not NULL it receives the type, flags and value: the
   signed integer, or the signed numerator together with a denominator
   of 10, 100 or 1000.  sfp->pos is left just past the last consumed
   byte.

   Returns 0 on success, or SFPARSE_ERR_PARSE on malformed input. */
518 | 14.0k | static int parser_number(sfparse_parser *sfp, sfparse_value *dest) { |
519 | 14.0k | int sign = 1; |
520 | 14.0k | int64_t value = 0; |
/* len counts every digit consumed so far; fpos records how many of
   them precede the decimal point. */
521 | 14.0k | size_t len = 0; |
522 | 14.0k | size_t fpos = 0; |
523 | | |
524 | 14.0k | if (*sfp->pos == '-') { |
525 | 938 | ++sfp->pos; |
526 | 938 | if (parser_eof(sfp)) { |
527 | 134 | return SFPARSE_ERR_PARSE; |
528 | 134 | } |
529 | | |
530 | 804 | sign = -1; |
531 | 804 | } |
532 | | |
533 | 13.8k | assert(!parser_eof(sfp)); |
534 | | |
535 | 38.2k | for (; !parser_eof(sfp); ++sfp->pos) { |
536 | 34.6k | switch (*sfp->pos) { |
537 | 171k | DIGIT_CASES: |
538 | 171k | if (++len > 15) { |
539 | 132 | return SFPARSE_ERR_PARSE; |
540 | 132 | } |
541 | | |
542 | 24.3k | value *= 10; |
543 | 24.3k | value += *sfp->pos - '0'; |
544 | | |
545 | 24.3k | continue; |
546 | 34.6k | } |
547 | | |
548 | 10.1k | break; |
549 | 34.6k | } |
550 | | |
/* At least one digit is required. */
551 | 13.7k | if (len == 0) { |
552 | 165 | return SFPARSE_ERR_PARSE; |
553 | 165 | } |
554 | | |
/* No decimal point follows: the value is an integer. */
555 | 13.5k | if (parser_eof(sfp) || *sfp->pos != '.') { |
556 | 10.1k | if (dest) { |
557 | 5.53k | dest->type = SFPARSE_TYPE_INTEGER; |
558 | 5.53k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
559 | 5.53k | dest->integer = value * sign; |
560 | 5.53k | } |
561 | | |
562 | 10.1k | return 0; |
563 | 10.1k | } |
564 | | |
565 | | /* decimal */ |
566 | | |
/* The integer part of a decimal is limited to 12 digits. */
567 | 3.40k | if (len > 12) { |
568 | 132 | return SFPARSE_ERR_PARSE; |
569 | 132 | } |
570 | | |
571 | 3.26k | fpos = len; |
572 | | |
573 | 3.26k | ++sfp->pos; |
574 | | |
575 | 10.3k | for (; !parser_eof(sfp); ++sfp->pos) { |
576 | 8.87k | switch (*sfp->pos) { |
577 | 59.7k | DIGIT_CASES: |
578 | 59.7k | if (++len > 15) { |
579 | 132 | return SFPARSE_ERR_PARSE; |
580 | 132 | } |
581 | | |
582 | 7.05k | value *= 10; |
583 | 7.05k | value += *sfp->pos - '0'; |
584 | | |
585 | 7.05k | continue; |
586 | 8.87k | } |
587 | | |
588 | 1.68k | break; |
589 | 8.87k | } |
590 | | |
/* Reject an empty fractional part and more than 3 fractional
   digits. */
591 | 3.13k | if (fpos == len || len - fpos > 3) { |
592 | 345 | return SFPARSE_ERR_PARSE; |
593 | 345 | } |
594 | | |
595 | 2.79k | if (dest) { |
596 | 1.76k | dest->type = SFPARSE_TYPE_DECIMAL; |
597 | 1.76k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
598 | 1.76k | dest->decimal.numer = value * sign; |
599 | | |
/* The denominator is 10 raised to the number of fractional digits. */
600 | 1.76k | switch (len - fpos) { |
601 | 785 | case 1: |
602 | 785 | dest->decimal.denom = 10; |
603 | | |
604 | 785 | break; |
605 | 503 | case 2: |
606 | 503 | dest->decimal.denom = 100; |
607 | | |
608 | 503 | break; |
609 | 475 | case 3: |
610 | 475 | dest->decimal.denom = 1000; |
611 | | |
612 | 475 | break; |
613 | 1.76k | } |
614 | 1.76k | } |
615 | | |
616 | 2.79k | return 0; |
617 | 2.79k | } |
618 | | |
/* Parses a date: '@' followed by an integer as accepted by
   parser_number().  A decimal after '@' is rejected.  If |dest| is not
   NULL it receives the parsed value with its type set to
   SFPARSE_TYPE_DATE.

   Returns 0 on success; otherwise SFPARSE_ERR_PARSE or the error
   returned by parser_number(). */
619 | 1.55k | static int parser_date(sfparse_parser *sfp, sfparse_value *dest) { |
620 | 1.55k | int rv; |
621 | 1.55k | sfparse_value val; |
622 | | |
623 | | /* The first byte has already been validated by the caller. */ |
624 | 1.55k | assert('@' == *sfp->pos); |
625 | | |
626 | 1.55k | ++sfp->pos; |
627 | | |
628 | 1.55k | if (parser_eof(sfp)) { |
629 | 133 | return SFPARSE_ERR_PARSE; |
630 | 133 | } |
631 | | |
/* Always parse into a local value so the type can be checked even
   when |dest| is NULL. */
632 | 1.42k | rv = parser_number(sfp, &val); |
633 | 1.42k | if (rv != 0) { |
634 | 219 | return rv; |
635 | 219 | } |
636 | | |
637 | 1.20k | if (val.type != SFPARSE_TYPE_INTEGER) { |
638 | 101 | return SFPARSE_ERR_PARSE; |
639 | 101 | } |
640 | | |
641 | 1.10k | if (dest) { |
642 | 528 | *dest = val; |
643 | 528 | dest->type = SFPARSE_TYPE_DATE; |
644 | 528 | } |
645 | | |
646 | 1.10k | return 0; |
647 | 1.20k | } |
648 | | |
649 | | #ifdef __AVX2__ |
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that needs special handling inside a string: a
   backslash, a double quote, 0x7f, or any byte that compares below
   ' ' as a signed 8-bit value (which covers 0x00..0x1f and also
   0x80..0xff).  Returns |last| if there is no such byte.  The loop
   steps by 32 and terminates on first == last, so (last - first) must
   be a multiple of 32. */
650 | | static const uint8_t *find_char_string(const uint8_t *first, |
651 | | const uint8_t *last) { |
652 | | const __m256i bs = _mm256_set1_epi8('\\'); |
653 | | const __m256i dq = _mm256_set1_epi8('"'); |
654 | | const __m256i del = _mm256_set1_epi8(0x7f); |
655 | | const __m256i sp = _mm256_set1_epi8(' '); |
656 | | __m256i s, x; |
657 | | uint32_t m; |
658 | | |
659 | | for (; first != last; first += 32) { |
660 | | s = _mm256_loadu_si256((void *)first); |
661 | | |
/* x marks the bytes to stop at; unlike the other find_char_*
   helpers the mask is not inverted below. */
662 | | x = _mm256_cmpgt_epi8(sp, s); |
663 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x); |
664 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x); |
665 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x); |
666 | | |
667 | | m = (uint32_t)_mm256_movemask_epi8(x); |
668 | | if (m) { |
669 | | return first + ctz(m); |
670 | | } |
671 | | } |
672 | | |
673 | | return last; |
674 | | } |
675 | | #endif /* __AVX2__ */ |
676 | | |
/* Parses a double-quoted string starting at sfp->pos, which must
   point at the opening '"'.

   Between the quotes, bytes 0x20..0x7e other than '"' and '\\' are
   accepted as-is.  A backslash must be followed by '"' or '\\'.  Any
   other byte, or reaching the end of input before the closing quote,
   is an error.

   On success sfp->pos is left just past the closing quote.  If |dest|
   is not NULL, dest->vec covers the raw bytes between the quotes
   (escape sequences are not decoded; base is NULL when the string is
   empty) and dest->flags is SFPARSE_VALUE_FLAG_ESCAPED_STRING if at
   least one escape sequence was seen, SFPARSE_VALUE_FLAG_NONE
   otherwise.

   Returns 0 on success, or SFPARSE_ERR_PARSE on error. */
677 | 7.81k | static int parser_string(sfparse_parser *sfp, sfparse_value *dest) { |
678 | 7.81k | const uint8_t *base; |
679 | | #ifdef __AVX2__ |
680 | | const uint8_t *last; |
681 | | #endif /* __AVX2__ */ |
682 | 7.81k | uint32_t flags = SFPARSE_VALUE_FLAG_NONE; |
683 | | |
684 | | /* The first byte has already been validated by the caller. */ |
685 | 7.81k | assert('"' == *sfp->pos); |
686 | | |
687 | 7.81k | base = ++sfp->pos; |
688 | | |
689 | | #ifdef __AVX2__ |
/* Fast path: while at least 32 bytes remain, use AVX2 to skip to the
   next byte that needs attention and handle it here.  Once the
   multiple-of-32 prefix contains no such byte, or fewer than 32 bytes
   remain, the scalar loop below takes over. */
690 | | for (; sfp->end - sfp->pos >= 32; ++sfp->pos) { |
691 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
692 | | |
693 | | sfp->pos = find_char_string(sfp->pos, last); |
694 | | if (sfp->pos == last) { |
695 | | break; |
696 | | } |
697 | | |
698 | | switch (*sfp->pos) { |
699 | | case '\\': |
700 | | ++sfp->pos; |
701 | | if (parser_eof(sfp)) { |
702 | | return SFPARSE_ERR_PARSE; |
703 | | } |
704 | | |
705 | | switch (*sfp->pos) { |
706 | | case '"': |
707 | | case '\\': |
708 | | flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; |
709 | | |
710 | | break; |
711 | | default: |
712 | | return SFPARSE_ERR_PARSE; |
713 | | } |
714 | | |
715 | | break; |
716 | | case '"': |
717 | | goto fin; |
718 | | default: |
719 | | return SFPARSE_ERR_PARSE; |
720 | | } |
721 | | } |
722 | | #endif /* __AVX2__ */ |
723 | | |
724 | 79.4k | for (; !parser_eof(sfp); ++sfp->pos) { |
725 | 78.5k | switch (*sfp->pos) { |
/* Printable ASCII other than '"' and '\\' needs no processing. */
726 | 2.43k | X20_21_CASES: |
727 | 1.53M | X23_5B_CASES: |
728 | 1.53M | X5D_7E_CASES: |
729 | 70.7k | break; |
730 | 1.14k | case '\\': |
731 | 1.14k | ++sfp->pos; |
732 | 1.14k | if (parser_eof(sfp)) { |
733 | 140 | return SFPARSE_ERR_PARSE; |
734 | 140 | } |
735 | | |
736 | 1.00k | switch (*sfp->pos) { |
737 | 442 | case '"': |
738 | 875 | case '\\': |
739 | 875 | flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; |
740 | | |
741 | 875 | break; |
742 | 133 | default: |
743 | 133 | return SFPARSE_ERR_PARSE; |
744 | 1.00k | } |
745 | | |
746 | 875 | break; |
747 | 6.45k | case '"': |
748 | 6.45k | goto fin; |
749 | 198 | default: |
750 | 198 | return SFPARSE_ERR_PARSE; |
751 | 78.5k | } |
752 | 78.5k | } |
753 | | |
/* End of input was reached without a closing quote. */
754 | 881 | return SFPARSE_ERR_PARSE; |
755 | | |
756 | 6.45k | fin: |
757 | 6.45k | if (dest) { |
758 | 5.63k | dest->type = SFPARSE_TYPE_STRING; |
759 | 5.63k | dest->flags = flags; |
760 | 5.63k | dest->vec.len = (size_t)(sfp->pos - base); |
761 | 5.63k | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
762 | 5.63k | } |
763 | | |
/* Step over the closing quote. */
764 | 6.45k | ++sfp->pos; |
765 | | |
766 | 6.45k | return 0; |
767 | 7.81k | } |
768 | | |
769 | | #ifdef __AVX2__ |
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that is not in the accepted set built from the three
   ranges described below.  Returns |last| if every byte is accepted.
   The loop steps by 32 and terminates on first == last, so
   (last - first) must be a multiple of 32. */
770 | | static const uint8_t *find_char_token(const uint8_t *first, |
771 | | const uint8_t *last) { |
772 | | /* r0: !..:, excluding "(), |
773 | | r1: A..Z |
774 | | r2: ^..~, excluding {} */ |
775 | | const __m256i r0l = _mm256_set1_epi8('!' - 1); |
776 | | const __m256i r0r = _mm256_set1_epi8(':' + 1); |
777 | | const __m256i dq = _mm256_set1_epi8('"'); |
778 | | const __m256i prl = _mm256_set1_epi8('('); |
779 | | const __m256i prr = _mm256_set1_epi8(')'); |
780 | | const __m256i comma = _mm256_set1_epi8(','); |
781 | | const __m256i r1l = _mm256_set1_epi8('A' - 1); |
782 | | const __m256i r1r = _mm256_set1_epi8('Z' + 1); |
783 | | const __m256i r2l = _mm256_set1_epi8('^' - 1); |
784 | | const __m256i r2r = _mm256_set1_epi8('~' + 1); |
785 | | const __m256i cbl = _mm256_set1_epi8('{'); |
786 | | const __m256i cbr = _mm256_set1_epi8('}'); |
787 | | __m256i s, x; |
788 | | uint32_t m; |
789 | | |
790 | | for (; first != last; first += 32) { |
791 | | s = _mm256_loadu_si256((void *)first); |
792 | | |
/* Range r0 with the four excluded bytes removed via and-not. */
793 | | x = _mm256_andnot_si256( |
794 | | _mm256_cmpeq_epi8(s, comma), |
795 | | _mm256_andnot_si256( |
796 | | _mm256_cmpeq_epi8(s, prr), |
797 | | _mm256_andnot_si256( |
798 | | _mm256_cmpeq_epi8(s, prl), |
799 | | _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq), |
800 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), |
801 | | _mm256_cmpgt_epi8(r0r, s)))))); |
/* Add range r1. */
802 | | x = _mm256_or_si256( |
803 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), |
804 | | x); |
/* Add range r2 with the two curly braces removed. */
805 | | x = _mm256_or_si256( |
806 | | _mm256_andnot_si256( |
807 | | _mm256_cmpeq_epi8(s, cbr), |
808 | | _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl), |
809 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), |
810 | | _mm256_cmpgt_epi8(r2r, s)))), |
811 | | x); |
812 | | |
/* Invert so that set bits mark rejected bytes. */
813 | | m = ~(uint32_t)_mm256_movemask_epi8(x); |
814 | | if (m) { |
815 | | return first + ctz(m); |
816 | | } |
817 | | } |
818 | | |
819 | | return last; |
820 | | } |
821 | | #endif /* __AVX2__ */ |
822 | | |
/* Parses a token starting at sfp->pos.  The first byte is consumed
   unconditionally (the caller has validated it); the token then
   extends over every following byte listed in TOKEN_CASES.  On return
   sfp->pos is left on the first byte that is not part of the token
   or at the end of input.  If |dest| is not NULL it receives the type,
   flags and the byte range of the token.

   Always returns 0. */
823 | 34.6k | static int parser_token(sfparse_parser *sfp, sfparse_value *dest) { |
824 | 34.6k | const uint8_t *base; |
825 | | #ifdef __AVX2__ |
826 | | const uint8_t *last; |
827 | | #endif /* __AVX2__ */ |
828 | | |
829 | | /* The first byte has already been validated by the caller. */ |
830 | 34.6k | base = sfp->pos++; |
831 | | |
832 | | #ifdef __AVX2__ |
/* Fast path: scan the largest multiple-of-32 prefix of the remaining
   input with AVX2; fall through to the scalar loop for the tail. */
833 | | if (sfp->end - sfp->pos >= 32) { |
834 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
835 | | |
836 | | sfp->pos = find_char_token(sfp->pos, last); |
837 | | if (sfp->pos != last) { |
838 | | goto fin; |
839 | | } |
840 | | } |
841 | | #endif /* __AVX2__ */ |
842 | | |
843 | 172k | for (; !parser_eof(sfp); ++sfp->pos) { |
844 | 158k | switch (*sfp->pos) { |
845 | 5.75M | TOKEN_CASES: |
846 | 5.75M | continue; |
847 | 158k | } |
848 | | |
/* Any other byte terminates the token. */
849 | 20.1k | break; |
850 | 158k | } |
851 | | |
852 | | #ifdef __AVX2__ |
853 | | fin: |
854 | | #endif /* __AVX2__ */ |
855 | 34.6k | if (dest) { |
856 | 16.2k | dest->type = SFPARSE_TYPE_TOKEN; |
857 | 16.2k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
858 | 16.2k | dest->vec.base = (uint8_t *)base; |
859 | 16.2k | dest->vec.len = (size_t)(sfp->pos - base); |
860 | 16.2k | } |
861 | | |
862 | 34.6k | return 0; |
863 | 34.6k | } |
864 | | |
865 | | #ifdef __AVX2__ |
/* Scans [first, last) 32 bytes at a time and returns a pointer to the
   first byte that is NOT one of '+', '/', '0'..'9', 'A'..'Z' or
   'a'..'z'.  Returns |last| if every byte is in that set.  The loop
   steps by 32 and terminates on first == last, so (last - first) must
   be a multiple of 32. */
866 | | static const uint8_t *find_char_byteseq(const uint8_t *first, |
867 | | const uint8_t *last) { |
868 | | const __m256i pls = _mm256_set1_epi8('+'); |
869 | | const __m256i fs = _mm256_set1_epi8('/'); |
870 | | const __m256i r0l = _mm256_set1_epi8('0' - 1); |
871 | | const __m256i r0r = _mm256_set1_epi8('9' + 1); |
872 | | const __m256i r1l = _mm256_set1_epi8('A' - 1); |
873 | | const __m256i r1r = _mm256_set1_epi8('Z' + 1); |
874 | | const __m256i r2l = _mm256_set1_epi8('a' - 1); |
875 | | const __m256i r2r = _mm256_set1_epi8('z' + 1); |
876 | | __m256i s, x; |
877 | | uint32_t m; |
878 | | |
879 | | for (; first != last; first += 32) { |
880 | | s = _mm256_loadu_si256((void *)first); |
881 | | |
/* x accumulates a per-byte mask of the accepted bytes. */
882 | | x = _mm256_cmpeq_epi8(s, pls); |
883 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x); |
884 | | x = _mm256_or_si256( |
885 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)), |
886 | | x); |
887 | | x = _mm256_or_si256( |
888 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), |
889 | | x); |
890 | | x = _mm256_or_si256( |
891 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)), |
892 | | x); |
893 | | |
/* Invert so that set bits mark rejected bytes. */
894 | | m = ~(uint32_t)_mm256_movemask_epi8(x); |
895 | | if (m) { |
896 | | return first + ctz(m); |
897 | | } |
898 | | } |
899 | | |
900 | | return last; |
901 | | } |
902 | | #endif /* __AVX2__ */ |
903 | | |
/* Parses a byte sequence starting at sfp->pos, which must point at
   the opening ':'.  The content consists of the characters '+', '/',
   digits and ASCII letters, optionally followed by '=' padding, and
   is terminated by ':'.

   The padding and length rules enforced here, with the offset
   measured from the first content byte modulo 4:
     - '=' at offset 0 or 1 is an error;
     - '=' at offset 2 may be followed by one more '=';
     - '=' at offset 3 stands alone;
     - after padding the next byte must be the closing ':';
     - a closing ':' without padding is rejected at offset 1.

   On success sfp->pos is left just past the closing ':'.  If |dest| is
   not NULL, dest->vec covers the bytes between the colons, including
   any '=' padding (base is NULL when empty).  The content is not
   decoded here.

   Returns 0 on success, or SFPARSE_ERR_PARSE on error. */
904 | 8.41k | static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) { |
905 | 8.41k | const uint8_t *base; |
906 | | #ifdef __AVX2__ |
907 | | const uint8_t *last; |
908 | | #endif /* __AVX2__ */ |
909 | | |
910 | | /* The first byte has already been validated by the caller. */ |
911 | 8.41k | assert(':' == *sfp->pos); |
912 | | |
913 | 8.41k | base = ++sfp->pos; |
914 | | |
915 | | #ifdef __AVX2__ |
/* Fast path: skip ahead over accepted bytes with AVX2; the scalar
   loop below resumes from wherever the scan stopped. */
916 | | if (sfp->end - sfp->pos >= 32) { |
917 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
918 | | sfp->pos = find_char_byteseq(sfp->pos, last); |
919 | | } |
920 | | #endif /* __AVX2__ */ |
921 | | |
922 | 79.6k | for (; !parser_eof(sfp); ++sfp->pos) { |
923 | 79.0k | switch (*sfp->pos) { |
924 | 920 | case '+': |
925 | 1.98k | case '/': |
926 | 171k | DIGIT_CASES: |
927 | 171k | ALPHA_CASES: |
928 | 71.2k | continue; |
929 | 2.26k | case '=': |
/* The offset of the first '=' modulo 4 decides how much padding is
   allowed. */
930 | 2.26k | switch ((sfp->pos - base) & 0x3) { |
931 | 133 | case 0: |
932 | 265 | case 1: |
933 | 265 | return SFPARSE_ERR_PARSE; |
934 | 1.21k | case 2: |
935 | 1.21k | ++sfp->pos; |
936 | | |
937 | 1.21k | if (parser_eof(sfp)) { |
938 | 132 | return SFPARSE_ERR_PARSE; |
939 | 132 | } |
940 | | |
941 | 1.08k | if (*sfp->pos == '=') { |
942 | 484 | ++sfp->pos; |
943 | 484 | } |
944 | | |
945 | 1.08k | break; |
946 | 788 | case 3: |
947 | 788 | ++sfp->pos; |
948 | | |
949 | 788 | break; |
950 | 2.26k | } |
951 | | |
/* Padding must be followed immediately by the closing ':'. */
952 | 1.86k | if (parser_eof(sfp) || *sfp->pos != ':') { |
953 | 274 | return SFPARSE_ERR_PARSE; |
954 | 274 | } |
955 | | |
956 | 1.59k | goto fin; |
957 | 5.37k | case ':': |
958 | 5.37k | if (((sfp->pos - base) & 0x3) == 1) { |
959 | 132 | return SFPARSE_ERR_PARSE; |
960 | 132 | } |
961 | | |
962 | 5.24k | goto fin; |
963 | 5.24k | default: |
964 | 179 | return SFPARSE_ERR_PARSE; |
965 | 79.0k | } |
966 | 79.0k | } |
967 | | |
/* End of input was reached without a closing ':'. */
968 | 594 | return SFPARSE_ERR_PARSE; |
969 | | |
970 | 6.83k | fin: |
971 | 6.83k | if (dest) { |
972 | 6.34k | dest->type = SFPARSE_TYPE_BYTESEQ; |
973 | 6.34k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
974 | 6.34k | dest->vec.len = (size_t)(sfp->pos - base); |
975 | 6.34k | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
976 | 6.34k | } |
977 | | |
/* Step over the closing ':'. */
978 | 6.83k | ++sfp->pos; |
979 | | |
980 | 6.83k | return 0; |
981 | 8.41k | } |
982 | | |
/* Parses a boolean: '?' followed by '0' (false) or '1' (true).
   sfp->pos must point at the '?'.  On success sfp->pos is left just
   past the digit and, if |dest| is not NULL, |dest| receives the type,
   flags and the value 0 or 1.

   Returns 0 on success, or SFPARSE_ERR_PARSE if the input ends after
   '?' or the next byte is neither '0' nor '1'. */
983 | 2.00k | static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) { |
984 | 2.00k | int b; |
985 | | |
986 | | /* The first byte has already been validated by the caller. */ |
987 | 2.00k | assert('?' == *sfp->pos); |
988 | | |
989 | 2.00k | ++sfp->pos; |
990 | | |
991 | 2.00k | if (parser_eof(sfp)) { |
992 | 132 | return SFPARSE_ERR_PARSE; |
993 | 132 | } |
994 | | |
995 | 1.86k | switch (*sfp->pos) { |
996 | 1.01k | case '0': |
997 | 1.01k | b = 0; |
998 | | |
999 | 1.01k | break; |
1000 | 727 | case '1': |
1001 | 727 | b = 1; |
1002 | | |
1003 | 727 | break; |
1004 | 132 | default: |
1005 | 132 | return SFPARSE_ERR_PARSE; |
1006 | 1.86k | } |
1007 | | |
1008 | 1.73k | ++sfp->pos; |
1009 | | |
1010 | 1.73k | if (dest) { |
1011 | 805 | dest->type = SFPARSE_TYPE_BOOLEAN; |
1012 | 805 | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1013 | 805 | dest->boolean = b; |
1014 | 805 | } |
1015 | | |
1016 | 1.73k | return 0; |
1017 | 1.86k | } |
1018 | | |
/* Decodes the two hexadecimal digits at *ppos into a single byte.
   Only '0'..'9' and lowercase 'a'..'f' are accepted.  The first digit
   becomes the high nibble and the second the low nibble.

   On success the byte is stored in *pc, *ppos is advanced by 2 and 0
   is returned.  On failure -1 is returned and *pc is not written;
   *ppos is unchanged if the first digit is invalid and advanced by 1
   if the second digit is invalid.

   No bounds check is performed here: two bytes are read starting at
   *ppos, so the caller presumably guarantees they are available. */
1019 | 18.9k | static int pctdecode(uint8_t *pc, const uint8_t **ppos) { |
1020 | 18.9k | uint8_t c, b = **ppos; |
1021 | | |
1022 | 18.9k | switch (b) { |
1023 | 11.5k | DIGIT_CASES: |
1024 | 11.5k | c = (uint8_t)((b - '0') << 4); |
1025 | | |
1026 | 11.5k | break; |
1027 | 7.25k | LCHEXALPHA_CASES: |
1028 | 7.25k | c = (uint8_t)((b - 'a' + 10) << 4); |
1029 | | |
1030 | 7.25k | break; |
1031 | 146 | default: |
1032 | 146 | return -1; |
1033 | 18.9k | } |
1034 | | |
/* Advance to the second digit and read it. */
1035 | 18.8k | b = *++*ppos; |
1036 | | |
1037 | 18.8k | switch (b) { |
1038 | 11.0k | DIGIT_CASES: |
1039 | 11.0k | c |= (uint8_t)(b - '0'); |
1040 | | |
1041 | 11.0k | break; |
1042 | 7.62k | LCHEXALPHA_CASES: |
1043 | 7.62k | c |= (uint8_t)(b - 'a' + 10); |
1044 | | |
1045 | 7.62k | break; |
1046 | 145 | default: |
1047 | 145 | return -1; |
1048 | 18.8k | } |
1049 | | |
1050 | 18.6k | *pc = c; |
1051 | 18.6k | ++*ppos; |
1052 | | |
1053 | 18.6k | return 0; |
1054 | 18.8k | } |
1055 | | |
1056 | | /* Start of utf8 dfa */ |
1057 | | /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
1058 | | * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. |
1059 | | * |
1060 | | * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
1061 | | * |
1062 | | * Permission is hereby granted, free of charge, to any person |
1063 | | * obtaining a copy of this software and associated documentation |
1064 | | * files (the "Software"), to deal in the Software without |
1065 | | * restriction, including without limitation the rights to use, copy, |
1066 | | * modify, merge, publish, distribute, sublicense, and/or sell copies |
1067 | | * of the Software, and to permit persons to whom the Software is |
1068 | | * furnished to do so, subject to the following conditions: |
1069 | | * |
1070 | | * The above copyright notice and this permission notice shall be |
1071 | | * included in all copies or substantial portions of the Software. |
1072 | | * |
1073 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
1074 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
1075 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
1076 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
1077 | | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
1078 | | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
1079 | | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
1080 | | * SOFTWARE. |
1081 | | */ |
1082 | 175k | #define UTF8_ACCEPT 0 |
1083 | 18.6k | #define UTF8_REJECT 12 |
1084 | | |
1085 | | /* clang-format off */ |
/*
 * utf8d is the lookup table of the UTF-8 decoder DFA.  Its first 256
 * entries are indexed by an input byte and yield a character class.
 * The entries after that form the transition table, which is indexed
 * by (state + character class) and yields the next state.
 */
static const uint8_t utf8d[] = {
  /*
   * The first part of the table maps bytes to character classes to
   * reduce the size of the transition table and create bitmasks.
   */
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  /*
   * The second part is a transition table that maps a combination
   * of a state of the automaton and a character class to a state.
   */
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12,
};
1110 | | /* clang-format on */ |
1111 | | |
1112 | 18.6k | static void utf8_decode(uint32_t *state, uint8_t byte) { |
1113 | 18.6k | *state = utf8d[256 + *state + utf8d[byte]]; |
1114 | 18.6k | } |
1115 | | |
1116 | | /* End of utf8 dfa */ |
1117 | | |
1118 | 25.2k | static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) { |
1119 | 25.2k | const uint8_t *base; |
1120 | 25.2k | uint8_t c; |
1121 | 25.2k | uint32_t utf8state = UTF8_ACCEPT; |
1122 | | |
1123 | 25.2k | assert('%' == *sfp->pos); |
1124 | | |
1125 | 25.2k | ++sfp->pos; |
1126 | | |
1127 | 25.2k | if (parser_eof(sfp) || *sfp->pos != '"') { |
1128 | 288 | return SFPARSE_ERR_PARSE; |
1129 | 288 | } |
1130 | | |
1131 | 24.9k | base = ++sfp->pos; |
1132 | | |
1133 | 186k | for (; !parser_eof(sfp);) { |
1134 | 186k | switch (*sfp->pos) { |
1135 | 3.03k | X00_1F_CASES: |
1136 | 17.2k | X7F_FF_CASES: |
1137 | 17.2k | return SFPARSE_ERR_PARSE; |
1138 | 19.1k | case '%': |
1139 | 19.1k | ++sfp->pos; |
1140 | | |
1141 | 19.1k | if (sfp->pos + 2 > sfp->end) { |
1142 | 165 | return SFPARSE_ERR_PARSE; |
1143 | 165 | } |
1144 | | |
1145 | 18.9k | if (pctdecode(&c, &sfp->pos) != 0) { |
1146 | 291 | return SFPARSE_ERR_PARSE; |
1147 | 291 | } |
1148 | | |
1149 | 18.6k | utf8_decode(&utf8state, c); |
1150 | 18.6k | if (utf8state == UTF8_REJECT) { |
1151 | 217 | return SFPARSE_ERR_PARSE; |
1152 | 217 | } |
1153 | | |
1154 | 18.4k | break; |
1155 | 18.4k | case '"': |
1156 | 6.62k | if (utf8state != UTF8_ACCEPT) { |
1157 | 137 | return SFPARSE_ERR_PARSE; |
1158 | 137 | } |
1159 | | |
1160 | 6.49k | if (dest) { |
1161 | 5.24k | dest->type = SFPARSE_TYPE_DISPSTRING; |
1162 | 5.24k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1163 | 5.24k | dest->vec.len = (size_t)(sfp->pos - base); |
1164 | 5.24k | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
1165 | 5.24k | } |
1166 | | |
1167 | 6.49k | ++sfp->pos; |
1168 | | |
1169 | 6.49k | return 0; |
1170 | 143k | default: |
1171 | 143k | if (utf8state != UTF8_ACCEPT) { |
1172 | 164 | return SFPARSE_ERR_PARSE; |
1173 | 164 | } |
1174 | | |
1175 | 143k | ++sfp->pos; |
1176 | 186k | } |
1177 | 186k | } |
1178 | | |
1179 | 280 | return SFPARSE_ERR_PARSE; |
1180 | 24.9k | } |
1181 | | |
1182 | 92.4k | static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) { |
1183 | 92.4k | switch (*sfp->pos) { |
1184 | 7.81k | case '"': |
1185 | 7.81k | return parser_string(sfp, dest); |
1186 | 906 | case '-': |
1187 | 12.5k | DIGIT_CASES: |
1188 | 12.5k | return parser_number(sfp, dest); |
1189 | 1.55k | case '@': |
1190 | 1.55k | return parser_date(sfp, dest); |
1191 | 8.41k | case ':': |
1192 | 8.41k | return parser_byteseq(sfp, dest); |
1193 | 2.00k | case '?': |
1194 | 2.00k | return parser_boolean(sfp, dest); |
1195 | 824 | case '*': |
1196 | 34.6k | ALPHA_CASES: |
1197 | 34.6k | return parser_token(sfp, dest); |
1198 | 25.2k | case '%': |
1199 | 25.2k | return parser_dispstring(sfp, dest); |
1200 | 153 | default: |
1201 | 153 | return SFPARSE_ERR_PARSE; |
1202 | 92.4k | } |
1203 | 92.4k | } |
1204 | | |
1205 | | static int parser_skip_inner_list(sfparse_parser *sfp); |
1206 | | |
/*
 * sfparse_parser_param reads the next parameter.
 *
 * If another parameter follows, its key is parsed by parser_key()
 * into |dest_key| and 0 is returned.  When the key is followed by
 * '=', the value is parsed by parser_bare_item() into |dest_value|.
 * Otherwise, the value is Boolean true, which is stored in
 * |dest_value| if it is not NULL.
 *
 * If the next byte is not ';' or the input is exhausted, there are
 * no more parameters: the operation state is set to
 * SFPARSE_STATE_AFTER and SFPARSE_ERR_EOF is returned.
 *
 * SFPARSE_ERR_PARSE is returned if the input ends right after ';'
 * (and the bytes skipped by parser_discard_sp()) or right after '='.
 * Errors from the called parsers are returned as is.
 */
int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
                         sfparse_value *dest_value) {
  int rv;

  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
  case SFPARSE_STATE_BEFORE:
    /* The inner list has not been read yet.  Skip it before reading
       the parameters that follow it. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_BEFORE_PARAMS:
    parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);

    break;
  case SFPARSE_STATE_PARAMS:
    break;
  default:
    /* This function must not be called in any other state. */
    assert(0);
    abort();
  }

  /* Every parameter starts with ';'.  Anything else ends the
     parameters. */
  if (parser_eof(sfp) || *sfp->pos != ';') {
    parser_set_op_state(sfp, SFPARSE_STATE_AFTER);

    return SFPARSE_ERR_EOF;
  }

  ++sfp->pos;

  parser_discard_sp(sfp);
  if (parser_eof(sfp)) {
    return SFPARSE_ERR_PARSE;
  }

  rv = parser_key(sfp, dest_key);
  if (rv != 0) {
    return rv;
  }

  if (parser_eof(sfp) || *sfp->pos != '=') {
    /* A key without a value means Boolean true. */
    if (dest_value) {
      dest_value->type = SFPARSE_TYPE_BOOLEAN;
      dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
      dest_value->boolean = 1;
    }

    return 0;
  }

  ++sfp->pos;

  /* '=' must be followed by a value. */
  if (parser_eof(sfp)) {
    return SFPARSE_ERR_PARSE;
  }

  return parser_bare_item(sfp, dest_value);
}
1266 | | |
1267 | 76.7k | static int parser_skip_params(sfparse_parser *sfp) { |
1268 | 76.7k | int rv; |
1269 | | |
1270 | 95.6k | for (;;) { |
1271 | 95.6k | rv = sfparse_parser_param(sfp, NULL, NULL); |
1272 | 95.6k | switch (rv) { |
1273 | 18.8k | case 0: |
1274 | 18.8k | break; |
1275 | 76.1k | case SFPARSE_ERR_EOF: |
1276 | 76.1k | return 0; |
1277 | 598 | case SFPARSE_ERR_PARSE: |
1278 | 598 | return rv; |
1279 | 0 | default: |
1280 | 0 | assert(0); |
1281 | 0 | abort(); |
1282 | 95.6k | } |
1283 | 95.6k | } |
1284 | 76.7k | } |
1285 | | |
/*
 * sfparse_parser_inner_list reads the next item of an inner list.
 *
 * If an item is found, it is parsed by parser_bare_item() into
 * |dest|, the operation state is set to SFPARSE_STATE_BEFORE_PARAMS,
 * and 0 is returned.
 *
 * If the closing ')' is found, it is consumed, the inner list state
 * is unset, the operation state is set to
 * SFPARSE_STATE_BEFORE_PARAMS, and SFPARSE_ERR_EOF is returned.
 *
 * SFPARSE_ERR_PARSE is returned if the input ends before ')' or if
 * an item is followed by a byte other than ' ' or ')'.  Errors from
 * the called parsers are returned as is.
 */
int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
  case SFPARSE_STATE_BEFORE:
    /* No item has been read from this inner list yet. */
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    break;
  case SFPARSE_STATE_BEFORE_PARAMS:
    /* The parameters of the previous item have not been read.  Skip
       them. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
       another state without reading the state. */
    /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */

    /* fall through */
  case SFPARSE_STATE_AFTER:
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    /* The previous item must be followed by either ' ' or the
       closing ')'. */
    switch (*sfp->pos) {
    case ' ':
      parser_discard_sp(sfp);
      if (parser_eof(sfp)) {
        return SFPARSE_ERR_PARSE;
      }

      break;
    case ')':
      break;
    default:
      return SFPARSE_ERR_PARSE;
    }

    break;
  default:
    /* This function must not be called in any other state. */
    assert(0);
    abort();
  }

  /* Every path that reaches here has verified that the input is not
     exhausted. */
  if (*sfp->pos == ')') {
    ++sfp->pos;

    parser_unset_inner_list_state(sfp);
    parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);

    return SFPARSE_ERR_EOF;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);

  return 0;
}
1351 | | |
1352 | 6.20k | static int parser_skip_inner_list(sfparse_parser *sfp) { |
1353 | 6.20k | int rv; |
1354 | | |
1355 | 22.9k | for (;;) { |
1356 | 22.9k | rv = sfparse_parser_inner_list(sfp, NULL); |
1357 | 22.9k | switch (rv) { |
1358 | 16.7k | case 0: |
1359 | 16.7k | break; |
1360 | 4.76k | case SFPARSE_ERR_EOF: |
1361 | 4.76k | return 0; |
1362 | 1.43k | case SFPARSE_ERR_PARSE: |
1363 | 1.43k | return rv; |
1364 | 0 | default: |
1365 | 0 | assert(0); |
1366 | 0 | abort(); |
1367 | 22.9k | } |
1368 | 22.9k | } |
1369 | 6.20k | } |
1370 | | |
1371 | 59.5k | static int parser_next_key_or_item(sfparse_parser *sfp) { |
1372 | 59.5k | parser_discard_ows(sfp); |
1373 | | |
1374 | 59.5k | if (parser_eof(sfp)) { |
1375 | 49.5k | return SFPARSE_ERR_EOF; |
1376 | 49.5k | } |
1377 | | |
1378 | 10.0k | if (*sfp->pos != ',') { |
1379 | 282 | return SFPARSE_ERR_PARSE; |
1380 | 282 | } |
1381 | | |
1382 | 9.80k | ++sfp->pos; |
1383 | | |
1384 | 9.80k | parser_discard_ows(sfp); |
1385 | 9.80k | if (parser_eof(sfp)) { |
1386 | 133 | return SFPARSE_ERR_PARSE; |
1387 | 133 | } |
1388 | | |
1389 | 9.66k | return 0; |
1390 | 9.80k | } |
1391 | | |
1392 | 84.6k | static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) { |
1393 | 84.6k | int rv; |
1394 | | |
1395 | 84.6k | if (parser_eof(sfp) || *(sfp->pos) != '=') { |
1396 | | /* Boolean true */ |
1397 | 14.9k | if (dest) { |
1398 | 14.9k | dest->type = SFPARSE_TYPE_BOOLEAN; |
1399 | 14.9k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1400 | 14.9k | dest->boolean = 1; |
1401 | 14.9k | } |
1402 | | |
1403 | 14.9k | sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; |
1404 | | |
1405 | 14.9k | return 0; |
1406 | 14.9k | } |
1407 | | |
1408 | 69.6k | ++sfp->pos; |
1409 | | |
1410 | 69.6k | if (parser_eof(sfp)) { |
1411 | 139 | return SFPARSE_ERR_PARSE; |
1412 | 139 | } |
1413 | | |
1414 | 69.5k | if (*sfp->pos == '(') { |
1415 | 6.20k | if (dest) { |
1416 | 6.20k | dest->type = SFPARSE_TYPE_INNER_LIST; |
1417 | 6.20k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1418 | 6.20k | } |
1419 | | |
1420 | 6.20k | ++sfp->pos; |
1421 | | |
1422 | 6.20k | sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE; |
1423 | | |
1424 | 6.20k | return 0; |
1425 | 6.20k | } |
1426 | | |
1427 | 63.3k | rv = parser_bare_item(sfp, dest); |
1428 | 63.3k | if (rv != 0) { |
1429 | 22.4k | return rv; |
1430 | 22.4k | } |
1431 | | |
1432 | 40.9k | sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; |
1433 | | |
1434 | 40.9k | return 0; |
1435 | 63.3k | } |
1436 | | |
/*
 * sfparse_parser_dict reads the next member of a dictionary.
 *
 * Anything left unread from the previous member (its inner list
 * and/or its parameters) is skipped first.  Then the key is parsed
 * by parser_key() into |dest_key| and the value by
 * parser_dict_value() into |dest_value|.
 *
 * It returns 0 if a member was read.  SFPARSE_ERR_EOF is returned if
 * the input is exhausted, either in the initial state after
 * parser_discard_sp() or when parser_next_key_or_item() reports it.
 * Errors from the called functions are returned as is.
 */
int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
                        sfparse_value *dest_value) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
    /* The inner list of the previous member has not been read. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_DICT_BEFORE_PARAMS:
    /* The parameters of the previous member have not been read. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_DICT_AFTER:
    /* Move on to the next member, if any. */
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }

    break;
  case SFPARSE_STATE_INITIAL:
    parser_discard_sp(sfp);

    /* Nothing left to read in the initial state is reported as
       SFPARSE_ERR_EOF, not as a parse error. */
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_EOF;
    }

    break;
  default:
    /* This function must not be called in any other state. */
    assert(0);
    abort();
  }

  rv = parser_key(sfp, dest_key);
  if (rv != 0) {
    return rv;
  }

  return parser_dict_value(sfp, dest_value);
}
1483 | | |
/*
 * sfparse_parser_list reads the next member of a list.
 *
 * Anything left unread from the previous member (its inner list
 * and/or its parameters) is skipped first.  If the member starts
 * with '(', it is an inner list: only the type and flags of |dest|
 * are set, '(' is consumed, and the state becomes
 * SFPARSE_STATE_LIST_INNER_LIST_BEFORE.  Otherwise the member is
 * parsed by parser_bare_item() into |dest| and the state becomes
 * SFPARSE_STATE_LIST_BEFORE_PARAMS.
 *
 * It returns 0 if a member was read.  SFPARSE_ERR_EOF is returned if
 * the input is exhausted, either in the initial state after
 * parser_discard_sp() or when parser_next_key_or_item() reports it.
 * Errors from the called functions are returned as is.
 */
int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
    /* The inner list of the previous member has not been read. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_LIST_BEFORE_PARAMS:
    /* The parameters of the previous member have not been read. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_LIST_AFTER:
    /* Move on to the next member, if any. */
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }

    break;
  case SFPARSE_STATE_INITIAL:
    parser_discard_sp(sfp);

    /* Nothing left to read in the initial state is reported as
       SFPARSE_ERR_EOF, not as a parse error. */
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_EOF;
    }

    break;
  default:
    /* This function must not be called in any other state. */
    assert(0);
    abort();
  }

  /* Every path that reaches here has verified that the input is not
     exhausted. */
  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SFPARSE_TYPE_INNER_LIST;
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
    }

    ++sfp->pos;

    sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;

    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;

  return 0;
}
1544 | | |
/*
 * sfparse_parser_item reads an item.
 *
 * On the first call (SFPARSE_STATE_INITIAL), the item is read.  If
 * it starts with '(', it is an inner list: only the type and flags
 * of |dest| are set, '(' is consumed, and the state becomes
 * SFPARSE_STATE_ITEM_INNER_LIST_BEFORE.  Otherwise the item is
 * parsed by parser_bare_item() into |dest| and the state becomes
 * SFPARSE_STATE_ITEM_BEFORE_PARAMS.  0 is returned on success.
 * SFPARSE_ERR_PARSE is returned if nothing is left after
 * parser_discard_sp().
 *
 * On a call after the item has been read, anything left unread (its
 * inner list and/or its parameters) is skipped.  Then
 * SFPARSE_ERR_EOF is returned if nothing is left after
 * parser_discard_sp(), or SFPARSE_ERR_PARSE if input remains.
 *
 * Errors from the called functions are returned as is.
 */
int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_INITIAL:
    parser_discard_sp(sfp);

    /* Unlike in the list and dictionary parsers, nothing left to
       read here is a parse error. */
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    break;
  case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
    /* The inner list has not been read. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
    /* The parameters of the item have not been read. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SFPARSE_STATE_ITEM_AFTER:
    parser_discard_sp(sfp);

    /* Nothing may follow the item. */
    if (!parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }

    return SFPARSE_ERR_EOF;
  default:
    /* This function must not be called in any other state. */
    assert(0);
    abort();
  }

  /* Only SFPARSE_STATE_INITIAL reaches here, and it has verified
     that the input is not exhausted. */
  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SFPARSE_TYPE_INNER_LIST;
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
    }

    ++sfp->pos;

    sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;

    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;

  return 0;
}
1606 | | |
1607 | | void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data, |
1608 | 75.6k | size_t datalen) { |
1609 | 75.6k | if (datalen == 0) { |
1610 | 532 | sfp->pos = sfp->end = NULL; |
1611 | 75.1k | } else { |
1612 | 75.1k | sfp->pos = data; |
1613 | 75.1k | sfp->end = data + datalen; |
1614 | 75.1k | } |
1615 | | |
1616 | 75.6k | sfp->state = SFPARSE_STATE_INITIAL; |
1617 | 75.6k | } |
1618 | | |
1619 | 0 | void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) { |
1620 | 0 | const uint8_t *p, *q; |
1621 | 0 | uint8_t *o; |
1622 | 0 | size_t len, slen; |
1623 | |
|
1624 | 0 | if (src->len == 0) { |
1625 | 0 | dest->len = 0; |
1626 | |
|
1627 | 0 | return; |
1628 | 0 | } |
1629 | | |
1630 | 0 | o = dest->base; |
1631 | 0 | p = src->base; |
1632 | 0 | len = src->len; |
1633 | |
|
1634 | 0 | for (;;) { |
1635 | 0 | q = memchr(p, '\\', len); |
1636 | 0 | if (q == NULL) { |
1637 | 0 | memcpy(o, p, len); |
1638 | 0 | o += len; |
1639 | |
|
1640 | 0 | dest->len = (size_t)(o - dest->base); |
1641 | |
|
1642 | 0 | return; |
1643 | 0 | } |
1644 | | |
1645 | 0 | slen = (size_t)(q - p); |
1646 | 0 | memcpy(o, p, slen); |
1647 | 0 | o += slen; |
1648 | |
|
1649 | 0 | p = q + 1; |
1650 | 0 | *o++ = *p++; |
1651 | 0 | len -= slen + 2; |
1652 | 0 | } |
1653 | 0 | } |
1654 | | |
/*
 * sfparse_base64decode decodes the base64 encoded |src| into the
 * buffer pointed to by dest->base, and sets dest->len to the number
 * of bytes written.  The buffer is written without any size check.
 *
 * |src| is assumed to be valid: a byte outside the base64 alphabet
 * trips an assertion in the main loop, and an input whose length
 * modulo 4 is 1 aborts.  The '=' padding at the end may be present
 * or missing.
 *
 * If src->len is 0, only dest->len is set, and dest->base is not
 * touched.
 */
void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
  /* Maps an input byte to its 6 bit value.  -1 denotes a byte that
     is not in the base64 alphabet ('A'-'Z', 'a'-'z', '0'-'9', '+',
     and '/'). */
  static const int index_tbl[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
    -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
    43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1};
  uint8_t *o;
  const uint8_t *p, *end;
  uint32_t n;
  size_t i, left;
  int idx;

  if (src->len == 0) {
    dest->len = 0;

    return;
  }

  o = dest->base;
  p = src->base;
  /* |left| is the number of trailing bytes that the main loop must
     not process.  If the length is a multiple of 4 and the input
     ends with '=', the last group of 4 bytes contains padding, and
     is handled separately below. */
  left = src->len & 0x3;
  if (left == 0 && src->base[src->len - 1] == '=') {
    left = 4;
  }
  end = src->base + src->len - left;

  /* Decode each full group of 4 characters into 3 bytes. */
  for (; p != end;) {
    n = 0;

    for (i = 1; i <= 4; ++i, ++p) {
      idx = index_tbl[*p];

      assert(idx != -1);

      /* The i-th character is shifted by 18, 12, 6, and 0 bits
         respectively. */
      n += (uint32_t)(idx << (24 - i * 6));
    }

    *o++ = (uint8_t)(n >> 16);
    *o++ = (n >> 8) & 0xffu;
    *o++ = n & 0xffu;
  }

  /* Turn |left| into the number of remaining characters that carry
     data, that is, without '=' padding.  left == 2 needs no
     adjustment. */
  switch (left) {
  case 0:
    goto fin;
  case 1:
    /* A single remaining character cannot encode a byte. */
    assert(0);
    abort();
  case 3:
    if (src->base[src->len - 1] == '=') {
      left = 2;
    }

    break;
  case 4:
    assert('=' == src->base[src->len - 1]);

    if (src->base[src->len - 2] == '=') {
      left = 2;
    } else {
      left = 3;
    }

    break;
  }

  switch (left) {
  case 2:
    /* 2 characters encode 1 byte. */
    *o = (uint8_t)(index_tbl[*p++] << 2);
    *o++ |= (uint8_t)(index_tbl[*p++] >> 4);

    break;
  case 3:
    /* 3 characters encode 2 bytes. */
    n = (uint32_t)(index_tbl[*p++] << 10);
    n += (uint32_t)(index_tbl[*p++] << 4);
    n += (uint32_t)(index_tbl[*p++] >> 2);
    *o++ = (n >> 8) & 0xffu;
    *o++ = n & 0xffu;

    break;
  }

fin:
  dest->len = (size_t)(o - dest->base);
}
1750 | | |
1751 | 0 | void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) { |
1752 | 0 | const uint8_t *p, *q; |
1753 | 0 | uint8_t *o; |
1754 | 0 | size_t len, slen; |
1755 | |
|
1756 | 0 | if (src->len == 0) { |
1757 | 0 | dest->len = 0; |
1758 | |
|
1759 | 0 | return; |
1760 | 0 | } |
1761 | | |
1762 | 0 | o = dest->base; |
1763 | 0 | p = src->base; |
1764 | 0 | len = src->len; |
1765 | |
|
1766 | 0 | for (;;) { |
1767 | 0 | q = memchr(p, '%', len); |
1768 | 0 | if (q == NULL) { |
1769 | 0 | memcpy(o, p, len); |
1770 | 0 | o += len; |
1771 | |
|
1772 | 0 | dest->len = (size_t)(o - dest->base); |
1773 | |
|
1774 | 0 | return; |
1775 | 0 | } |
1776 | | |
1777 | 0 | slen = (size_t)(q - p); |
1778 | 0 | memcpy(o, p, slen); |
1779 | 0 | o += slen; |
1780 | |
|
1781 | 0 | p = q + 1; |
1782 | |
|
1783 | 0 | pctdecode(o++, &p); |
1784 | |
|
1785 | 0 | len -= slen + 3; |
1786 | 0 | } |
1787 | 0 | } |