/src/nghttp2/lib/sfparse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * sfparse |
3 | | * |
4 | | * Copyright (c) 2023 sfparse contributors |
5 | | * Copyright (c) 2019 nghttp3 contributors |
6 | | * Copyright (c) 2015 nghttp2 contributors |
7 | | * |
8 | | * Permission is hereby granted, free of charge, to any person obtaining |
9 | | * a copy of this software and associated documentation files (the |
10 | | * "Software"), to deal in the Software without restriction, including |
11 | | * without limitation the rights to use, copy, modify, merge, publish, |
12 | | * distribute, sublicense, and/or sell copies of the Software, and to |
13 | | * permit persons to whom the Software is furnished to do so, subject to |
14 | | * the following conditions: |
15 | | * |
16 | | * The above copyright notice and this permission notice shall be |
17 | | * included in all copies or substantial portions of the Software. |
18 | | * |
19 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
20 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
21 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
22 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
23 | | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
24 | | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
25 | | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
26 | | */ |
27 | | #include "sfparse.h" |
28 | | |
29 | | #include <string.h> |
30 | | #include <assert.h> |
31 | | #include <stdlib.h> |
32 | | |
33 | | #ifdef __AVX2__ |
34 | | # include <immintrin.h> |
35 | | #endif /* __AVX2__ */ |
36 | | |
37 | 172k | #define SFPARSE_STATE_DICT 0x08u |
38 | 0 | #define SFPARSE_STATE_LIST 0x10u |
39 | 0 | #define SFPARSE_STATE_ITEM 0x18u |
40 | | |
41 | 16.1k | #define SFPARSE_STATE_INNER_LIST 0x04u |
42 | | |
43 | 16.2k | #define SFPARSE_STATE_BEFORE 0x00u |
44 | 218k | #define SFPARSE_STATE_BEFORE_PARAMS 0x01u |
45 | 89.5k | #define SFPARSE_STATE_PARAMS 0x02u |
46 | 144k | #define SFPARSE_STATE_AFTER 0x03u |
47 | | |
48 | 279k | #define SFPARSE_STATE_OP_MASK 0x03u |
49 | | |
50 | | #define SFPARSE_SET_STATE_AFTER(NAME) \ |
51 | 55.3k | (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER) |
52 | | #define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME) \ |
53 | 106k | (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS) |
54 | | #define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME) \ |
55 | 10.8k | (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE) |
56 | | |
57 | 55.3k | #define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT) |
58 | 106k | #define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT) |
59 | | #define SFPARSE_STATE_DICT_INNER_LIST_BEFORE \ |
60 | 10.8k | SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT) |
61 | | |
62 | 0 | #define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST) |
63 | 0 | #define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST) |
64 | | #define SFPARSE_STATE_LIST_INNER_LIST_BEFORE \ |
65 | 0 | SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST) |
66 | | |
67 | 0 | #define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM) |
68 | 0 | #define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM) |
69 | | #define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE \ |
70 | 0 | SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM) |
71 | | |
72 | 102k | #define SFPARSE_STATE_INITIAL 0x00u |
73 | | |
74 | | #define DIGIT_CASES \ |
75 | 48.3k | case '0': \ |
76 | 61.5k | case '1': \ |
77 | 72.4k | case '2': \ |
78 | 79.7k | case '3': \ |
79 | 86.5k | case '4': \ |
80 | 92.5k | case '5': \ |
81 | 99.7k | case '6': \ |
82 | 106k | case '7': \ |
83 | 114k | case '8': \ |
84 | 121k | case '9' |
85 | | |
86 | | #define LCALPHA_CASES \ |
87 | 170k | case 'a': \ |
88 | 175k | case 'b': \ |
89 | 185k | case 'c': \ |
90 | 191k | case 'd': \ |
91 | 201k | case 'e': \ |
92 | 208k | case 'f': \ |
93 | 215k | case 'g': \ |
94 | 223k | case 'h': \ |
95 | 234k | case 'i': \ |
96 | 240k | case 'j': \ |
97 | 246k | case 'k': \ |
98 | 256k | case 'l': \ |
99 | 285k | case 'm': \ |
100 | 293k | case 'n': \ |
101 | 306k | case 'o': \ |
102 | 313k | case 'p': \ |
103 | 320k | case 'q': \ |
104 | 329k | case 'r': \ |
105 | 338k | case 's': \ |
106 | 351k | case 't': \ |
107 | 359k | case 'u': \ |
108 | 366k | case 'v': \ |
109 | 373k | case 'w': \ |
110 | 382k | case 'x': \ |
111 | 395k | case 'y': \ |
112 | 402k | case 'z' |
113 | | |
114 | | #define UCALPHA_CASES \ |
115 | 57.5k | case 'A': \ |
116 | 59.9k | case 'B': \ |
117 | 62.2k | case 'C': \ |
118 | 64.9k | case 'D': \ |
119 | 67.3k | case 'E': \ |
120 | 69.6k | case 'F': \ |
121 | 71.7k | case 'G': \ |
122 | 74.4k | case 'H': \ |
123 | 76.9k | case 'I': \ |
124 | 79.6k | case 'J': \ |
125 | 81.7k | case 'K': \ |
126 | 84.4k | case 'L': \ |
127 | 86.9k | case 'M': \ |
128 | 89.0k | case 'N': \ |
129 | 91.5k | case 'O': \ |
130 | 94.9k | case 'P': \ |
131 | 97.1k | case 'Q': \ |
132 | 99.8k | case 'R': \ |
133 | 102k | case 'S': \ |
134 | 105k | case 'T': \ |
135 | 107k | case 'U': \ |
136 | 109k | case 'V': \ |
137 | 112k | case 'W': \ |
138 | 115k | case 'X': \ |
139 | 117k | case 'Y': \ |
140 | 119k | case 'Z' |
141 | | |
142 | | #define ALPHA_CASES \ |
143 | 39.5k | UCALPHA_CASES: \ |
144 | 40.9k | LCALPHA_CASES |
145 | | |
146 | | #define TOKEN_CASES \ |
147 | 777 | case '!': \ |
148 | 1.31k | case '#': \ |
149 | 2.03k | case '$': \ |
150 | 3.10k | case '%': \ |
151 | 3.80k | case '&': \ |
152 | 4.49k | case '\'': \ |
153 | 5.73k | case '*': \ |
154 | 6.55k | case '+': \ |
155 | 7.34k | case '-': \ |
156 | 8.87k | case '.': \ |
157 | 9.94k | case '/': \ |
158 | 22.7k | DIGIT_CASES: \ |
159 | 24.2k | case ':': \ |
160 | 46.4k | UCALPHA_CASES: \ |
161 | 46.9k | case '^': \ |
162 | 47.5k | case '_': \ |
163 | 48.2k | case '`': \ |
164 | 77.4k | LCALPHA_CASES: \ |
165 | 78.0k | case '|': \ |
166 | 78.5k | case '~' |
167 | | |
168 | | #define LCHEXALPHA_CASES \ |
169 | 3.24k | case 'a': \ |
170 | 5.51k | case 'b': \ |
171 | 7.70k | case 'c': \ |
172 | 10.0k | case 'd': \ |
173 | 11.5k | case 'e': \ |
174 | 12.6k | case 'f' |
175 | | |
176 | | #define X00_1F_CASES \ |
177 | 0 | case 0x00: \ |
178 | 0 | case 0x01: \ |
179 | 0 | case 0x02: \ |
180 | 0 | case 0x03: \ |
181 | 0 | case 0x04: \ |
182 | 0 | case 0x05: \ |
183 | 0 | case 0x06: \ |
184 | 0 | case 0x07: \ |
185 | 0 | case 0x08: \ |
186 | 11 | case 0x09: \ |
187 | 11 | case 0x0a: \ |
188 | 11 | case 0x0b: \ |
189 | 11 | case 0x0c: \ |
190 | 11 | case 0x0d: \ |
191 | 11 | case 0x0e: \ |
192 | 11 | case 0x0f: \ |
193 | 11 | case 0x10: \ |
194 | 11 | case 0x11: \ |
195 | 11 | case 0x12: \ |
196 | 11 | case 0x13: \ |
197 | 11 | case 0x14: \ |
198 | 11 | case 0x15: \ |
199 | 11 | case 0x16: \ |
200 | 11 | case 0x17: \ |
201 | 11 | case 0x18: \ |
202 | 11 | case 0x19: \ |
203 | 11 | case 0x1a: \ |
204 | 11 | case 0x1b: \ |
205 | 11 | case 0x1c: \ |
206 | 11 | case 0x1d: \ |
207 | 11 | case 0x1e: \ |
208 | 11 | case 0x1f |
209 | | |
210 | | #define X20_21_CASES \ |
211 | 722 | case ' ': \ |
212 | 1.27k | case '!' |
213 | | |
214 | | #define X23_5B_CASES \ |
215 | 2.03k | case '#': \ |
216 | 2.55k | case '$': \ |
217 | 3.85k | case '%': \ |
218 | 4.35k | case '&': \ |
219 | 4.97k | case '\'': \ |
220 | 5.54k | case '(': \ |
221 | 6.07k | case ')': \ |
222 | 6.78k | case '*': \ |
223 | 7.36k | case '+': \ |
224 | 7.99k | case ',': \ |
225 | 8.56k | case '-': \ |
226 | 9.11k | case '.': \ |
227 | 9.84k | case '/': \ |
228 | 16.3k | DIGIT_CASES: \ |
229 | 16.9k | case ':': \ |
230 | 17.5k | case ';': \ |
231 | 18.1k | case '<': \ |
232 | 18.6k | case '=': \ |
233 | 19.2k | case '>': \ |
234 | 19.8k | case '?': \ |
235 | 20.3k | case '@': \ |
236 | 33.6k | UCALPHA_CASES: \ |
237 | 34.1k | case '[' |
238 | | |
239 | | #define X5D_7E_CASES \ |
240 | 34.7k | case ']': \ |
241 | 35.3k | case '^': \ |
242 | 35.9k | case '_': \ |
243 | 36.3k | case '`': \ |
244 | 52.1k | LCALPHA_CASES: \ |
245 | 52.6k | case '{': \ |
246 | 53.1k | case '|': \ |
247 | 53.5k | case '}': \ |
248 | 54.1k | case '~' |
249 | | |
250 | | #define X7F_FF_CASES \ |
251 | 11 | case 0x7f: \ |
252 | 27 | case 0x80: \ |
253 | 43 | case 0x81: \ |
254 | 59 | case 0x82: \ |
255 | 79 | case 0x83: \ |
256 | 95 | case 0x84: \ |
257 | 111 | case 0x85: \ |
258 | 128 | case 0x86: \ |
259 | 144 | case 0x87: \ |
260 | 160 | case 0x88: \ |
261 | 176 | case 0x89: \ |
262 | 196 | case 0x8a: \ |
263 | 212 | case 0x8b: \ |
264 | 228 | case 0x8c: \ |
265 | 244 | case 0x8d: \ |
266 | 260 | case 0x8e: \ |
267 | 276 | case 0x8f: \ |
268 | 292 | case 0x90: \ |
269 | 308 | case 0x91: \ |
270 | 324 | case 0x92: \ |
271 | 340 | case 0x93: \ |
272 | 356 | case 0x94: \ |
273 | 372 | case 0x95: \ |
274 | 388 | case 0x96: \ |
275 | 404 | case 0x97: \ |
276 | 420 | case 0x98: \ |
277 | 436 | case 0x99: \ |
278 | 452 | case 0x9a: \ |
279 | 468 | case 0x9b: \ |
280 | 484 | case 0x9c: \ |
281 | 500 | case 0x9d: \ |
282 | 516 | case 0x9e: \ |
283 | 532 | case 0x9f: \ |
284 | 548 | case 0xa0: \ |
285 | 568 | case 0xa1: \ |
286 | 584 | case 0xa2: \ |
287 | 600 | case 0xa3: \ |
288 | 616 | case 0xa4: \ |
289 | 632 | case 0xa5: \ |
290 | 648 | case 0xa6: \ |
291 | 664 | case 0xa7: \ |
292 | 680 | case 0xa8: \ |
293 | 696 | case 0xa9: \ |
294 | 712 | case 0xaa: \ |
295 | 728 | case 0xab: \ |
296 | 748 | case 0xac: \ |
297 | 764 | case 0xad: \ |
298 | 780 | case 0xae: \ |
299 | 796 | case 0xaf: \ |
300 | 812 | case 0xb0: \ |
301 | 828 | case 0xb1: \ |
302 | 844 | case 0xb2: \ |
303 | 860 | case 0xb3: \ |
304 | 876 | case 0xb4: \ |
305 | 892 | case 0xb5: \ |
306 | 908 | case 0xb6: \ |
307 | 924 | case 0xb7: \ |
308 | 940 | case 0xb8: \ |
309 | 956 | case 0xb9: \ |
310 | 972 | case 0xba: \ |
311 | 989 | case 0xbb: \ |
312 | 1.00k | case 0xbc: \ |
313 | 1.02k | case 0xbd: \ |
314 | 1.04k | case 0xbe: \ |
315 | 1.05k | case 0xbf: \ |
316 | 1.07k | case 0xc0: \ |
317 | 1.09k | case 0xc1: \ |
318 | 1.11k | case 0xc2: \ |
319 | 1.12k | case 0xc3: \ |
320 | 1.14k | case 0xc4: \ |
321 | 1.16k | case 0xc5: \ |
322 | 1.17k | case 0xc6: \ |
323 | 1.19k | case 0xc7: \ |
324 | 1.21k | case 0xc8: \ |
325 | 1.22k | case 0xc9: \ |
326 | 1.24k | case 0xca: \ |
327 | 1.26k | case 0xcb: \ |
328 | 1.27k | case 0xcc: \ |
329 | 1.29k | case 0xcd: \ |
330 | 1.30k | case 0xce: \ |
331 | 1.32k | case 0xcf: \ |
332 | 1.34k | case 0xd0: \ |
333 | 1.35k | case 0xd1: \ |
334 | 1.37k | case 0xd2: \ |
335 | 1.39k | case 0xd3: \ |
336 | 1.40k | case 0xd4: \ |
337 | 1.42k | case 0xd5: \ |
338 | 1.44k | case 0xd6: \ |
339 | 1.45k | case 0xd7: \ |
340 | 1.47k | case 0xd8: \ |
341 | 1.49k | case 0xd9: \ |
342 | 1.50k | case 0xda: \ |
343 | 1.52k | case 0xdb: \ |
344 | 1.53k | case 0xdc: \ |
345 | 1.55k | case 0xdd: \ |
346 | 1.57k | case 0xde: \ |
347 | 1.59k | case 0xdf: \ |
348 | 1.60k | case 0xe0: \ |
349 | 1.62k | case 0xe1: \ |
350 | 1.63k | case 0xe2: \ |
351 | 1.65k | case 0xe3: \ |
352 | 1.67k | case 0xe4: \ |
353 | 1.69k | case 0xe5: \ |
354 | 1.70k | case 0xe6: \ |
355 | 1.72k | case 0xe7: \ |
356 | 1.73k | case 0xe8: \ |
357 | 1.75k | case 0xe9: \ |
358 | 1.77k | case 0xea: \ |
359 | 1.79k | case 0xeb: \ |
360 | 1.80k | case 0xec: \ |
361 | 1.82k | case 0xed: \ |
362 | 1.83k | case 0xee: \ |
363 | 1.85k | case 0xef: \ |
364 | 1.87k | case 0xf0: \ |
365 | 1.88k | case 0xf1: \ |
366 | 1.90k | case 0xf2: \ |
367 | 1.91k | case 0xf3: \ |
368 | 1.93k | case 0xf4: \ |
369 | 1.95k | case 0xf5: \ |
370 | 1.96k | case 0xf6: \ |
371 | 1.98k | case 0xf7: \ |
372 | 1.99k | case 0xf8: \ |
373 | 2.01k | case 0xf9: \ |
374 | 2.03k | case 0xfa: \ |
375 | 2.05k | case 0xfb: \ |
376 | 2.06k | case 0xfc: \ |
377 | 2.08k | case 0xfd: \ |
378 | 2.09k | case 0xfe: \ |
379 | 2.11k | case 0xff |
380 | | |
381 | 19.8k | static int is_ws(uint8_t c) { |
382 | 19.8k | switch (c) { |
383 | 740 | case ' ': |
384 | 1.45k | case '\t': |
385 | 1.45k | return 1; |
386 | 18.3k | default: |
387 | 18.3k | return 0; |
388 | 19.8k | } |
389 | 19.8k | } |
390 | | |
391 | | #ifdef __AVX2__ |
392 | | # ifdef _MSC_VER |
393 | | # include <intrin.h> |
394 | | |
395 | | static int ctz(unsigned int v) { |
396 | | unsigned long n; |
397 | | |
398 | | /* Assume that v is not 0. */ |
399 | | _BitScanForward(&n, v); |
400 | | |
401 | | return (int)n; |
402 | | } |
403 | | # else /* !_MSC_VER */ |
404 | | # define ctz __builtin_ctz |
405 | | # endif /* !_MSC_VER */ |
406 | | #endif /* __AVX2__ */ |
407 | | |
408 | 1.09M | static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; } |
409 | | |
410 | 64.4k | static void parser_discard_ows(sfparse_parser *sfp) { |
411 | 65.9k | for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos) |
412 | 1.45k | ; |
413 | 64.4k | } |
414 | | |
415 | 85.9k | static void parser_discard_sp(sfparse_parser *sfp) { |
416 | 101k | for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos) |
417 | 15.1k | ; |
418 | 85.9k | } |
419 | | |
420 | 167k | static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) { |
421 | 167k | sfp->state &= ~SFPARSE_STATE_OP_MASK; |
422 | 167k | sfp->state |= op; |
423 | 167k | } |
424 | | |
425 | 5.25k | static void parser_unset_inner_list_state(sfparse_parser *sfp) { |
426 | 5.25k | sfp->state &= ~SFPARSE_STATE_INNER_LIST; |
427 | 5.25k | } |
428 | | |
429 | | #ifdef __AVX2__ |
430 | | static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) { |
431 | | const __m256i us = _mm256_set1_epi8('_'); |
432 | | const __m256i ds = _mm256_set1_epi8('-'); |
433 | | const __m256i dot = _mm256_set1_epi8('.'); |
434 | | const __m256i ast = _mm256_set1_epi8('*'); |
435 | | const __m256i r0l = _mm256_set1_epi8('0' - 1); |
436 | | const __m256i r0r = _mm256_set1_epi8('9' + 1); |
437 | | const __m256i r1l = _mm256_set1_epi8('a' - 1); |
438 | | const __m256i r1r = _mm256_set1_epi8('z' + 1); |
439 | | __m256i s, x; |
440 | | uint32_t m; |
441 | | |
442 | | for (; first != last; first += 32) { |
443 | | s = _mm256_loadu_si256((void *)first); |
444 | | |
445 | | x = _mm256_cmpeq_epi8(s, us); |
446 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x); |
447 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x); |
448 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x); |
449 | | x = _mm256_or_si256( |
450 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)), |
451 | | x); |
452 | | x = _mm256_or_si256( |
453 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), |
454 | | x); |
455 | | |
456 | | m = ~(uint32_t)_mm256_movemask_epi8(x); |
457 | | if (m) { |
458 | | return first + ctz(m); |
459 | | } |
460 | | } |
461 | | |
462 | | return last; |
463 | | } |
464 | | #endif /* __AVX2__ */ |
465 | | |
466 | 77.1k | static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) { |
467 | 77.1k | const uint8_t *base; |
468 | | #ifdef __AVX2__ |
469 | | const uint8_t *last; |
470 | | #endif /* __AVX2__ */ |
471 | | |
472 | 77.1k | switch (*sfp->pos) { |
473 | 5.76k | case '*': |
474 | 77.0k | LCALPHA_CASES: |
475 | 77.0k | break; |
476 | 114 | default: |
477 | 114 | return SFPARSE_ERR_PARSE; |
478 | 77.1k | } |
479 | | |
480 | 77.0k | base = sfp->pos++; |
481 | | |
482 | | #ifdef __AVX2__ |
483 | | if (sfp->end - sfp->pos >= 32) { |
484 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
485 | | |
486 | | sfp->pos = find_char_key(sfp->pos, last); |
487 | | if (sfp->pos != last) { |
488 | | goto fin; |
489 | | } |
490 | | } |
491 | | #endif /* __AVX2__ */ |
492 | | |
493 | 199k | for (; !parser_eof(sfp); ++sfp->pos) { |
494 | 191k | switch (*sfp->pos) { |
495 | 1.62k | case '_': |
496 | 6.22k | case '-': |
497 | 7.63k | case '.': |
498 | 9.04k | case '*': |
499 | 190k | DIGIT_CASES: |
500 | 1.90M | LCALPHA_CASES: |
501 | 1.90M | continue; |
502 | 191k | } |
503 | | |
504 | 68.8k | break; |
505 | 191k | } |
506 | | |
507 | | #ifdef __AVX2__ |
508 | | fin: |
509 | | #endif /* __AVX2__ */ |
510 | 77.0k | if (dest) { |
511 | 59.7k | dest->base = (uint8_t *)base; |
512 | 59.7k | dest->len = (size_t)(sfp->pos - dest->base); |
513 | 59.7k | } |
514 | | |
515 | 77.0k | return 0; |
516 | 77.0k | } |
517 | | |
518 | 11.3k | static int parser_number(sfparse_parser *sfp, sfparse_value *dest) { |
519 | 11.3k | int sign = 1; |
520 | 11.3k | int64_t value = 0; |
521 | 11.3k | size_t len = 0; |
522 | 11.3k | size_t fpos = 0; |
523 | | |
524 | 11.3k | if (*sfp->pos == '-') { |
525 | 568 | ++sfp->pos; |
526 | 568 | if (parser_eof(sfp)) { |
527 | 36 | return SFPARSE_ERR_PARSE; |
528 | 36 | } |
529 | | |
530 | 532 | sign = -1; |
531 | 532 | } |
532 | | |
533 | 11.3k | assert(!parser_eof(sfp)); |
534 | | |
535 | 27.9k | for (; !parser_eof(sfp); ++sfp->pos) { |
536 | 24.8k | switch (*sfp->pos) { |
537 | 110k | DIGIT_CASES: |
538 | 110k | if (++len > 15) { |
539 | 11 | return SFPARSE_ERR_PARSE; |
540 | 11 | } |
541 | | |
542 | 16.5k | value *= 10; |
543 | 16.5k | value += *sfp->pos - '0'; |
544 | | |
545 | 16.5k | continue; |
546 | 24.8k | } |
547 | | |
548 | 8.26k | break; |
549 | 24.8k | } |
550 | | |
551 | 11.2k | if (len == 0) { |
552 | 32 | return SFPARSE_ERR_PARSE; |
553 | 32 | } |
554 | | |
555 | 11.2k | if (parser_eof(sfp) || *sfp->pos != '.') { |
556 | 8.64k | if (dest) { |
557 | 4.36k | dest->type = SFPARSE_TYPE_INTEGER; |
558 | 4.36k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
559 | 4.36k | dest->integer = value * sign; |
560 | 4.36k | } |
561 | | |
562 | 8.64k | return 0; |
563 | 8.64k | } |
564 | | |
565 | | /* decimal */ |
566 | | |
567 | 2.62k | if (len > 12) { |
568 | 11 | return SFPARSE_ERR_PARSE; |
569 | 11 | } |
570 | | |
571 | 2.61k | fpos = len; |
572 | | |
573 | 2.61k | ++sfp->pos; |
574 | | |
575 | 6.85k | for (; !parser_eof(sfp); ++sfp->pos) { |
576 | 5.65k | switch (*sfp->pos) { |
577 | 32.4k | DIGIT_CASES: |
578 | 32.4k | if (++len > 15) { |
579 | 11 | return SFPARSE_ERR_PARSE; |
580 | 11 | } |
581 | | |
582 | 4.24k | value *= 10; |
583 | 4.24k | value += *sfp->pos - '0'; |
584 | | |
585 | 4.24k | continue; |
586 | 5.65k | } |
587 | | |
588 | 1.39k | break; |
589 | 5.65k | } |
590 | | |
591 | 2.59k | if (fpos == len || len - fpos > 3) { |
592 | 55 | return SFPARSE_ERR_PARSE; |
593 | 55 | } |
594 | | |
595 | 2.54k | if (dest) { |
596 | 1.69k | dest->type = SFPARSE_TYPE_DECIMAL; |
597 | 1.69k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
598 | 1.69k | dest->decimal.numer = value * sign; |
599 | | |
600 | 1.69k | switch (len - fpos) { |
601 | 832 | case 1: |
602 | 832 | dest->decimal.denom = 10; |
603 | | |
604 | 832 | break; |
605 | 419 | case 2: |
606 | 419 | dest->decimal.denom = 100; |
607 | | |
608 | 419 | break; |
609 | 444 | case 3: |
610 | 444 | dest->decimal.denom = 1000; |
611 | | |
612 | 444 | break; |
613 | 1.69k | } |
614 | 1.69k | } |
615 | | |
616 | 2.54k | return 0; |
617 | 2.54k | } |
618 | | |
619 | 1.18k | static int parser_date(sfparse_parser *sfp, sfparse_value *dest) { |
620 | 1.18k | int rv; |
621 | 1.18k | sfparse_value val; |
622 | | |
623 | | /* The first byte has already been validated by the caller. */ |
624 | 1.18k | assert('@' == *sfp->pos); |
625 | | |
626 | 1.18k | ++sfp->pos; |
627 | | |
628 | 1.18k | if (parser_eof(sfp)) { |
629 | 31 | return SFPARSE_ERR_PARSE; |
630 | 31 | } |
631 | | |
632 | 1.15k | rv = parser_number(sfp, &val); |
633 | 1.15k | if (rv != 0) { |
634 | 30 | return rv; |
635 | 30 | } |
636 | | |
637 | 1.12k | if (val.type != SFPARSE_TYPE_INTEGER) { |
638 | 11 | return SFPARSE_ERR_PARSE; |
639 | 11 | } |
640 | | |
641 | 1.10k | if (dest) { |
642 | 466 | *dest = val; |
643 | 466 | dest->type = SFPARSE_TYPE_DATE; |
644 | 466 | } |
645 | | |
646 | 1.10k | return 0; |
647 | 1.12k | } |
648 | | |
649 | | #ifdef __AVX2__ |
650 | | static const uint8_t *find_char_string(const uint8_t *first, |
651 | | const uint8_t *last) { |
652 | | const __m256i bs = _mm256_set1_epi8('\\'); |
653 | | const __m256i dq = _mm256_set1_epi8('"'); |
654 | | const __m256i del = _mm256_set1_epi8(0x7f); |
655 | | const __m256i sp = _mm256_set1_epi8(' '); |
656 | | __m256i s, x; |
657 | | uint32_t m; |
658 | | |
659 | | for (; first != last; first += 32) { |
660 | | s = _mm256_loadu_si256((void *)first); |
661 | | |
662 | | x = _mm256_cmpgt_epi8(sp, s); |
663 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x); |
664 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x); |
665 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x); |
666 | | |
667 | | m = (uint32_t)_mm256_movemask_epi8(x); |
668 | | if (m) { |
669 | | return first + ctz(m); |
670 | | } |
671 | | } |
672 | | |
673 | | return last; |
674 | | } |
675 | | #endif /* __AVX2__ */ |
676 | | |
677 | 6.74k | static int parser_string(sfparse_parser *sfp, sfparse_value *dest) { |
678 | 6.74k | const uint8_t *base; |
679 | | #ifdef __AVX2__ |
680 | | const uint8_t *last; |
681 | | #endif /* __AVX2__ */ |
682 | 6.74k | uint32_t flags = SFPARSE_VALUE_FLAG_NONE; |
683 | | |
684 | | /* The first byte has already been validated by the caller. */ |
685 | 6.74k | assert('"' == *sfp->pos); |
686 | | |
687 | 6.74k | base = ++sfp->pos; |
688 | | |
689 | | #ifdef __AVX2__ |
690 | | for (; sfp->end - sfp->pos >= 32; ++sfp->pos) { |
691 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
692 | | |
693 | | sfp->pos = find_char_string(sfp->pos, last); |
694 | | if (sfp->pos == last) { |
695 | | break; |
696 | | } |
697 | | |
698 | | switch (*sfp->pos) { |
699 | | case '\\': |
700 | | ++sfp->pos; |
701 | | if (parser_eof(sfp)) { |
702 | | return SFPARSE_ERR_PARSE; |
703 | | } |
704 | | |
705 | | switch (*sfp->pos) { |
706 | | case '"': |
707 | | case '\\': |
708 | | flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; |
709 | | |
710 | | break; |
711 | | default: |
712 | | return SFPARSE_ERR_PARSE; |
713 | | } |
714 | | |
715 | | break; |
716 | | case '"': |
717 | | goto fin; |
718 | | default: |
719 | | return SFPARSE_ERR_PARSE; |
720 | | } |
721 | | } |
722 | | #endif /* __AVX2__ */ |
723 | | |
724 | 61.7k | for (; !parser_eof(sfp); ++sfp->pos) { |
725 | 61.0k | switch (*sfp->pos) { |
726 | 1.99k | X20_21_CASES: |
727 | 1.09M | X23_5B_CASES: |
728 | 1.09M | X5D_7E_CASES: |
729 | 54.1k | break; |
730 | 860 | case '\\': |
731 | 860 | ++sfp->pos; |
732 | 860 | if (parser_eof(sfp)) { |
733 | 17 | return SFPARSE_ERR_PARSE; |
734 | 17 | } |
735 | | |
736 | 843 | switch (*sfp->pos) { |
737 | 388 | case '"': |
738 | 827 | case '\\': |
739 | 827 | flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING; |
740 | | |
741 | 827 | break; |
742 | 16 | default: |
743 | 16 | return SFPARSE_ERR_PARSE; |
744 | 843 | } |
745 | | |
746 | 827 | break; |
747 | 5.93k | case '"': |
748 | 5.93k | goto fin; |
749 | 65 | default: |
750 | 65 | return SFPARSE_ERR_PARSE; |
751 | 61.0k | } |
752 | 61.0k | } |
753 | | |
754 | 713 | return SFPARSE_ERR_PARSE; |
755 | | |
756 | 5.93k | fin: |
757 | 5.93k | if (dest) { |
758 | 5.30k | dest->type = SFPARSE_TYPE_STRING; |
759 | 5.30k | dest->flags = flags; |
760 | 5.30k | dest->vec.len = (size_t)(sfp->pos - base); |
761 | 5.30k | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
762 | 5.30k | } |
763 | | |
764 | 5.93k | ++sfp->pos; |
765 | | |
766 | 5.93k | return 0; |
767 | 6.74k | } |
768 | | |
769 | | #ifdef __AVX2__ |
770 | | static const uint8_t *find_char_token(const uint8_t *first, |
771 | | const uint8_t *last) { |
772 | | /* r0: !..:, excluding "(), |
773 | | r1: A..Z |
774 | | r2: ^..~, excluding {} */ |
775 | | const __m256i r0l = _mm256_set1_epi8('!' - 1); |
776 | | const __m256i r0r = _mm256_set1_epi8(':' + 1); |
777 | | const __m256i dq = _mm256_set1_epi8('"'); |
778 | | const __m256i prl = _mm256_set1_epi8('('); |
779 | | const __m256i prr = _mm256_set1_epi8(')'); |
780 | | const __m256i comma = _mm256_set1_epi8(','); |
781 | | const __m256i r1l = _mm256_set1_epi8('A' - 1); |
782 | | const __m256i r1r = _mm256_set1_epi8('Z' + 1); |
783 | | const __m256i r2l = _mm256_set1_epi8('^' - 1); |
784 | | const __m256i r2r = _mm256_set1_epi8('~' + 1); |
785 | | const __m256i cbl = _mm256_set1_epi8('{'); |
786 | | const __m256i cbr = _mm256_set1_epi8('}'); |
787 | | __m256i s, x; |
788 | | uint32_t m; |
789 | | |
790 | | for (; first != last; first += 32) { |
791 | | s = _mm256_loadu_si256((void *)first); |
792 | | |
793 | | x = _mm256_andnot_si256( |
794 | | _mm256_cmpeq_epi8(s, comma), |
795 | | _mm256_andnot_si256( |
796 | | _mm256_cmpeq_epi8(s, prr), |
797 | | _mm256_andnot_si256( |
798 | | _mm256_cmpeq_epi8(s, prl), |
799 | | _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq), |
800 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), |
801 | | _mm256_cmpgt_epi8(r0r, s)))))); |
802 | | x = _mm256_or_si256( |
803 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), |
804 | | x); |
805 | | x = _mm256_or_si256( |
806 | | _mm256_andnot_si256( |
807 | | _mm256_cmpeq_epi8(s, cbr), |
808 | | _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl), |
809 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), |
810 | | _mm256_cmpgt_epi8(r2r, s)))), |
811 | | x); |
812 | | |
813 | | m = ~(uint32_t)_mm256_movemask_epi8(x); |
814 | | if (m) { |
815 | | return first + ctz(m); |
816 | | } |
817 | | } |
818 | | |
819 | | return last; |
820 | | } |
821 | | #endif /* __AVX2__ */ |
822 | | |
823 | 31.4k | static int parser_token(sfparse_parser *sfp, sfparse_value *dest) { |
824 | 31.4k | const uint8_t *base; |
825 | | #ifdef __AVX2__ |
826 | | const uint8_t *last; |
827 | | #endif /* __AVX2__ */ |
828 | | |
829 | | /* The first byte has already been validated by the caller. */ |
830 | 31.4k | base = sfp->pos++; |
831 | | |
832 | | #ifdef __AVX2__ |
833 | | if (sfp->end - sfp->pos >= 32) { |
834 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
835 | | |
836 | | sfp->pos = find_char_token(sfp->pos, last); |
837 | | if (sfp->pos != last) { |
838 | | goto fin; |
839 | | } |
840 | | } |
841 | | #endif /* __AVX2__ */ |
842 | | |
843 | 110k | for (; !parser_eof(sfp); ++sfp->pos) { |
844 | 97.6k | switch (*sfp->pos) { |
845 | 3.14M | TOKEN_CASES: |
846 | 3.14M | continue; |
847 | 97.6k | } |
848 | | |
849 | 19.0k | break; |
850 | 97.6k | } |
851 | | |
852 | | #ifdef __AVX2__ |
853 | | fin: |
854 | | #endif /* __AVX2__ */ |
855 | 31.4k | if (dest) { |
856 | 14.0k | dest->type = SFPARSE_TYPE_TOKEN; |
857 | 14.0k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
858 | 14.0k | dest->vec.base = (uint8_t *)base; |
859 | 14.0k | dest->vec.len = (size_t)(sfp->pos - base); |
860 | 14.0k | } |
861 | | |
862 | 31.4k | return 0; |
863 | 31.4k | } |
864 | | |
865 | | #ifdef __AVX2__ |
866 | | static const uint8_t *find_char_byteseq(const uint8_t *first, |
867 | | const uint8_t *last) { |
868 | | const __m256i pls = _mm256_set1_epi8('+'); |
869 | | const __m256i fs = _mm256_set1_epi8('/'); |
870 | | const __m256i r0l = _mm256_set1_epi8('0' - 1); |
871 | | const __m256i r0r = _mm256_set1_epi8('9' + 1); |
872 | | const __m256i r1l = _mm256_set1_epi8('A' - 1); |
873 | | const __m256i r1r = _mm256_set1_epi8('Z' + 1); |
874 | | const __m256i r2l = _mm256_set1_epi8('a' - 1); |
875 | | const __m256i r2r = _mm256_set1_epi8('z' + 1); |
876 | | __m256i s, x; |
877 | | uint32_t m; |
878 | | |
879 | | for (; first != last; first += 32) { |
880 | | s = _mm256_loadu_si256((void *)first); |
881 | | |
882 | | x = _mm256_cmpeq_epi8(s, pls); |
883 | | x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x); |
884 | | x = _mm256_or_si256( |
885 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)), |
886 | | x); |
887 | | x = _mm256_or_si256( |
888 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)), |
889 | | x); |
890 | | x = _mm256_or_si256( |
891 | | _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)), |
892 | | x); |
893 | | |
894 | | m = ~(uint32_t)_mm256_movemask_epi8(x); |
895 | | if (m) { |
896 | | return first + ctz(m); |
897 | | } |
898 | | } |
899 | | |
900 | | return last; |
901 | | } |
902 | | #endif /* __AVX2__ */ |
903 | | |
904 | 6.42k | static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) { |
905 | 6.42k | const uint8_t *base; |
906 | | #ifdef __AVX2__ |
907 | | const uint8_t *last; |
908 | | #endif /* __AVX2__ */ |
909 | | |
910 | | /* The first byte has already been validated by the caller. */ |
911 | 6.42k | assert(':' == *sfp->pos); |
912 | | |
913 | 6.42k | base = ++sfp->pos; |
914 | | |
915 | | #ifdef __AVX2__ |
916 | | if (sfp->end - sfp->pos >= 32) { |
917 | | last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu); |
918 | | sfp->pos = find_char_byteseq(sfp->pos, last); |
919 | | } |
920 | | #endif /* __AVX2__ */ |
921 | | |
922 | 47.5k | for (; !parser_eof(sfp); ++sfp->pos) { |
923 | 47.1k | switch (*sfp->pos) { |
924 | 550 | case '+': |
925 | 1.24k | case '/': |
926 | 60.1k | DIGIT_CASES: |
927 | 60.1k | ALPHA_CASES: |
928 | 41.1k | continue; |
929 | 1.49k | case '=': |
930 | 1.49k | switch ((sfp->pos - base) & 0x3) { |
931 | 19 | case 0: |
932 | 32 | case 1: |
933 | 32 | return SFPARSE_ERR_PARSE; |
934 | 883 | case 2: |
935 | 883 | ++sfp->pos; |
936 | | |
937 | 883 | if (parser_eof(sfp)) { |
938 | 12 | return SFPARSE_ERR_PARSE; |
939 | 12 | } |
940 | | |
941 | 871 | if (*sfp->pos == '=') { |
942 | 398 | ++sfp->pos; |
943 | 398 | } |
944 | | |
945 | 871 | break; |
946 | 582 | case 3: |
947 | 582 | ++sfp->pos; |
948 | | |
949 | 582 | break; |
950 | 1.49k | } |
951 | | |
952 | 1.45k | if (parser_eof(sfp) || *sfp->pos != ':') { |
953 | 31 | return SFPARSE_ERR_PARSE; |
954 | 31 | } |
955 | | |
956 | 1.42k | goto fin; |
957 | 4.48k | case ':': |
958 | 4.48k | if (((sfp->pos - base) & 0x3) == 1) { |
959 | 13 | return SFPARSE_ERR_PARSE; |
960 | 13 | } |
961 | | |
962 | 4.47k | goto fin; |
963 | 4.47k | default: |
964 | 46 | return SFPARSE_ERR_PARSE; |
965 | 47.1k | } |
966 | 47.1k | } |
967 | | |
968 | 399 | return SFPARSE_ERR_PARSE; |
969 | | |
970 | 5.89k | fin: |
971 | 5.89k | if (dest) { |
972 | 5.27k | dest->type = SFPARSE_TYPE_BYTESEQ; |
973 | 5.27k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
974 | 5.27k | dest->vec.len = (size_t)(sfp->pos - base); |
975 | 5.27k | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
976 | 5.27k | } |
977 | | |
978 | 5.89k | ++sfp->pos; |
979 | | |
980 | 5.89k | return 0; |
981 | 6.42k | } |
982 | | |
983 | 1.58k | static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) { |
984 | 1.58k | int b; |
985 | | |
986 | | /* The first byte has already been validated by the caller. */ |
987 | 1.58k | assert('?' == *sfp->pos); |
988 | | |
989 | 1.58k | ++sfp->pos; |
990 | | |
991 | 1.58k | if (parser_eof(sfp)) { |
992 | 24 | return SFPARSE_ERR_PARSE; |
993 | 24 | } |
994 | | |
995 | 1.56k | switch (*sfp->pos) { |
996 | 798 | case '0': |
997 | 798 | b = 0; |
998 | | |
999 | 798 | break; |
1000 | 748 | case '1': |
1001 | 748 | b = 1; |
1002 | | |
1003 | 748 | break; |
1004 | 16 | default: |
1005 | 16 | return SFPARSE_ERR_PARSE; |
1006 | 1.56k | } |
1007 | | |
1008 | 1.54k | ++sfp->pos; |
1009 | | |
1010 | 1.54k | if (dest) { |
1011 | 676 | dest->type = SFPARSE_TYPE_BOOLEAN; |
1012 | 676 | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1013 | 676 | dest->boolean = b; |
1014 | 676 | } |
1015 | | |
1016 | 1.54k | return 0; |
1017 | 1.56k | } |
1018 | | |
1019 | 15.1k | static int pctdecode(uint8_t *pc, const uint8_t **ppos) { |
1020 | 15.1k | uint8_t c, b = **ppos; |
1021 | | |
1022 | 15.1k | switch (b) { |
1023 | 8.76k | DIGIT_CASES: |
1024 | 8.76k | c = (uint8_t)((b - '0') << 4); |
1025 | | |
1026 | 8.76k | break; |
1027 | 6.32k | LCHEXALPHA_CASES: |
1028 | 6.32k | c = (uint8_t)((b - 'a' + 10) << 4); |
1029 | | |
1030 | 6.32k | break; |
1031 | 11 | default: |
1032 | 11 | return -1; |
1033 | 15.1k | } |
1034 | | |
1035 | 15.0k | b = *++*ppos; |
1036 | | |
1037 | 15.0k | switch (b) { |
1038 | 8.71k | DIGIT_CASES: |
1039 | 8.71k | c |= (uint8_t)(b - '0'); |
1040 | | |
1041 | 8.71k | break; |
1042 | 6.36k | LCHEXALPHA_CASES: |
1043 | 6.36k | c |= (uint8_t)(b - 'a' + 10); |
1044 | | |
1045 | 6.36k | break; |
1046 | 14 | default: |
1047 | 14 | return -1; |
1048 | 15.0k | } |
1049 | | |
1050 | 15.0k | *pc = c; |
1051 | 15.0k | ++*ppos; |
1052 | | |
1053 | 15.0k | return 0; |
1054 | 15.0k | } |
1055 | | |
1056 | | /* Start of utf8 dfa */ |
1057 | | /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
1058 | | * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. |
1059 | | * |
1060 | | * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
1061 | | * |
1062 | | * Permission is hereby granted, free of charge, to any person |
1063 | | * obtaining a copy of this software and associated documentation |
1064 | | * files (the "Software"), to deal in the Software without |
1065 | | * restriction, including without limitation the rights to use, copy, |
1066 | | * modify, merge, publish, distribute, sublicense, and/or sell copies |
1067 | | * of the Software, and to permit persons to whom the Software is |
1068 | | * furnished to do so, subject to the following conditions: |
1069 | | * |
1070 | | * The above copyright notice and this permission notice shall be |
1071 | | * included in all copies or substantial portions of the Software. |
1072 | | * |
1073 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
1074 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
1075 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
1076 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
1077 | | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
1078 | | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
1079 | | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
1080 | | * SOFTWARE. |
1081 | | */ |
/* DFA state in which a complete, valid sequence has been consumed. */
#define UTF8_ACCEPT 0
/* DFA state entered when the input is rejected. */
#define UTF8_REJECT 12

/* clang-format off */
static const uint8_t utf8d[] = {
  /*
   * The first 256 entries map each byte value to a character class.
   * Working with classes instead of raw bytes keeps the transition
   * table below small.
   */
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  /*
   * The remaining entries form the transition table.  It is indexed
   * by the current state plus the character class of the input byte
   * and yields the next state.
   */
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12,
};
/* clang-format on */
1111 | | |
1112 | 15.0k | static void utf8_decode(uint32_t *state, uint8_t byte) { |
1113 | 15.0k | *state = utf8d[256 + *state + utf8d[byte]]; |
1114 | 15.0k | } |
1115 | | |
1116 | | /* End of utf8 dfa */ |
1117 | | |
1118 | 8.41k | static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) { |
1119 | 8.41k | const uint8_t *base; |
1120 | 8.41k | uint8_t c; |
1121 | 8.41k | uint32_t utf8state = UTF8_ACCEPT; |
1122 | | |
1123 | 8.41k | assert('%' == *sfp->pos); |
1124 | | |
1125 | 8.41k | ++sfp->pos; |
1126 | | |
1127 | 8.41k | if (parser_eof(sfp) || *sfp->pos != '"') { |
1128 | 60 | return SFPARSE_ERR_PARSE; |
1129 | 60 | } |
1130 | | |
1131 | 8.35k | base = ++sfp->pos; |
1132 | | |
1133 | 51.6k | for (; !parser_eof(sfp);) { |
1134 | 51.6k | switch (*sfp->pos) { |
1135 | 253 | X00_1F_CASES: |
1136 | 2.11k | X7F_FF_CASES: |
1137 | 2.11k | return SFPARSE_ERR_PARSE; |
1138 | 15.1k | case '%': |
1139 | 15.1k | ++sfp->pos; |
1140 | | |
1141 | 15.1k | if (sfp->pos + 2 > sfp->end) { |
1142 | 16 | return SFPARSE_ERR_PARSE; |
1143 | 16 | } |
1144 | | |
1145 | 15.1k | if (pctdecode(&c, &sfp->pos) != 0) { |
1146 | 25 | return SFPARSE_ERR_PARSE; |
1147 | 25 | } |
1148 | | |
1149 | 15.0k | utf8_decode(&utf8state, c); |
1150 | 15.0k | if (utf8state == UTF8_REJECT) { |
1151 | 28 | return SFPARSE_ERR_PARSE; |
1152 | 28 | } |
1153 | | |
1154 | 15.0k | break; |
1155 | 15.0k | case '"': |
1156 | 6.10k | if (utf8state != UTF8_ACCEPT) { |
1157 | 12 | return SFPARSE_ERR_PARSE; |
1158 | 12 | } |
1159 | | |
1160 | 6.09k | if (dest) { |
1161 | 5.11k | dest->type = SFPARSE_TYPE_DISPSTRING; |
1162 | 5.11k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1163 | 5.11k | dest->vec.len = (size_t)(sfp->pos - base); |
1164 | 5.11k | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
1165 | 5.11k | } |
1166 | | |
1167 | 6.09k | ++sfp->pos; |
1168 | | |
1169 | 6.09k | return 0; |
1170 | 28.3k | default: |
1171 | 28.3k | if (utf8state != UTF8_ACCEPT) { |
1172 | 13 | return SFPARSE_ERR_PARSE; |
1173 | 13 | } |
1174 | | |
1175 | 28.2k | ++sfp->pos; |
1176 | 51.6k | } |
1177 | 51.6k | } |
1178 | | |
1179 | 53 | return SFPARSE_ERR_PARSE; |
1180 | 8.35k | } |
1181 | | |
1182 | 66.0k | static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) { |
1183 | 66.0k | switch (*sfp->pos) { |
1184 | 6.74k | case '"': |
1185 | 6.74k | return parser_string(sfp, dest); |
1186 | 568 | case '-': |
1187 | 10.1k | DIGIT_CASES: |
1188 | 10.1k | return parser_number(sfp, dest); |
1189 | 1.18k | case '@': |
1190 | 1.18k | return parser_date(sfp, dest); |
1191 | 6.42k | case ':': |
1192 | 6.42k | return parser_byteseq(sfp, dest); |
1193 | 1.58k | case '?': |
1194 | 1.58k | return parser_boolean(sfp, dest); |
1195 | 869 | case '*': |
1196 | 31.4k | ALPHA_CASES: |
1197 | 31.4k | return parser_token(sfp, dest); |
1198 | 8.41k | case '%': |
1199 | 8.41k | return parser_dispstring(sfp, dest); |
1200 | 44 | default: |
1201 | 44 | return SFPARSE_ERR_PARSE; |
1202 | 66.0k | } |
1203 | 66.0k | } |
1204 | | |
1205 | | static int parser_skip_inner_list(sfparse_parser *sfp); |
1206 | | |
1207 | | int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key, |
1208 | 89.5k | sfparse_value *dest_value) { |
1209 | 89.5k | int rv; |
1210 | | |
1211 | 89.5k | switch (sfp->state & SFPARSE_STATE_OP_MASK) { |
1212 | 0 | case SFPARSE_STATE_BEFORE: |
1213 | 0 | rv = parser_skip_inner_list(sfp); |
1214 | 0 | if (rv != 0) { |
1215 | 0 | return rv; |
1216 | 0 | } |
1217 | | |
1218 | | /* fall through */ |
1219 | 72.8k | case SFPARSE_STATE_BEFORE_PARAMS: |
1220 | 72.8k | parser_set_op_state(sfp, SFPARSE_STATE_PARAMS); |
1221 | | |
1222 | 72.8k | break; |
1223 | 16.7k | case SFPARSE_STATE_PARAMS: |
1224 | 16.7k | break; |
1225 | 0 | default: |
1226 | 0 | assert(0); |
1227 | 0 | abort(); |
1228 | 89.5k | } |
1229 | | |
1230 | 89.5k | if (parser_eof(sfp) || *sfp->pos != ';') { |
1231 | 72.1k | parser_set_op_state(sfp, SFPARSE_STATE_AFTER); |
1232 | | |
1233 | 72.1k | return SFPARSE_ERR_EOF; |
1234 | 72.1k | } |
1235 | | |
1236 | 17.4k | ++sfp->pos; |
1237 | | |
1238 | 17.4k | parser_discard_sp(sfp); |
1239 | 17.4k | if (parser_eof(sfp)) { |
1240 | 29 | return SFPARSE_ERR_PARSE; |
1241 | 29 | } |
1242 | | |
1243 | 17.3k | rv = parser_key(sfp, dest_key); |
1244 | 17.3k | if (rv != 0) { |
1245 | 44 | return rv; |
1246 | 44 | } |
1247 | | |
1248 | 17.3k | if (parser_eof(sfp) || *sfp->pos != '=') { |
1249 | 7.40k | if (dest_value) { |
1250 | 0 | dest_value->type = SFPARSE_TYPE_BOOLEAN; |
1251 | 0 | dest_value->flags = SFPARSE_VALUE_FLAG_NONE; |
1252 | 0 | dest_value->boolean = 1; |
1253 | 0 | } |
1254 | | |
1255 | 7.40k | return 0; |
1256 | 7.40k | } |
1257 | | |
1258 | 9.92k | ++sfp->pos; |
1259 | | |
1260 | 9.92k | if (parser_eof(sfp)) { |
1261 | 11 | return SFPARSE_ERR_PARSE; |
1262 | 11 | } |
1263 | | |
1264 | 9.91k | return parser_bare_item(sfp, dest_value); |
1265 | 9.92k | } |
1266 | | |
1267 | 72.8k | static int parser_skip_params(sfparse_parser *sfp) { |
1268 | 72.8k | int rv; |
1269 | | |
1270 | 89.5k | for (;;) { |
1271 | 89.5k | rv = sfparse_parser_param(sfp, NULL, NULL); |
1272 | 89.5k | switch (rv) { |
1273 | 16.7k | case 0: |
1274 | 16.7k | break; |
1275 | 72.1k | case SFPARSE_ERR_EOF: |
1276 | 72.1k | return 0; |
1277 | 618 | case SFPARSE_ERR_PARSE: |
1278 | 618 | return rv; |
1279 | 0 | default: |
1280 | 0 | assert(0); |
1281 | 0 | abort(); |
1282 | 89.5k | } |
1283 | 89.5k | } |
1284 | 72.8k | } |
1285 | | |
1286 | 22.3k | int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) { |
1287 | 22.3k | int rv; |
1288 | | |
1289 | 22.3k | switch (sfp->state & SFPARSE_STATE_OP_MASK) { |
1290 | 5.42k | case SFPARSE_STATE_BEFORE: |
1291 | 5.42k | parser_discard_sp(sfp); |
1292 | 5.42k | if (parser_eof(sfp)) { |
1293 | 25 | return SFPARSE_ERR_PARSE; |
1294 | 25 | } |
1295 | | |
1296 | 5.40k | break; |
1297 | 16.8k | case SFPARSE_STATE_BEFORE_PARAMS: |
1298 | 16.8k | rv = parser_skip_params(sfp); |
1299 | 16.8k | if (rv != 0) { |
1300 | 28 | return rv; |
1301 | 28 | } |
1302 | | |
1303 | | /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set |
1304 | | another state without reading the state. */ |
1305 | | /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */ |
1306 | | |
1307 | | /* fall through */ |
1308 | 16.8k | case SFPARSE_STATE_AFTER: |
1309 | 16.8k | if (parser_eof(sfp)) { |
1310 | 39 | return SFPARSE_ERR_PARSE; |
1311 | 39 | } |
1312 | | |
1313 | 16.8k | switch (*sfp->pos) { |
1314 | 11.9k | case ' ': |
1315 | 11.9k | parser_discard_sp(sfp); |
1316 | 11.9k | if (parser_eof(sfp)) { |
1317 | 0 | return SFPARSE_ERR_PARSE; |
1318 | 0 | } |
1319 | | |
1320 | 11.9k | break; |
1321 | 11.9k | case ')': |
1322 | 4.90k | break; |
1323 | 18 | default: |
1324 | 18 | return SFPARSE_ERR_PARSE; |
1325 | 16.8k | } |
1326 | | |
1327 | 16.8k | break; |
1328 | 16.8k | default: |
1329 | 0 | assert(0); |
1330 | 0 | abort(); |
1331 | 22.3k | } |
1332 | | |
1333 | 22.2k | if (*sfp->pos == ')') { |
1334 | 5.25k | ++sfp->pos; |
1335 | | |
1336 | 5.25k | parser_unset_inner_list_state(sfp); |
1337 | 5.25k | parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS); |
1338 | | |
1339 | 5.25k | return SFPARSE_ERR_EOF; |
1340 | 5.25k | } |
1341 | | |
1342 | 16.9k | rv = parser_bare_item(sfp, dest); |
1343 | 16.9k | if (rv != 0) { |
1344 | 65 | return rv; |
1345 | 65 | } |
1346 | | |
1347 | 16.8k | parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS); |
1348 | | |
1349 | 16.8k | return 0; |
1350 | 16.9k | } |
1351 | | |
1352 | 5.42k | static int parser_skip_inner_list(sfparse_parser *sfp) { |
1353 | 5.42k | int rv; |
1354 | | |
1355 | 22.3k | for (;;) { |
1356 | 22.3k | rv = sfparse_parser_inner_list(sfp, NULL); |
1357 | 22.3k | switch (rv) { |
1358 | 16.8k | case 0: |
1359 | 16.8k | break; |
1360 | 5.25k | case SFPARSE_ERR_EOF: |
1361 | 5.25k | return 0; |
1362 | 175 | case SFPARSE_ERR_PARSE: |
1363 | 175 | return rv; |
1364 | 0 | default: |
1365 | 0 | assert(0); |
1366 | 0 | abort(); |
1367 | 22.3k | } |
1368 | 22.3k | } |
1369 | 5.42k | } |
1370 | | |
1371 | 55.3k | static int parser_next_key_or_item(sfparse_parser *sfp) { |
1372 | 55.3k | parser_discard_ows(sfp); |
1373 | | |
1374 | 55.3k | if (parser_eof(sfp)) { |
1375 | 46.0k | return SFPARSE_ERR_EOF; |
1376 | 46.0k | } |
1377 | | |
1378 | 9.26k | if (*sfp->pos != ',') { |
1379 | 123 | return SFPARSE_ERR_PARSE; |
1380 | 123 | } |
1381 | | |
1382 | 9.14k | ++sfp->pos; |
1383 | | |
1384 | 9.14k | parser_discard_ows(sfp); |
1385 | 9.14k | if (parser_eof(sfp)) { |
1386 | 25 | return SFPARSE_ERR_PARSE; |
1387 | 25 | } |
1388 | | |
1389 | 9.11k | return 0; |
1390 | 9.14k | } |
1391 | | |
1392 | 59.7k | static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) { |
1393 | 59.7k | int rv; |
1394 | | |
1395 | 59.7k | if (parser_eof(sfp) || *(sfp->pos) != '=') { |
1396 | | /* Boolean true */ |
1397 | 15.0k | if (dest) { |
1398 | 15.0k | dest->type = SFPARSE_TYPE_BOOLEAN; |
1399 | 15.0k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1400 | 15.0k | dest->boolean = 1; |
1401 | 15.0k | } |
1402 | | |
1403 | 15.0k | sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; |
1404 | | |
1405 | 15.0k | return 0; |
1406 | 15.0k | } |
1407 | | |
1408 | 44.6k | ++sfp->pos; |
1409 | | |
1410 | 44.6k | if (parser_eof(sfp)) { |
1411 | 31 | return SFPARSE_ERR_PARSE; |
1412 | 31 | } |
1413 | | |
1414 | 44.6k | if (*sfp->pos == '(') { |
1415 | 5.44k | if (dest) { |
1416 | 5.44k | dest->type = SFPARSE_TYPE_INNER_LIST; |
1417 | 5.44k | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1418 | 5.44k | } |
1419 | | |
1420 | 5.44k | ++sfp->pos; |
1421 | | |
1422 | 5.44k | sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE; |
1423 | | |
1424 | 5.44k | return 0; |
1425 | 5.44k | } |
1426 | | |
1427 | 39.2k | rv = parser_bare_item(sfp, dest); |
1428 | 39.2k | if (rv != 0) { |
1429 | 3.34k | return rv; |
1430 | 3.34k | } |
1431 | | |
1432 | 35.8k | sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS; |
1433 | | |
1434 | 35.8k | return 0; |
1435 | 39.2k | } |
1436 | | |
1437 | | int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key, |
1438 | 107k | sfparse_value *dest_value) { |
1439 | 107k | int rv; |
1440 | | |
1441 | 107k | switch (sfp->state) { |
1442 | 5.42k | case SFPARSE_STATE_DICT_INNER_LIST_BEFORE: |
1443 | 5.42k | rv = parser_skip_inner_list(sfp); |
1444 | 5.42k | if (rv != 0) { |
1445 | 175 | return rv; |
1446 | 175 | } |
1447 | | |
1448 | | /* fall through */ |
1449 | 55.9k | case SFPARSE_STATE_DICT_BEFORE_PARAMS: |
1450 | 55.9k | rv = parser_skip_params(sfp); |
1451 | 55.9k | if (rv != 0) { |
1452 | 590 | return rv; |
1453 | 590 | } |
1454 | | |
1455 | | /* fall through */ |
1456 | 55.3k | case SFPARSE_STATE_DICT_AFTER: |
1457 | 55.3k | rv = parser_next_key_or_item(sfp); |
1458 | 55.3k | if (rv != 0) { |
1459 | 46.2k | return rv; |
1460 | 46.2k | } |
1461 | | |
1462 | 9.11k | break; |
1463 | 51.2k | case SFPARSE_STATE_INITIAL: |
1464 | 51.2k | parser_discard_sp(sfp); |
1465 | | |
1466 | 51.2k | if (parser_eof(sfp)) { |
1467 | 579 | return SFPARSE_ERR_EOF; |
1468 | 579 | } |
1469 | | |
1470 | 50.6k | break; |
1471 | 50.6k | default: |
1472 | 0 | assert(0); |
1473 | 0 | abort(); |
1474 | 107k | } |
1475 | | |
1476 | 59.7k | rv = parser_key(sfp, dest_key); |
1477 | 59.7k | if (rv != 0) { |
1478 | 70 | return rv; |
1479 | 70 | } |
1480 | | |
1481 | 59.7k | return parser_dict_value(sfp, dest_value); |
1482 | 59.7k | } |
1483 | | |
1484 | 0 | int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) { |
1485 | 0 | int rv; |
1486 | |
|
1487 | 0 | switch (sfp->state) { |
1488 | 0 | case SFPARSE_STATE_LIST_INNER_LIST_BEFORE: |
1489 | 0 | rv = parser_skip_inner_list(sfp); |
1490 | 0 | if (rv != 0) { |
1491 | 0 | return rv; |
1492 | 0 | } |
1493 | | |
1494 | | /* fall through */ |
1495 | 0 | case SFPARSE_STATE_LIST_BEFORE_PARAMS: |
1496 | 0 | rv = parser_skip_params(sfp); |
1497 | 0 | if (rv != 0) { |
1498 | 0 | return rv; |
1499 | 0 | } |
1500 | | |
1501 | | /* fall through */ |
1502 | 0 | case SFPARSE_STATE_LIST_AFTER: |
1503 | 0 | rv = parser_next_key_or_item(sfp); |
1504 | 0 | if (rv != 0) { |
1505 | 0 | return rv; |
1506 | 0 | } |
1507 | | |
1508 | 0 | break; |
1509 | 0 | case SFPARSE_STATE_INITIAL: |
1510 | 0 | parser_discard_sp(sfp); |
1511 | |
|
1512 | 0 | if (parser_eof(sfp)) { |
1513 | 0 | return SFPARSE_ERR_EOF; |
1514 | 0 | } |
1515 | | |
1516 | 0 | break; |
1517 | 0 | default: |
1518 | 0 | assert(0); |
1519 | 0 | abort(); |
1520 | 0 | } |
1521 | | |
1522 | 0 | if (*sfp->pos == '(') { |
1523 | 0 | if (dest) { |
1524 | 0 | dest->type = SFPARSE_TYPE_INNER_LIST; |
1525 | 0 | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1526 | 0 | } |
1527 | |
|
1528 | 0 | ++sfp->pos; |
1529 | |
|
1530 | 0 | sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE; |
1531 | |
|
1532 | 0 | return 0; |
1533 | 0 | } |
1534 | | |
1535 | 0 | rv = parser_bare_item(sfp, dest); |
1536 | 0 | if (rv != 0) { |
1537 | 0 | return rv; |
1538 | 0 | } |
1539 | | |
1540 | 0 | sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS; |
1541 | |
|
1542 | 0 | return 0; |
1543 | 0 | } |
1544 | | |
1545 | 0 | int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) { |
1546 | 0 | int rv; |
1547 | |
|
1548 | 0 | switch (sfp->state) { |
1549 | 0 | case SFPARSE_STATE_INITIAL: |
1550 | 0 | parser_discard_sp(sfp); |
1551 | |
|
1552 | 0 | if (parser_eof(sfp)) { |
1553 | 0 | return SFPARSE_ERR_PARSE; |
1554 | 0 | } |
1555 | | |
1556 | 0 | break; |
1557 | 0 | case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE: |
1558 | 0 | rv = parser_skip_inner_list(sfp); |
1559 | 0 | if (rv != 0) { |
1560 | 0 | return rv; |
1561 | 0 | } |
1562 | | |
1563 | | /* fall through */ |
1564 | 0 | case SFPARSE_STATE_ITEM_BEFORE_PARAMS: |
1565 | 0 | rv = parser_skip_params(sfp); |
1566 | 0 | if (rv != 0) { |
1567 | 0 | return rv; |
1568 | 0 | } |
1569 | | |
1570 | | /* fall through */ |
1571 | 0 | case SFPARSE_STATE_ITEM_AFTER: |
1572 | 0 | parser_discard_sp(sfp); |
1573 | |
|
1574 | 0 | if (!parser_eof(sfp)) { |
1575 | 0 | return SFPARSE_ERR_PARSE; |
1576 | 0 | } |
1577 | | |
1578 | 0 | return SFPARSE_ERR_EOF; |
1579 | 0 | default: |
1580 | 0 | assert(0); |
1581 | 0 | abort(); |
1582 | 0 | } |
1583 | | |
1584 | 0 | if (*sfp->pos == '(') { |
1585 | 0 | if (dest) { |
1586 | 0 | dest->type = SFPARSE_TYPE_INNER_LIST; |
1587 | 0 | dest->flags = SFPARSE_VALUE_FLAG_NONE; |
1588 | 0 | } |
1589 | |
|
1590 | 0 | ++sfp->pos; |
1591 | |
|
1592 | 0 | sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE; |
1593 | |
|
1594 | 0 | return 0; |
1595 | 0 | } |
1596 | | |
1597 | 0 | rv = parser_bare_item(sfp, dest); |
1598 | 0 | if (rv != 0) { |
1599 | 0 | return rv; |
1600 | 0 | } |
1601 | | |
1602 | 0 | sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS; |
1603 | |
|
1604 | 0 | return 0; |
1605 | 0 | } |
1606 | | |
1607 | | void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data, |
1608 | 51.2k | size_t datalen) { |
1609 | 51.2k | if (datalen == 0) { |
1610 | 579 | sfp->pos = sfp->end = NULL; |
1611 | 50.6k | } else { |
1612 | 50.6k | sfp->pos = data; |
1613 | 50.6k | sfp->end = data + datalen; |
1614 | 50.6k | } |
1615 | | |
1616 | 51.2k | sfp->state = SFPARSE_STATE_INITIAL; |
1617 | 51.2k | } |
1618 | | |
1619 | 0 | void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) { |
1620 | 0 | const uint8_t *p, *q; |
1621 | 0 | uint8_t *o; |
1622 | 0 | size_t len, slen; |
1623 | |
|
1624 | 0 | if (src->len == 0) { |
1625 | 0 | dest->len = 0; |
1626 | |
|
1627 | 0 | return; |
1628 | 0 | } |
1629 | | |
1630 | 0 | o = dest->base; |
1631 | 0 | p = src->base; |
1632 | 0 | len = src->len; |
1633 | |
|
1634 | 0 | for (;;) { |
1635 | 0 | q = memchr(p, '\\', len); |
1636 | 0 | if (q == NULL) { |
1637 | 0 | memcpy(o, p, len); |
1638 | 0 | o += len; |
1639 | |
|
1640 | 0 | dest->len = (size_t)(o - dest->base); |
1641 | |
|
1642 | 0 | return; |
1643 | 0 | } |
1644 | | |
1645 | 0 | slen = (size_t)(q - p); |
1646 | 0 | memcpy(o, p, slen); |
1647 | 0 | o += slen; |
1648 | |
|
1649 | 0 | p = q + 1; |
1650 | 0 | *o++ = *p++; |
1651 | 0 | len -= slen + 2; |
1652 | 0 | } |
1653 | 0 | } |
1654 | | |
1655 | 0 | void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) { |
1656 | 0 | static const int index_tbl[] = { |
1657 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1658 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1659 | 0 | -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, |
1660 | 0 | 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, |
1661 | 0 | 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, |
1662 | 0 | -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, |
1663 | 0 | 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1664 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1665 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1666 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1667 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1668 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1669 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1670 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1}; |
1671 | 0 | uint8_t *o; |
1672 | 0 | const uint8_t *p, *end; |
1673 | 0 | uint32_t n; |
1674 | 0 | size_t i, left; |
1675 | 0 | int idx; |
1676 | |
|
1677 | 0 | if (src->len == 0) { |
1678 | 0 | dest->len = 0; |
1679 | |
|
1680 | 0 | return; |
1681 | 0 | } |
1682 | | |
1683 | 0 | o = dest->base; |
1684 | 0 | p = src->base; |
1685 | 0 | left = src->len & 0x3; |
1686 | 0 | if (left == 0 && src->base[src->len - 1] == '=') { |
1687 | 0 | left = 4; |
1688 | 0 | } |
1689 | 0 | end = src->base + src->len - left; |
1690 | |
|
1691 | 0 | for (; p != end;) { |
1692 | 0 | n = 0; |
1693 | |
|
1694 | 0 | for (i = 1; i <= 4; ++i, ++p) { |
1695 | 0 | idx = index_tbl[*p]; |
1696 | |
|
1697 | 0 | assert(idx != -1); |
1698 | | |
1699 | 0 | n += (uint32_t)(idx << (24 - i * 6)); |
1700 | 0 | } |
1701 | | |
1702 | 0 | *o++ = (uint8_t)(n >> 16); |
1703 | 0 | *o++ = (n >> 8) & 0xffu; |
1704 | 0 | *o++ = n & 0xffu; |
1705 | 0 | } |
1706 | | |
1707 | 0 | switch (left) { |
1708 | 0 | case 0: |
1709 | 0 | goto fin; |
1710 | 0 | case 1: |
1711 | 0 | assert(0); |
1712 | 0 | abort(); |
1713 | 0 | case 3: |
1714 | 0 | if (src->base[src->len - 1] == '=') { |
1715 | 0 | left = 2; |
1716 | 0 | } |
1717 | |
|
1718 | 0 | break; |
1719 | 0 | case 4: |
1720 | 0 | assert('=' == src->base[src->len - 1]); |
1721 | | |
1722 | 0 | if (src->base[src->len - 2] == '=') { |
1723 | 0 | left = 2; |
1724 | 0 | } else { |
1725 | 0 | left = 3; |
1726 | 0 | } |
1727 | |
|
1728 | 0 | break; |
1729 | 0 | } |
1730 | | |
1731 | 0 | switch (left) { |
1732 | 0 | case 2: |
1733 | 0 | *o = (uint8_t)(index_tbl[*p++] << 2); |
1734 | 0 | *o++ |= (uint8_t)(index_tbl[*p++] >> 4); |
1735 | |
|
1736 | 0 | break; |
1737 | 0 | case 3: |
1738 | 0 | n = (uint32_t)(index_tbl[*p++] << 10); |
1739 | 0 | n += (uint32_t)(index_tbl[*p++] << 4); |
1740 | 0 | n += (uint32_t)(index_tbl[*p++] >> 2); |
1741 | 0 | *o++ = (n >> 8) & 0xffu; |
1742 | 0 | *o++ = n & 0xffu; |
1743 | |
|
1744 | 0 | break; |
1745 | 0 | } |
1746 | | |
1747 | 0 | fin: |
1748 | 0 | dest->len = (size_t)(o - dest->base); |
1749 | 0 | } |
1750 | | |
1751 | 0 | void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) { |
1752 | 0 | const uint8_t *p, *q; |
1753 | 0 | uint8_t *o; |
1754 | 0 | size_t len, slen; |
1755 | |
|
1756 | 0 | if (src->len == 0) { |
1757 | 0 | dest->len = 0; |
1758 | |
|
1759 | 0 | return; |
1760 | 0 | } |
1761 | | |
1762 | 0 | o = dest->base; |
1763 | 0 | p = src->base; |
1764 | 0 | len = src->len; |
1765 | |
|
1766 | 0 | for (;;) { |
1767 | 0 | q = memchr(p, '%', len); |
1768 | 0 | if (q == NULL) { |
1769 | 0 | memcpy(o, p, len); |
1770 | 0 | o += len; |
1771 | |
|
1772 | 0 | dest->len = (size_t)(o - dest->base); |
1773 | |
|
1774 | 0 | return; |
1775 | 0 | } |
1776 | | |
1777 | 0 | slen = (size_t)(q - p); |
1778 | 0 | memcpy(o, p, slen); |
1779 | 0 | o += slen; |
1780 | |
|
1781 | 0 | p = q + 1; |
1782 | |
|
1783 | 0 | pctdecode(o++, &p); |
1784 | |
|
1785 | 0 | len -= slen + 3; |
1786 | 0 | } |
1787 | 0 | } |