/src/nghttp2/lib/sfparse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * sfparse |
3 | | * |
4 | | * Copyright (c) 2023 sfparse contributors |
5 | | * Copyright (c) 2019 nghttp3 contributors |
6 | | * Copyright (c) 2015 nghttp2 contributors |
7 | | * |
8 | | * Permission is hereby granted, free of charge, to any person obtaining |
9 | | * a copy of this software and associated documentation files (the |
10 | | * "Software"), to deal in the Software without restriction, including |
11 | | * without limitation the rights to use, copy, modify, merge, publish, |
12 | | * distribute, sublicense, and/or sell copies of the Software, and to |
13 | | * permit persons to whom the Software is furnished to do so, subject to |
14 | | * the following conditions: |
15 | | * |
16 | | * The above copyright notice and this permission notice shall be |
17 | | * included in all copies or substantial portions of the Software. |
18 | | * |
19 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
20 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
21 | | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
22 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
23 | | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
24 | | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
25 | | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
26 | | */ |
27 | | #include "sfparse.h" |
28 | | |
29 | | #include <string.h> |
30 | | #include <assert.h> |
31 | | #include <stdlib.h> |
32 | | |
/*
 * Parser state encoding.  A state value is the bitwise OR of three
 * fields: the structure being parsed (bits 3-4), the inner-list flag
 * (bit 2) and the current operation (bits 0-1).
 */

/* Structure being parsed. */
#define SF_STATE_DICT 0x08u
#define SF_STATE_LIST 0x10u
#define SF_STATE_ITEM 0x18u

/* Set while the parser is positioned inside an inner list. */
#define SF_STATE_INNER_LIST 0x04u

/* Operation field values, and the mask that extracts the field. */
#define SF_STATE_BEFORE 0x00u
#define SF_STATE_BEFORE_PARAMS 0x01u
#define SF_STATE_PARAMS 0x02u
#define SF_STATE_AFTER 0x03u
#define SF_STATE_OP_MASK 0x03u

/* Builders that combine a structure (DICT, LIST or ITEM) with an
   operation. */
#define SF_SET_STATE_AFTER(KIND) (SF_STATE_##KIND | SF_STATE_AFTER)
#define SF_SET_STATE_BEFORE_PARAMS(KIND)                                       \
  (SF_STATE_##KIND | SF_STATE_BEFORE_PARAMS)
#define SF_SET_STATE_INNER_LIST_BEFORE(KIND)                                   \
  (SF_STATE_##KIND | SF_STATE_INNER_LIST | SF_STATE_BEFORE)

/* Dictionary states. */
#define SF_STATE_DICT_AFTER SF_SET_STATE_AFTER(DICT)
#define SF_STATE_DICT_BEFORE_PARAMS SF_SET_STATE_BEFORE_PARAMS(DICT)
#define SF_STATE_DICT_INNER_LIST_BEFORE SF_SET_STATE_INNER_LIST_BEFORE(DICT)

/* List states. */
#define SF_STATE_LIST_AFTER SF_SET_STATE_AFTER(LIST)
#define SF_STATE_LIST_BEFORE_PARAMS SF_SET_STATE_BEFORE_PARAMS(LIST)
#define SF_STATE_LIST_INNER_LIST_BEFORE SF_SET_STATE_INNER_LIST_BEFORE(LIST)

/* Item states. */
#define SF_STATE_ITEM_AFTER SF_SET_STATE_AFTER(ITEM)
#define SF_STATE_ITEM_BEFORE_PARAMS SF_SET_STATE_BEFORE_PARAMS(ITEM)
#define SF_STATE_ITEM_INNER_LIST_BEFORE SF_SET_STATE_INNER_LIST_BEFORE(ITEM)

/* State of a freshly initialized parser. */
#define SF_STATE_INITIAL 0x00u
65 | | |
/*
 * Case-label macros.  Each expands to a run of `case` labels with the
 * final colon omitted, so it is written as `DIGIT_CASES:` inside a
 * switch statement.
 */

/* '0'..'9' */
#define DIGIT_CASES                                                            \
  case '0': case '1': case '2': case '3': case '4':                            \
  case '5': case '6': case '7': case '8': case '9'

/* 'a'..'z' */
#define LCALPHA_CASES                                                          \
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':        \
  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':        \
  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':        \
  case 'v': case 'w': case 'x': case 'y': case 'z'

/* 'A'..'Z' */
#define UCALPHA_CASES                                                          \
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':        \
  case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':        \
  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':        \
  case 'V': case 'W': case 'X': case 'Y': case 'Z'

/* Upper- and lower-case letters. */
#define ALPHA_CASES                                                            \
  UCALPHA_CASES:                                                               \
  LCALPHA_CASES

/* ' ' and '!' */
#define X20_21_CASES                                                           \
  case ' ': case '!'

/* '#' through '[' (skips the double quote that precedes '#') */
#define X23_5B_CASES                                                           \
  case '#': case '$': case '%': case '&': case '\'': case '(': case ')':       \
  case '*': case '+': case ',': case '-': case '.': case '/':                  \
  DIGIT_CASES:                                                                 \
  case ':': case ';': case '<': case '=': case '>': case '?': case '@':        \
  UCALPHA_CASES:                                                               \
  case '['

/* ']' through '~' (skips the backslash that precedes ']') */
#define X5D_7E_CASES                                                           \
  case ']': case '^': case '_': case '`':                                      \
  LCALPHA_CASES:                                                               \
  case '{': case '|': case '}': case '~'
177 | | |
/* Returns 1 if |c| is a space or a horizontal tab, and 0 otherwise. */
static int is_ws(uint8_t c) { return c == ' ' || c == '\t'; }
187 | | |
188 | 0 | static int parser_eof(sf_parser *sfp) { return sfp->pos == sfp->end; } |
189 | | |
190 | 0 | static void parser_discard_ows(sf_parser *sfp) { |
191 | 0 | for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos) |
192 | 0 | ; |
193 | 0 | } |
194 | | |
195 | 0 | static void parser_discard_sp(sf_parser *sfp) { |
196 | 0 | for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos) |
197 | 0 | ; |
198 | 0 | } |
199 | | |
200 | 0 | static void parser_set_op_state(sf_parser *sfp, uint32_t op) { |
201 | 0 | sfp->state &= ~SF_STATE_OP_MASK; |
202 | 0 | sfp->state |= op; |
203 | 0 | } |
204 | | |
205 | 0 | static void parser_unset_inner_list_state(sf_parser *sfp) { |
206 | 0 | sfp->state &= ~SF_STATE_INNER_LIST; |
207 | 0 | } |
208 | | |
209 | 0 | static int parser_key(sf_parser *sfp, sf_vec *dest) { |
210 | 0 | const uint8_t *base; |
211 | |
|
212 | 0 | switch (*sfp->pos) { |
213 | 0 | case '*': |
214 | 0 | LCALPHA_CASES: |
215 | 0 | break; |
216 | 0 | default: |
217 | 0 | return SF_ERR_PARSE_ERROR; |
218 | 0 | } |
219 | | |
220 | 0 | base = sfp->pos++; |
221 | |
|
222 | 0 | for (; !parser_eof(sfp); ++sfp->pos) { |
223 | 0 | switch (*sfp->pos) { |
224 | 0 | case '_': |
225 | 0 | case '-': |
226 | 0 | case '.': |
227 | 0 | case '*': |
228 | 0 | DIGIT_CASES: |
229 | 0 | LCALPHA_CASES: |
230 | 0 | continue; |
231 | 0 | } |
232 | | |
233 | 0 | break; |
234 | 0 | } |
235 | | |
236 | 0 | if (dest) { |
237 | 0 | dest->base = (uint8_t *)base; |
238 | 0 | dest->len = (size_t)(sfp->pos - dest->base); |
239 | 0 | } |
240 | |
|
241 | 0 | return 0; |
242 | 0 | } |
243 | | |
244 | 0 | static int parser_number(sf_parser *sfp, sf_value *dest) { |
245 | 0 | int sign = 1; |
246 | 0 | int64_t value = 0; |
247 | 0 | size_t len = 0; |
248 | 0 | size_t fpos = 0; |
249 | |
|
250 | 0 | if (*sfp->pos == '-') { |
251 | 0 | ++sfp->pos; |
252 | 0 | if (parser_eof(sfp)) { |
253 | 0 | return SF_ERR_PARSE_ERROR; |
254 | 0 | } |
255 | | |
256 | 0 | sign = -1; |
257 | 0 | } |
258 | | |
259 | 0 | assert(!parser_eof(sfp)); |
260 | | |
261 | 0 | for (; !parser_eof(sfp); ++sfp->pos) { |
262 | 0 | switch (*sfp->pos) { |
263 | 0 | DIGIT_CASES: |
264 | 0 | if (++len > 15) { |
265 | 0 | return SF_ERR_PARSE_ERROR; |
266 | 0 | } |
267 | | |
268 | 0 | value *= 10; |
269 | 0 | value += *sfp->pos - '0'; |
270 | |
|
271 | 0 | continue; |
272 | 0 | } |
273 | | |
274 | 0 | break; |
275 | 0 | } |
276 | | |
277 | 0 | if (len == 0) { |
278 | 0 | return SF_ERR_PARSE_ERROR; |
279 | 0 | } |
280 | | |
281 | 0 | if (parser_eof(sfp) || *sfp->pos != '.') { |
282 | 0 | if (dest) { |
283 | 0 | dest->type = SF_TYPE_INTEGER; |
284 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
285 | 0 | dest->integer = value * sign; |
286 | 0 | } |
287 | |
|
288 | 0 | return 0; |
289 | 0 | } |
290 | | |
291 | | /* decimal */ |
292 | | |
293 | 0 | if (len > 12) { |
294 | 0 | return SF_ERR_PARSE_ERROR; |
295 | 0 | } |
296 | | |
297 | 0 | fpos = len; |
298 | |
|
299 | 0 | ++sfp->pos; |
300 | |
|
301 | 0 | for (; !parser_eof(sfp); ++sfp->pos) { |
302 | 0 | switch (*sfp->pos) { |
303 | 0 | DIGIT_CASES: |
304 | 0 | if (++len > 15) { |
305 | 0 | return SF_ERR_PARSE_ERROR; |
306 | 0 | } |
307 | | |
308 | 0 | value *= 10; |
309 | 0 | value += *sfp->pos - '0'; |
310 | |
|
311 | 0 | continue; |
312 | 0 | } |
313 | | |
314 | 0 | break; |
315 | 0 | } |
316 | | |
317 | 0 | if (fpos == len || len - fpos > 3) { |
318 | 0 | return SF_ERR_PARSE_ERROR; |
319 | 0 | } |
320 | | |
321 | 0 | if (dest) { |
322 | 0 | dest->type = SF_TYPE_DECIMAL; |
323 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
324 | 0 | dest->decimal.numer = value * sign; |
325 | |
|
326 | 0 | switch (len - fpos) { |
327 | 0 | case 1: |
328 | 0 | dest->decimal.denom = 10; |
329 | |
|
330 | 0 | break; |
331 | 0 | case 2: |
332 | 0 | dest->decimal.denom = 100; |
333 | |
|
334 | 0 | break; |
335 | 0 | case 3: |
336 | 0 | dest->decimal.denom = 1000; |
337 | |
|
338 | 0 | break; |
339 | 0 | } |
340 | 0 | } |
341 | | |
342 | 0 | return 0; |
343 | 0 | } |
344 | | |
345 | 0 | static int parser_date(sf_parser *sfp, sf_value *dest) { |
346 | 0 | int rv; |
347 | 0 | sf_value val; |
348 | | |
349 | | /* The first byte has already been validated by the caller. */ |
350 | 0 | assert('@' == *sfp->pos); |
351 | | |
352 | 0 | ++sfp->pos; |
353 | |
|
354 | 0 | if (parser_eof(sfp)) { |
355 | 0 | return SF_ERR_PARSE_ERROR; |
356 | 0 | } |
357 | | |
358 | 0 | rv = parser_number(sfp, &val); |
359 | 0 | if (rv != 0) { |
360 | 0 | return rv; |
361 | 0 | } |
362 | | |
363 | 0 | if (val.type != SF_TYPE_INTEGER) { |
364 | 0 | return SF_ERR_PARSE_ERROR; |
365 | 0 | } |
366 | | |
367 | 0 | if (dest) { |
368 | 0 | *dest = val; |
369 | 0 | dest->type = SF_TYPE_DATE; |
370 | 0 | } |
371 | |
|
372 | 0 | return 0; |
373 | 0 | } |
374 | | |
375 | 0 | static int parser_string(sf_parser *sfp, sf_value *dest) { |
376 | 0 | const uint8_t *base; |
377 | 0 | uint32_t flags = SF_VALUE_FLAG_NONE; |
378 | | |
379 | | /* The first byte has already been validated by the caller. */ |
380 | 0 | assert('"' == *sfp->pos); |
381 | | |
382 | 0 | base = ++sfp->pos; |
383 | |
|
384 | 0 | for (; !parser_eof(sfp); ++sfp->pos) { |
385 | 0 | switch (*sfp->pos) { |
386 | 0 | X20_21_CASES: |
387 | 0 | X23_5B_CASES: |
388 | 0 | X5D_7E_CASES: |
389 | 0 | break; |
390 | 0 | case '\\': |
391 | 0 | ++sfp->pos; |
392 | 0 | if (parser_eof(sfp)) { |
393 | 0 | return SF_ERR_PARSE_ERROR; |
394 | 0 | } |
395 | | |
396 | 0 | switch (*sfp->pos) { |
397 | 0 | case '"': |
398 | 0 | case '\\': |
399 | 0 | flags = SF_VALUE_FLAG_ESCAPED_STRING; |
400 | |
|
401 | 0 | break; |
402 | 0 | default: |
403 | 0 | return SF_ERR_PARSE_ERROR; |
404 | 0 | } |
405 | | |
406 | 0 | break; |
407 | 0 | case '"': |
408 | 0 | if (dest) { |
409 | 0 | dest->type = SF_TYPE_STRING; |
410 | 0 | dest->flags = flags; |
411 | 0 | dest->vec.len = (size_t)(sfp->pos - base); |
412 | 0 | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
413 | 0 | } |
414 | |
|
415 | 0 | ++sfp->pos; |
416 | |
|
417 | 0 | return 0; |
418 | 0 | default: |
419 | 0 | return SF_ERR_PARSE_ERROR; |
420 | 0 | } |
421 | 0 | } |
422 | | |
423 | 0 | return SF_ERR_PARSE_ERROR; |
424 | 0 | } |
425 | | |
426 | 0 | static int parser_token(sf_parser *sfp, sf_value *dest) { |
427 | 0 | const uint8_t *base; |
428 | | |
429 | | /* The first byte has already been validated by the caller. */ |
430 | 0 | base = sfp->pos++; |
431 | |
|
432 | 0 | for (; !parser_eof(sfp); ++sfp->pos) { |
433 | 0 | switch (*sfp->pos) { |
434 | 0 | case '!': |
435 | 0 | case '#': |
436 | 0 | case '$': |
437 | 0 | case '%': |
438 | 0 | case '&': |
439 | 0 | case '\'': |
440 | 0 | case '*': |
441 | 0 | case '+': |
442 | 0 | case '-': |
443 | 0 | case '.': |
444 | 0 | case '^': |
445 | 0 | case '_': |
446 | 0 | case '`': |
447 | 0 | case '|': |
448 | 0 | case '~': |
449 | 0 | case ':': |
450 | 0 | case '/': |
451 | 0 | DIGIT_CASES: |
452 | 0 | ALPHA_CASES: |
453 | 0 | continue; |
454 | 0 | } |
455 | | |
456 | 0 | break; |
457 | 0 | } |
458 | | |
459 | 0 | if (dest) { |
460 | 0 | dest->type = SF_TYPE_TOKEN; |
461 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
462 | 0 | dest->vec.base = (uint8_t *)base; |
463 | 0 | dest->vec.len = (size_t)(sfp->pos - base); |
464 | 0 | } |
465 | |
|
466 | 0 | return 0; |
467 | 0 | } |
468 | | |
469 | 0 | static int parser_byteseq(sf_parser *sfp, sf_value *dest) { |
470 | 0 | const uint8_t *base; |
471 | | |
472 | | /* The first byte has already been validated by the caller. */ |
473 | 0 | assert(':' == *sfp->pos); |
474 | | |
475 | 0 | base = ++sfp->pos; |
476 | |
|
477 | 0 | for (; !parser_eof(sfp); ++sfp->pos) { |
478 | 0 | switch (*sfp->pos) { |
479 | 0 | case '+': |
480 | 0 | case '/': |
481 | 0 | DIGIT_CASES: |
482 | 0 | ALPHA_CASES: |
483 | 0 | continue; |
484 | 0 | case '=': |
485 | 0 | switch ((sfp->pos - base) & 0x3) { |
486 | 0 | case 0: |
487 | 0 | case 1: |
488 | 0 | return SF_ERR_PARSE_ERROR; |
489 | 0 | case 2: |
490 | 0 | switch (*(sfp->pos - 1)) { |
491 | 0 | case 'A': |
492 | 0 | case 'Q': |
493 | 0 | case 'g': |
494 | 0 | case 'w': |
495 | 0 | break; |
496 | 0 | default: |
497 | 0 | return SF_ERR_PARSE_ERROR; |
498 | 0 | } |
499 | | |
500 | 0 | ++sfp->pos; |
501 | |
|
502 | 0 | if (parser_eof(sfp) || *sfp->pos != '=') { |
503 | 0 | return SF_ERR_PARSE_ERROR; |
504 | 0 | } |
505 | | |
506 | 0 | break; |
507 | 0 | case 3: |
508 | 0 | switch (*(sfp->pos - 1)) { |
509 | 0 | case 'A': |
510 | 0 | case 'E': |
511 | 0 | case 'I': |
512 | 0 | case 'M': |
513 | 0 | case 'Q': |
514 | 0 | case 'U': |
515 | 0 | case 'Y': |
516 | 0 | case 'c': |
517 | 0 | case 'g': |
518 | 0 | case 'k': |
519 | 0 | case 'o': |
520 | 0 | case 's': |
521 | 0 | case 'w': |
522 | 0 | case '0': |
523 | 0 | case '4': |
524 | 0 | case '8': |
525 | 0 | break; |
526 | 0 | default: |
527 | 0 | return SF_ERR_PARSE_ERROR; |
528 | 0 | } |
529 | | |
530 | 0 | break; |
531 | 0 | } |
532 | | |
533 | 0 | ++sfp->pos; |
534 | |
|
535 | 0 | if (parser_eof(sfp) || *sfp->pos != ':') { |
536 | 0 | return SF_ERR_PARSE_ERROR; |
537 | 0 | } |
538 | | |
539 | 0 | goto fin; |
540 | 0 | case ':': |
541 | 0 | if ((sfp->pos - base) & 0x3) { |
542 | 0 | return SF_ERR_PARSE_ERROR; |
543 | 0 | } |
544 | | |
545 | 0 | goto fin; |
546 | 0 | default: |
547 | 0 | return SF_ERR_PARSE_ERROR; |
548 | 0 | } |
549 | 0 | } |
550 | | |
551 | 0 | return SF_ERR_PARSE_ERROR; |
552 | | |
553 | 0 | fin: |
554 | 0 | if (dest) { |
555 | 0 | dest->type = SF_TYPE_BYTESEQ; |
556 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
557 | 0 | dest->vec.len = (size_t)(sfp->pos - base); |
558 | 0 | dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base; |
559 | 0 | } |
560 | |
|
561 | 0 | ++sfp->pos; |
562 | |
|
563 | 0 | return 0; |
564 | 0 | } |
565 | | |
566 | 0 | static int parser_boolean(sf_parser *sfp, sf_value *dest) { |
567 | 0 | int b; |
568 | | |
569 | | /* The first byte has already been validated by the caller. */ |
570 | 0 | assert('?' == *sfp->pos); |
571 | | |
572 | 0 | ++sfp->pos; |
573 | |
|
574 | 0 | if (parser_eof(sfp)) { |
575 | 0 | return SF_ERR_PARSE_ERROR; |
576 | 0 | } |
577 | | |
578 | 0 | switch (*sfp->pos) { |
579 | 0 | case '0': |
580 | 0 | b = 0; |
581 | |
|
582 | 0 | break; |
583 | 0 | case '1': |
584 | 0 | b = 1; |
585 | |
|
586 | 0 | break; |
587 | 0 | default: |
588 | 0 | return SF_ERR_PARSE_ERROR; |
589 | 0 | } |
590 | | |
591 | 0 | ++sfp->pos; |
592 | |
|
593 | 0 | if (dest) { |
594 | 0 | dest->type = SF_TYPE_BOOLEAN; |
595 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
596 | 0 | dest->boolean = b; |
597 | 0 | } |
598 | |
|
599 | 0 | return 0; |
600 | 0 | } |
601 | | |
602 | 0 | static int parser_bare_item(sf_parser *sfp, sf_value *dest) { |
603 | 0 | switch (*sfp->pos) { |
604 | 0 | case '"': |
605 | 0 | return parser_string(sfp, dest); |
606 | 0 | case '-': |
607 | 0 | DIGIT_CASES: |
608 | 0 | return parser_number(sfp, dest); |
609 | 0 | case '@': |
610 | 0 | return parser_date(sfp, dest); |
611 | 0 | case ':': |
612 | 0 | return parser_byteseq(sfp, dest); |
613 | 0 | case '?': |
614 | 0 | return parser_boolean(sfp, dest); |
615 | 0 | case '*': |
616 | 0 | ALPHA_CASES: |
617 | 0 | return parser_token(sfp, dest); |
618 | 0 | default: |
619 | 0 | return SF_ERR_PARSE_ERROR; |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | | static int parser_skip_inner_list(sf_parser *sfp); |
624 | | |
625 | 0 | int sf_parser_param(sf_parser *sfp, sf_vec *dest_key, sf_value *dest_value) { |
626 | 0 | int rv; |
627 | |
|
628 | 0 | switch (sfp->state & SF_STATE_OP_MASK) { |
629 | 0 | case SF_STATE_BEFORE: |
630 | 0 | rv = parser_skip_inner_list(sfp); |
631 | 0 | if (rv != 0) { |
632 | 0 | return rv; |
633 | 0 | } |
634 | | |
635 | | /* fall through */ |
636 | 0 | case SF_STATE_BEFORE_PARAMS: |
637 | 0 | parser_set_op_state(sfp, SF_STATE_PARAMS); |
638 | |
|
639 | 0 | break; |
640 | 0 | case SF_STATE_PARAMS: |
641 | 0 | break; |
642 | 0 | default: |
643 | 0 | assert(0); |
644 | 0 | abort(); |
645 | 0 | } |
646 | | |
647 | 0 | if (parser_eof(sfp) || *sfp->pos != ';') { |
648 | 0 | parser_set_op_state(sfp, SF_STATE_AFTER); |
649 | |
|
650 | 0 | return SF_ERR_EOF; |
651 | 0 | } |
652 | | |
653 | 0 | ++sfp->pos; |
654 | |
|
655 | 0 | parser_discard_sp(sfp); |
656 | 0 | if (parser_eof(sfp)) { |
657 | 0 | return SF_ERR_PARSE_ERROR; |
658 | 0 | } |
659 | | |
660 | 0 | rv = parser_key(sfp, dest_key); |
661 | 0 | if (rv != 0) { |
662 | 0 | return rv; |
663 | 0 | } |
664 | | |
665 | 0 | if (parser_eof(sfp) || *sfp->pos != '=') { |
666 | 0 | if (dest_value) { |
667 | 0 | dest_value->type = SF_TYPE_BOOLEAN; |
668 | 0 | dest_value->flags = SF_VALUE_FLAG_NONE; |
669 | 0 | dest_value->boolean = 1; |
670 | 0 | } |
671 | |
|
672 | 0 | return 0; |
673 | 0 | } |
674 | | |
675 | 0 | ++sfp->pos; |
676 | |
|
677 | 0 | if (parser_eof(sfp)) { |
678 | 0 | return SF_ERR_PARSE_ERROR; |
679 | 0 | } |
680 | | |
681 | 0 | return parser_bare_item(sfp, dest_value); |
682 | 0 | } |
683 | | |
684 | 0 | static int parser_skip_params(sf_parser *sfp) { |
685 | 0 | int rv; |
686 | |
|
687 | 0 | for (;;) { |
688 | 0 | rv = sf_parser_param(sfp, NULL, NULL); |
689 | 0 | switch (rv) { |
690 | 0 | case 0: |
691 | 0 | break; |
692 | 0 | case SF_ERR_EOF: |
693 | 0 | return 0; |
694 | 0 | case SF_ERR_PARSE_ERROR: |
695 | 0 | return rv; |
696 | 0 | default: |
697 | 0 | assert(0); |
698 | 0 | abort(); |
699 | 0 | } |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | 0 | int sf_parser_inner_list(sf_parser *sfp, sf_value *dest) { |
704 | 0 | int rv; |
705 | |
|
706 | 0 | switch (sfp->state & SF_STATE_OP_MASK) { |
707 | 0 | case SF_STATE_BEFORE: |
708 | 0 | parser_discard_sp(sfp); |
709 | 0 | if (parser_eof(sfp)) { |
710 | 0 | return SF_ERR_PARSE_ERROR; |
711 | 0 | } |
712 | | |
713 | 0 | break; |
714 | 0 | case SF_STATE_BEFORE_PARAMS: |
715 | 0 | rv = parser_skip_params(sfp); |
716 | 0 | if (rv != 0) { |
717 | 0 | return rv; |
718 | 0 | } |
719 | | |
720 | | /* Technically, we are entering SF_STATE_AFTER, but we will set |
721 | | another state without reading the state. */ |
722 | | /* parser_set_op_state(sfp, SF_STATE_AFTER); */ |
723 | | |
724 | | /* fall through */ |
725 | 0 | case SF_STATE_AFTER: |
726 | 0 | if (parser_eof(sfp)) { |
727 | 0 | return SF_ERR_PARSE_ERROR; |
728 | 0 | } |
729 | | |
730 | 0 | switch (*sfp->pos) { |
731 | 0 | case ' ': |
732 | 0 | parser_discard_sp(sfp); |
733 | 0 | if (parser_eof(sfp)) { |
734 | 0 | return SF_ERR_PARSE_ERROR; |
735 | 0 | } |
736 | | |
737 | 0 | break; |
738 | 0 | case ')': |
739 | 0 | break; |
740 | 0 | default: |
741 | 0 | return SF_ERR_PARSE_ERROR; |
742 | 0 | } |
743 | | |
744 | 0 | break; |
745 | 0 | default: |
746 | 0 | assert(0); |
747 | 0 | abort(); |
748 | 0 | } |
749 | | |
750 | 0 | if (*sfp->pos == ')') { |
751 | 0 | ++sfp->pos; |
752 | |
|
753 | 0 | parser_unset_inner_list_state(sfp); |
754 | 0 | parser_set_op_state(sfp, SF_STATE_BEFORE_PARAMS); |
755 | |
|
756 | 0 | return SF_ERR_EOF; |
757 | 0 | } |
758 | | |
759 | 0 | rv = parser_bare_item(sfp, dest); |
760 | 0 | if (rv != 0) { |
761 | 0 | return rv; |
762 | 0 | } |
763 | | |
764 | 0 | parser_set_op_state(sfp, SF_STATE_BEFORE_PARAMS); |
765 | |
|
766 | 0 | return 0; |
767 | 0 | } |
768 | | |
769 | 0 | static int parser_skip_inner_list(sf_parser *sfp) { |
770 | 0 | int rv; |
771 | |
|
772 | 0 | for (;;) { |
773 | 0 | rv = sf_parser_inner_list(sfp, NULL); |
774 | 0 | switch (rv) { |
775 | 0 | case 0: |
776 | 0 | break; |
777 | 0 | case SF_ERR_EOF: |
778 | 0 | return 0; |
779 | 0 | case SF_ERR_PARSE_ERROR: |
780 | 0 | return rv; |
781 | 0 | default: |
782 | 0 | assert(0); |
783 | 0 | abort(); |
784 | 0 | } |
785 | 0 | } |
786 | 0 | } |
787 | | |
788 | 0 | static int parser_next_key_or_item(sf_parser *sfp) { |
789 | 0 | parser_discard_ows(sfp); |
790 | |
|
791 | 0 | if (parser_eof(sfp)) { |
792 | 0 | return SF_ERR_EOF; |
793 | 0 | } |
794 | | |
795 | 0 | if (*sfp->pos != ',') { |
796 | 0 | return SF_ERR_PARSE_ERROR; |
797 | 0 | } |
798 | | |
799 | 0 | ++sfp->pos; |
800 | |
|
801 | 0 | parser_discard_ows(sfp); |
802 | 0 | if (parser_eof(sfp)) { |
803 | 0 | return SF_ERR_PARSE_ERROR; |
804 | 0 | } |
805 | | |
806 | 0 | return 0; |
807 | 0 | } |
808 | | |
809 | 0 | static int parser_dict_value(sf_parser *sfp, sf_value *dest) { |
810 | 0 | int rv; |
811 | |
|
812 | 0 | if (parser_eof(sfp) || *(sfp->pos) != '=') { |
813 | | /* Boolean true */ |
814 | 0 | if (dest) { |
815 | 0 | dest->type = SF_TYPE_BOOLEAN; |
816 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
817 | 0 | dest->boolean = 1; |
818 | 0 | } |
819 | |
|
820 | 0 | sfp->state = SF_STATE_DICT_BEFORE_PARAMS; |
821 | |
|
822 | 0 | return 0; |
823 | 0 | } |
824 | | |
825 | 0 | ++sfp->pos; |
826 | |
|
827 | 0 | if (parser_eof(sfp)) { |
828 | 0 | return SF_ERR_PARSE_ERROR; |
829 | 0 | } |
830 | | |
831 | 0 | if (*sfp->pos == '(') { |
832 | 0 | if (dest) { |
833 | 0 | dest->type = SF_TYPE_INNER_LIST; |
834 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
835 | 0 | } |
836 | |
|
837 | 0 | ++sfp->pos; |
838 | |
|
839 | 0 | sfp->state = SF_STATE_DICT_INNER_LIST_BEFORE; |
840 | |
|
841 | 0 | return 0; |
842 | 0 | } |
843 | | |
844 | 0 | rv = parser_bare_item(sfp, dest); |
845 | 0 | if (rv != 0) { |
846 | 0 | return rv; |
847 | 0 | } |
848 | | |
849 | 0 | sfp->state = SF_STATE_DICT_BEFORE_PARAMS; |
850 | |
|
851 | 0 | return 0; |
852 | 0 | } |
853 | | |
854 | 0 | int sf_parser_dict(sf_parser *sfp, sf_vec *dest_key, sf_value *dest_value) { |
855 | 0 | int rv; |
856 | |
|
857 | 0 | switch (sfp->state) { |
858 | 0 | case SF_STATE_DICT_INNER_LIST_BEFORE: |
859 | 0 | rv = parser_skip_inner_list(sfp); |
860 | 0 | if (rv != 0) { |
861 | 0 | return rv; |
862 | 0 | } |
863 | | |
864 | | /* fall through */ |
865 | 0 | case SF_STATE_DICT_BEFORE_PARAMS: |
866 | 0 | rv = parser_skip_params(sfp); |
867 | 0 | if (rv != 0) { |
868 | 0 | return rv; |
869 | 0 | } |
870 | | |
871 | | /* fall through */ |
872 | 0 | case SF_STATE_DICT_AFTER: |
873 | 0 | rv = parser_next_key_or_item(sfp); |
874 | 0 | if (rv != 0) { |
875 | 0 | return rv; |
876 | 0 | } |
877 | | |
878 | 0 | break; |
879 | 0 | case SF_STATE_INITIAL: |
880 | 0 | parser_discard_sp(sfp); |
881 | |
|
882 | 0 | if (parser_eof(sfp)) { |
883 | 0 | return SF_ERR_EOF; |
884 | 0 | } |
885 | | |
886 | 0 | break; |
887 | 0 | default: |
888 | 0 | assert(0); |
889 | 0 | abort(); |
890 | 0 | } |
891 | | |
892 | 0 | rv = parser_key(sfp, dest_key); |
893 | 0 | if (rv != 0) { |
894 | 0 | return rv; |
895 | 0 | } |
896 | | |
897 | 0 | return parser_dict_value(sfp, dest_value); |
898 | 0 | } |
899 | | |
900 | 0 | int sf_parser_list(sf_parser *sfp, sf_value *dest) { |
901 | 0 | int rv; |
902 | |
|
903 | 0 | switch (sfp->state) { |
904 | 0 | case SF_STATE_LIST_INNER_LIST_BEFORE: |
905 | 0 | rv = parser_skip_inner_list(sfp); |
906 | 0 | if (rv != 0) { |
907 | 0 | return rv; |
908 | 0 | } |
909 | | |
910 | | /* fall through */ |
911 | 0 | case SF_STATE_LIST_BEFORE_PARAMS: |
912 | 0 | rv = parser_skip_params(sfp); |
913 | 0 | if (rv != 0) { |
914 | 0 | return rv; |
915 | 0 | } |
916 | | |
917 | | /* fall through */ |
918 | 0 | case SF_STATE_LIST_AFTER: |
919 | 0 | rv = parser_next_key_or_item(sfp); |
920 | 0 | if (rv != 0) { |
921 | 0 | return rv; |
922 | 0 | } |
923 | | |
924 | 0 | break; |
925 | 0 | case SF_STATE_INITIAL: |
926 | 0 | parser_discard_sp(sfp); |
927 | |
|
928 | 0 | if (parser_eof(sfp)) { |
929 | 0 | return SF_ERR_EOF; |
930 | 0 | } |
931 | | |
932 | 0 | break; |
933 | 0 | default: |
934 | 0 | assert(0); |
935 | 0 | abort(); |
936 | 0 | } |
937 | | |
938 | 0 | if (*sfp->pos == '(') { |
939 | 0 | if (dest) { |
940 | 0 | dest->type = SF_TYPE_INNER_LIST; |
941 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
942 | 0 | } |
943 | |
|
944 | 0 | ++sfp->pos; |
945 | |
|
946 | 0 | sfp->state = SF_STATE_LIST_INNER_LIST_BEFORE; |
947 | |
|
948 | 0 | return 0; |
949 | 0 | } |
950 | | |
951 | 0 | rv = parser_bare_item(sfp, dest); |
952 | 0 | if (rv != 0) { |
953 | 0 | return rv; |
954 | 0 | } |
955 | | |
956 | 0 | sfp->state = SF_STATE_LIST_BEFORE_PARAMS; |
957 | |
|
958 | 0 | return 0; |
959 | 0 | } |
960 | | |
961 | 0 | int sf_parser_item(sf_parser *sfp, sf_value *dest) { |
962 | 0 | int rv; |
963 | |
|
964 | 0 | switch (sfp->state) { |
965 | 0 | case SF_STATE_INITIAL: |
966 | 0 | parser_discard_sp(sfp); |
967 | |
|
968 | 0 | if (parser_eof(sfp)) { |
969 | 0 | return SF_ERR_PARSE_ERROR; |
970 | 0 | } |
971 | | |
972 | 0 | break; |
973 | 0 | case SF_STATE_ITEM_INNER_LIST_BEFORE: |
974 | 0 | rv = parser_skip_inner_list(sfp); |
975 | 0 | if (rv != 0) { |
976 | 0 | return rv; |
977 | 0 | } |
978 | | |
979 | | /* fall through */ |
980 | 0 | case SF_STATE_ITEM_BEFORE_PARAMS: |
981 | 0 | rv = parser_skip_params(sfp); |
982 | 0 | if (rv != 0) { |
983 | 0 | return rv; |
984 | 0 | } |
985 | | |
986 | | /* fall through */ |
987 | 0 | case SF_STATE_ITEM_AFTER: |
988 | 0 | parser_discard_sp(sfp); |
989 | |
|
990 | 0 | if (!parser_eof(sfp)) { |
991 | 0 | return SF_ERR_PARSE_ERROR; |
992 | 0 | } |
993 | | |
994 | 0 | return SF_ERR_EOF; |
995 | 0 | default: |
996 | 0 | assert(0); |
997 | 0 | abort(); |
998 | 0 | } |
999 | | |
1000 | 0 | if (*sfp->pos == '(') { |
1001 | 0 | if (dest) { |
1002 | 0 | dest->type = SF_TYPE_INNER_LIST; |
1003 | 0 | dest->flags = SF_VALUE_FLAG_NONE; |
1004 | 0 | } |
1005 | |
|
1006 | 0 | ++sfp->pos; |
1007 | |
|
1008 | 0 | sfp->state = SF_STATE_ITEM_INNER_LIST_BEFORE; |
1009 | |
|
1010 | 0 | return 0; |
1011 | 0 | } |
1012 | | |
1013 | 0 | rv = parser_bare_item(sfp, dest); |
1014 | 0 | if (rv != 0) { |
1015 | 0 | return rv; |
1016 | 0 | } |
1017 | | |
1018 | 0 | sfp->state = SF_STATE_ITEM_BEFORE_PARAMS; |
1019 | |
|
1020 | 0 | return 0; |
1021 | 0 | } |
1022 | | |
1023 | 0 | void sf_parser_init(sf_parser *sfp, const uint8_t *data, size_t datalen) { |
1024 | 0 | if (datalen == 0) { |
1025 | 0 | sfp->pos = sfp->end = NULL; |
1026 | 0 | } else { |
1027 | 0 | sfp->pos = data; |
1028 | 0 | sfp->end = data + datalen; |
1029 | 0 | } |
1030 | |
|
1031 | 0 | sfp->state = SF_STATE_INITIAL; |
1032 | 0 | } |
1033 | | |
1034 | 0 | void sf_unescape(sf_vec *dest, const sf_vec *src) { |
1035 | 0 | const uint8_t *p, *q; |
1036 | 0 | uint8_t *o; |
1037 | 0 | size_t len, slen; |
1038 | |
|
1039 | 0 | if (src->len == 0) { |
1040 | 0 | *dest = *src; |
1041 | |
|
1042 | 0 | return; |
1043 | 0 | } |
1044 | | |
1045 | 0 | o = dest->base; |
1046 | 0 | p = src->base; |
1047 | 0 | len = src->len; |
1048 | |
|
1049 | 0 | for (;;) { |
1050 | 0 | q = memchr(p, '\\', len); |
1051 | 0 | if (q == NULL) { |
1052 | 0 | if (len == src->len) { |
1053 | 0 | *dest = *src; |
1054 | |
|
1055 | 0 | return; |
1056 | 0 | } |
1057 | | |
1058 | 0 | memcpy(o, p, len); |
1059 | 0 | o += len; |
1060 | |
|
1061 | 0 | break; |
1062 | 0 | } |
1063 | | |
1064 | 0 | slen = (size_t)(q - p); |
1065 | 0 | memcpy(o, p, slen); |
1066 | 0 | o += slen; |
1067 | |
|
1068 | 0 | p = q + 1; |
1069 | 0 | *o++ = *p++; |
1070 | 0 | len -= slen + 2; |
1071 | 0 | } |
1072 | | |
1073 | 0 | dest->len = (size_t)(o - dest->base); |
1074 | 0 | } |
1075 | | |
1076 | 0 | void sf_base64decode(sf_vec *dest, const sf_vec *src) { |
1077 | 0 | static const int index_tbl[] = { |
1078 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1079 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1080 | 0 | -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, |
1081 | 0 | 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, |
1082 | 0 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
1083 | 0 | 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, |
1084 | 0 | 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, |
1085 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1086 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1087 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1088 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1089 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1090 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1091 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1092 | 0 | -1, -1, -1, -1}; |
1093 | 0 | uint8_t *o; |
1094 | 0 | const uint8_t *p, *end; |
1095 | 0 | uint32_t n; |
1096 | 0 | size_t i; |
1097 | 0 | int idx; |
1098 | |
|
1099 | 0 | assert((src->len & 0x3) == 0); |
1100 | | |
1101 | 0 | if (src->len == 0) { |
1102 | 0 | *dest = *src; |
1103 | |
|
1104 | 0 | return; |
1105 | 0 | } |
1106 | | |
1107 | 0 | o = dest->base; |
1108 | 0 | p = src->base; |
1109 | 0 | end = src->base + src->len; |
1110 | |
|
1111 | 0 | for (; p != end;) { |
1112 | 0 | n = 0; |
1113 | |
|
1114 | 0 | for (i = 1; i <= 4; ++i, ++p) { |
1115 | 0 | idx = index_tbl[*p]; |
1116 | |
|
1117 | 0 | if (idx == -1) { |
1118 | 0 | assert(i > 2); |
1119 | | |
1120 | 0 | if (i == 3) { |
1121 | 0 | assert(*p == '=' && *(p + 1) == '=' && p + 2 == end); |
1122 | | |
1123 | 0 | *o++ = (uint8_t)(n >> 16); |
1124 | |
|
1125 | 0 | goto fin; |
1126 | 0 | } |
1127 | | |
1128 | 0 | assert(*p == '=' && p + 1 == end); |
1129 | | |
1130 | 0 | *o++ = (uint8_t)(n >> 16); |
1131 | 0 | *o++ = (n >> 8) & 0xffu; |
1132 | |
|
1133 | 0 | goto fin; |
1134 | 0 | } |
1135 | | |
1136 | 0 | n += (uint32_t)(idx << (24 - i * 6)); |
1137 | 0 | } |
1138 | | |
1139 | 0 | *o++ = (uint8_t)(n >> 16); |
1140 | 0 | *o++ = (n >> 8) & 0xffu; |
1141 | 0 | *o++ = n & 0xffu; |
1142 | 0 | } |
1143 | | |
1144 | 0 | fin: |
1145 | 0 | dest->len = (size_t)(o - dest->base); |
1146 | 0 | } |