/src/pigeonhole/src/lib-sieve/sieve-parser.c
Line | Count | Source |
1 | | /* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file |
2 | | */ |
3 | | |
4 | | #include "lib.h" |
5 | | #include "istream.h" |
6 | | #include "failures.h" |
7 | | |
8 | | #include "sieve-common.h" |
9 | | #include "sieve-limits.h" |
10 | | #include "sieve-script.h" |
11 | | #include "sieve-lexer.h" |
12 | | #include "sieve-parser.h" |
13 | | #include "sieve-error.h" |
14 | | #include "sieve-ast.h" |
15 | | |
16 | | /* |
17 | | * Forward declarations |
18 | | */ |
19 | | |
20 | | static int |
21 | | sieve_parser_recover(struct sieve_parser *parser, |
22 | | enum sieve_token_type end_token); |
23 | | |
24 | | /* |
25 | | * Parser object |
26 | | */ |
27 | | |
28 | | struct sieve_parser { |
29 | | pool_t pool; |
30 | | |
31 | | bool valid; |
32 | | |
33 | | struct sieve_script *script; |
34 | | |
35 | | struct sieve_error_handler *ehandler; |
36 | | |
37 | | const struct sieve_lexer *lexer; |
38 | | struct sieve_ast *ast; |
39 | | }; |
40 | | |
41 | | struct sieve_parser * |
42 | | sieve_parser_create(struct sieve_script *script, |
43 | | struct sieve_error_handler *ehandler, |
44 | | enum sieve_error *error_code_r) |
45 | 0 | { |
46 | 0 | struct sieve_parser *parser; |
47 | 0 | const struct sieve_lexer *lexer; |
48 | |
|
49 | 0 | lexer = sieve_lexer_create(script, ehandler, error_code_r); |
50 | 0 | if (lexer != NULL) { |
51 | 0 | pool_t pool = pool_alloconly_create("sieve_parser", 4096); |
52 | |
|
53 | 0 | parser = p_new(pool, struct sieve_parser, 1); |
54 | 0 | parser->pool = pool; |
55 | 0 | parser->valid = TRUE; |
56 | |
|
57 | 0 | parser->ehandler = ehandler; |
58 | 0 | sieve_error_handler_ref(ehandler); |
59 | |
|
60 | 0 | parser->script = script; |
61 | 0 | sieve_script_ref(script); |
62 | |
|
63 | 0 | parser->lexer = lexer; |
64 | 0 | parser->ast = NULL; |
65 | |
|
66 | 0 | return parser; |
67 | 0 | } |
68 | | |
69 | 0 | return NULL; |
70 | 0 | } |
71 | | |
72 | | void sieve_parser_free(struct sieve_parser **parser) |
73 | 0 | { |
74 | 0 | if ((*parser)->ast != NULL) |
75 | 0 | sieve_ast_unref(&(*parser)->ast); |
76 | |
|
77 | 0 | sieve_lexer_free(&(*parser)->lexer); |
78 | 0 | sieve_script_unref(&(*parser)->script); |
79 | |
|
80 | 0 | sieve_error_handler_unref(&(*parser)->ehandler); |
81 | |
|
82 | 0 | pool_unref(&(*parser)->pool); |
83 | |
|
84 | 0 | *parser = NULL; |
85 | 0 | } |
86 | | |
87 | | /* |
88 | | * Internal error handling |
89 | | */ |
90 | | |
91 | | inline static void ATTR_FORMAT(4, 5) |
92 | | sieve_parser_error(struct sieve_parser *parser, |
93 | | const char *csrc_filename, unsigned int csrc_linenum, |
94 | | const char *fmt, ...) |
95 | 0 | { |
96 | 0 | struct sieve_error_params params = { |
97 | 0 | .log_type = LOG_TYPE_ERROR, |
98 | 0 | .csrc = { |
99 | 0 | .filename = csrc_filename, |
100 | 0 | .linenum = csrc_linenum, |
101 | 0 | }, |
102 | 0 | }; |
103 | 0 | va_list args; |
104 | |
|
105 | 0 | va_start(args, fmt); |
106 | | |
107 | | /* Don't report a parse error if the lexer complained already */ |
108 | 0 | if (sieve_lexer_token_type(parser->lexer) != STT_ERROR) { |
109 | 0 | T_BEGIN { |
110 | 0 | params.location = sieve_error_script_location( |
111 | 0 | parser->script, |
112 | 0 | sieve_lexer_token_line(parser->lexer)); |
113 | 0 | sieve_logv(parser->ehandler, ¶ms, fmt, args); |
114 | 0 | } T_END; |
115 | 0 | } |
116 | | |
117 | 0 | parser->valid = FALSE; |
118 | |
|
119 | 0 | va_end(args); |
120 | 0 | } |
121 | | #define sieve_parser_error(parser, ...) \ |
122 | 0 | sieve_parser_error(parser, __FILE__, __LINE__, __VA_ARGS__) |
123 | | |
124 | | /* |
125 | | * Sieve grammar parsing |
126 | | */ |
127 | | |
128 | | /* sieve_parse_arguments(): |
129 | | |
130 | | Parses both command arguments and sub-tests: |
131 | | arguments = *argument [test / test-list] |
132 | | argument = string-list / number / tag |
133 | | string = quoted-string / multi-line [[implicitly handled in lexer]] |
134 | | string-list = "[" string *("," string) "]" / string ;; if |
135 | | there is only a single string, the brackets are optional |
136 | | test-list = "(" test *("," test) ")" |
137 | | test = identifier arguments |
138 | | */ |
139 | | static int |
140 | | sieve_parse_arguments(struct sieve_parser *parser, struct sieve_ast_node *node, |
141 | | unsigned int depth) |
142 | 0 | { |
143 | 0 | const struct sieve_lexer *lexer = parser->lexer; |
144 | 0 | struct sieve_ast_node *test = NULL; |
145 | 0 | bool test_present = TRUE; |
146 | 0 | bool arg_present = TRUE; |
147 | 0 | int result = 1; /* Indicates whether the parser is in a defined, not |
148 | | necessarily error-free state */ |
149 | | |
150 | | /* Parse arguments */ |
151 | 0 | while (arg_present && result > 0) { |
152 | 0 | struct sieve_ast_argument *arg; |
153 | |
|
154 | 0 | if (!parser->valid && |
155 | 0 | !sieve_errors_more_allowed(parser->ehandler)) { |
156 | 0 | result = 0; |
157 | 0 | break; |
158 | 0 | } |
159 | | |
160 | 0 | switch (sieve_lexer_token_type(lexer)) { |
161 | | /* String list */ |
162 | 0 | case STT_LSQUARE: |
163 | | /* Create stinglist object */ |
164 | 0 | arg = sieve_ast_argument_stringlist_create( |
165 | 0 | node, sieve_lexer_token_line(parser->lexer)); |
166 | 0 | if (arg == NULL) break; |
167 | 0 | sieve_lexer_skip_token(lexer); |
168 | |
|
169 | 0 | if (sieve_lexer_token_type(lexer) == STT_STRING) { |
170 | 0 | bool add_failed = FALSE; |
171 | | |
172 | | /* Add the string to the list */ |
173 | 0 | if (!sieve_ast_stringlist_add( |
174 | 0 | arg, sieve_lexer_token_str(lexer), |
175 | 0 | sieve_lexer_token_line(parser->lexer))) |
176 | 0 | add_failed = TRUE; |
177 | 0 | sieve_lexer_skip_token(lexer); |
178 | |
|
179 | 0 | while (!add_failed && |
180 | 0 | sieve_lexer_token_type(lexer) == STT_COMMA) { |
181 | 0 | sieve_lexer_skip_token(lexer); |
182 | | |
183 | | /* Check parser status */ |
184 | 0 | if (!parser->valid && |
185 | 0 | !sieve_errors_more_allowed(parser->ehandler)) { |
186 | 0 | result = sieve_parser_recover(parser, STT_RSQUARE); |
187 | 0 | break; |
188 | 0 | } |
189 | | |
190 | 0 | if (sieve_lexer_token_type(lexer) == STT_STRING) { |
191 | | /* Add the string to the list */ |
192 | 0 | if (!sieve_ast_stringlist_add( |
193 | 0 | arg, sieve_lexer_token_str(lexer), |
194 | 0 | sieve_lexer_token_line(parser->lexer))) |
195 | 0 | add_failed = TRUE; |
196 | |
|
197 | 0 | sieve_lexer_skip_token(lexer); |
198 | 0 | } else { |
199 | 0 | sieve_parser_error(parser, |
200 | 0 | "expecting string after ',' in string list, " |
201 | 0 | "but found %s", |
202 | 0 | sieve_lexer_token_description(lexer)); |
203 | 0 | result = sieve_parser_recover(parser, STT_RSQUARE); |
204 | 0 | break; |
205 | 0 | } |
206 | 0 | } |
207 | |
|
208 | 0 | if (add_failed) { |
209 | 0 | sieve_parser_error(parser, |
210 | 0 | "failed to accept more items in string list"); |
211 | 0 | return -1; |
212 | 0 | } |
213 | 0 | } else { |
214 | 0 | sieve_parser_error(parser, |
215 | 0 | "expecting string after '[' in string list, " |
216 | 0 | "but found %s", |
217 | 0 | sieve_lexer_token_description(lexer)); |
218 | 0 | result = sieve_parser_recover(parser, STT_RSQUARE); |
219 | 0 | } |
220 | | |
221 | | /* Finish the string list */ |
222 | 0 | if (sieve_lexer_token_type(lexer) == STT_RSQUARE) { |
223 | 0 | sieve_lexer_skip_token(lexer); |
224 | 0 | } else { |
225 | 0 | sieve_parser_error(parser, |
226 | 0 | "expecting ',' or end of string list ']', " |
227 | 0 | "but found %s", |
228 | 0 | sieve_lexer_token_description(lexer)); |
229 | |
|
230 | 0 | if ((result = sieve_parser_recover(parser, STT_RSQUARE)) > 0) |
231 | 0 | sieve_lexer_skip_token(lexer); |
232 | 0 | } |
233 | 0 | break; |
234 | | /* Single string */ |
235 | 0 | case STT_STRING: |
236 | 0 | arg = sieve_ast_argument_string_create( |
237 | 0 | node, sieve_lexer_token_str(lexer), |
238 | 0 | sieve_lexer_token_line(parser->lexer)); |
239 | 0 | sieve_lexer_skip_token(lexer); |
240 | 0 | break; |
241 | | /* Number */ |
242 | 0 | case STT_NUMBER: |
243 | 0 | arg = sieve_ast_argument_number_create( |
244 | 0 | node, sieve_lexer_token_int(lexer), |
245 | 0 | sieve_lexer_token_line(parser->lexer)); |
246 | 0 | sieve_lexer_skip_token(lexer); |
247 | 0 | break; |
248 | | /* Tag */ |
249 | 0 | case STT_TAG: |
250 | 0 | arg = sieve_ast_argument_tag_create( |
251 | 0 | node, sieve_lexer_token_ident(lexer), |
252 | 0 | sieve_lexer_token_line(parser->lexer)); |
253 | 0 | sieve_lexer_skip_token(lexer); |
254 | 0 | break; |
255 | | /* End of argument list, continue with tests */ |
256 | 0 | default: |
257 | 0 | arg_present = FALSE; |
258 | 0 | break; |
259 | 0 | } |
260 | | |
261 | 0 | if (arg_present && arg == NULL) { |
262 | 0 | sieve_parser_error(parser, |
263 | 0 | "failed to accept more arguments for command '%s'", |
264 | 0 | node->identifier); |
265 | 0 | return -1; |
266 | 0 | } |
267 | | |
268 | 0 | if (sieve_ast_argument_count(node) > SIEVE_MAX_COMMAND_ARGUMENTS) { |
269 | 0 | sieve_parser_error(parser, |
270 | 0 | "too many arguments for command '%s'", |
271 | 0 | node->identifier); |
272 | 0 | return 0; |
273 | 0 | } |
274 | 0 | } |
275 | | |
276 | 0 | if (result <= 0) |
277 | 0 | return result; /* Defer recovery to caller */ |
278 | | |
279 | | /* --> [ test / test-list ] |
280 | | test-list = "(" test *("," test) ")" |
281 | | test = identifier arguments |
282 | | */ |
283 | 0 | switch (sieve_lexer_token_type(lexer)) { |
284 | | /* Single test */ |
285 | 0 | case STT_IDENTIFIER: |
286 | 0 | if ((depth + 1) > SIEVE_MAX_TEST_NESTING) { |
287 | 0 | sieve_parser_error(parser, |
288 | 0 | "cannot nest tests deeper than %u levels", |
289 | 0 | SIEVE_MAX_TEST_NESTING); |
290 | 0 | return 0; |
291 | 0 | } |
292 | | |
293 | 0 | test = sieve_ast_test_create( |
294 | 0 | node, sieve_lexer_token_ident(lexer), |
295 | 0 | sieve_lexer_token_line(parser->lexer)); |
296 | 0 | sieve_lexer_skip_token(lexer); |
297 | | |
298 | | /* Theoretically, test can be NULL */ |
299 | 0 | if (test == NULL) |
300 | 0 | break; |
301 | | |
302 | | /* Parse test arguments, which may include more tests (recurse) */ |
303 | 0 | if (sieve_parse_arguments(parser, test, depth + 1) <= 0) { |
304 | 0 | return 0; /* Defer recovery to caller */ |
305 | 0 | } |
306 | | |
307 | 0 | break; |
308 | | |
309 | | /* Test list */ |
310 | 0 | case STT_LBRACKET: |
311 | 0 | sieve_lexer_skip_token(lexer); |
312 | |
|
313 | 0 | if (depth+1 > SIEVE_MAX_TEST_NESTING) { |
314 | 0 | sieve_parser_error(parser, |
315 | 0 | "cannot nest tests deeper than %u levels", |
316 | 0 | SIEVE_MAX_TEST_NESTING); |
317 | 0 | result = sieve_parser_recover(parser, STT_RBRACKET); |
318 | |
|
319 | 0 | if (result > 0) |
320 | 0 | sieve_lexer_skip_token(lexer); |
321 | 0 | return result; |
322 | 0 | } |
323 | | |
324 | 0 | node->test_list = TRUE; |
325 | | |
326 | | /* Test starts with identifier */ |
327 | 0 | if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) { |
328 | 0 | test = sieve_ast_test_create( |
329 | 0 | node, sieve_lexer_token_ident(lexer), |
330 | 0 | sieve_lexer_token_line(parser->lexer)); |
331 | 0 | sieve_lexer_skip_token(lexer); |
332 | |
|
333 | 0 | if (test == NULL) |
334 | 0 | break; |
335 | | |
336 | | /* Parse test arguments, which may include more tests (recurse) */ |
337 | 0 | if ((result = sieve_parse_arguments(parser, test, depth+1)) > 0) { |
338 | | |
339 | | /* More tests ? */ |
340 | 0 | while (sieve_lexer_token_type(lexer) == STT_COMMA) { |
341 | 0 | sieve_lexer_skip_token(lexer); |
342 | | |
343 | | /* Check parser status */ |
344 | 0 | if (!parser->valid && |
345 | 0 | !sieve_errors_more_allowed(parser->ehandler)) { |
346 | 0 | result = sieve_parser_recover(parser, STT_RBRACKET); |
347 | 0 | break; |
348 | 0 | } |
349 | | |
350 | | /* Test starts with identifier */ |
351 | 0 | if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) { |
352 | 0 | test = sieve_ast_test_create( |
353 | 0 | node, sieve_lexer_token_ident(lexer), |
354 | 0 | sieve_lexer_token_line(parser->lexer)); |
355 | 0 | sieve_lexer_skip_token(lexer); |
356 | |
|
357 | 0 | if (test == NULL) |
358 | 0 | break; |
359 | | |
360 | | /* Parse test arguments, which may include more tests (recurse) */ |
361 | 0 | if ((result = sieve_parse_arguments(parser, test, depth+1)) <= 0) { |
362 | 0 | if (result < 0) |
363 | 0 | return result; |
364 | 0 | result = sieve_parser_recover(parser, STT_RBRACKET); |
365 | 0 | break; |
366 | 0 | } |
367 | 0 | } else { |
368 | 0 | sieve_parser_error(parser, |
369 | 0 | "expecting test identifier after ',' in test list, " |
370 | 0 | "but found %s", |
371 | 0 | sieve_lexer_token_description(lexer)); |
372 | 0 | result = sieve_parser_recover(parser, STT_RBRACKET); |
373 | 0 | break; |
374 | 0 | } |
375 | 0 | } |
376 | 0 | if (test == NULL) |
377 | 0 | break; |
378 | 0 | } else { |
379 | 0 | if (result < 0) |
380 | 0 | return result; |
381 | | |
382 | 0 | result = sieve_parser_recover(parser, STT_RBRACKET); |
383 | 0 | } |
384 | 0 | } else { |
385 | 0 | sieve_parser_error(parser, |
386 | 0 | "expecting test identifier after '(' in test list, " |
387 | 0 | "but found %s", |
388 | 0 | sieve_lexer_token_description(lexer)); |
389 | |
|
390 | 0 | result = sieve_parser_recover(parser, STT_RBRACKET); |
391 | 0 | } |
392 | | |
393 | | /* The next token should be a ')', indicating the end of the |
394 | | test list |
395 | | --> previous sieve_parser_recover calls try to restore this |
396 | | situation after parse errors. |
397 | | */ |
398 | 0 | if (sieve_lexer_token_type(lexer) == STT_RBRACKET) { |
399 | 0 | sieve_lexer_skip_token(lexer); |
400 | 0 | } else { |
401 | 0 | sieve_parser_error(parser, |
402 | 0 | "expecting ',' or end of test list ')', " |
403 | 0 | "but found %s", |
404 | 0 | sieve_lexer_token_description(lexer)); |
405 | | |
406 | | /* Recover function tries to make next token equal to |
407 | | ')'. If it succeeds we need to skip it. |
408 | | */ |
409 | 0 | if ((result = sieve_parser_recover(parser, STT_RBRACKET)) > 0) |
410 | 0 | sieve_lexer_skip_token(lexer); |
411 | 0 | } |
412 | 0 | break; |
413 | | |
414 | 0 | default: |
415 | | /* Not an error: test / test-list is optional |
416 | | --> any errors are detected by the caller |
417 | | */ |
418 | 0 | test_present = FALSE; |
419 | 0 | break; |
420 | 0 | } |
421 | | |
422 | 0 | if (test_present && test == NULL) { |
423 | 0 | sieve_parser_error(parser, |
424 | 0 | "failed to accept more tests for command '%s'", |
425 | 0 | node->identifier); |
426 | 0 | return -1; |
427 | 0 | } |
428 | | |
429 | 0 | return result; |
430 | 0 | } |
431 | | |
432 | | /* commands = *command |
433 | | command = identifier arguments ( ";" / block ) |
434 | | block = "{" commands "}" |
435 | | */ |
436 | | static int |
437 | | sieve_parse_commands(struct sieve_parser *parser, struct sieve_ast_node *block, |
438 | | unsigned int depth) |
439 | 0 | { |
440 | 0 | const struct sieve_lexer *lexer = parser->lexer; |
441 | 0 | int result = 1; |
442 | |
|
443 | 0 | while (result > 0 && |
444 | 0 | sieve_lexer_token_type(lexer) == STT_IDENTIFIER) { |
445 | 0 | struct sieve_ast_node *command; |
446 | | |
447 | | /* Check parser status */ |
448 | 0 | if (!parser->valid && |
449 | 0 | !sieve_errors_more_allowed(parser->ehandler)) { |
450 | 0 | result = sieve_parser_recover(parser, STT_SEMICOLON); |
451 | 0 | break; |
452 | 0 | } |
453 | | |
454 | | /* Create command node */ |
455 | 0 | command = sieve_ast_command_create( |
456 | 0 | block, sieve_lexer_token_ident(lexer), |
457 | 0 | sieve_lexer_token_line(parser->lexer)); |
458 | 0 | sieve_lexer_skip_token(lexer); |
459 | |
|
460 | 0 | if (command == NULL) { |
461 | 0 | sieve_parser_error(parser, |
462 | 0 | "failed to accept more commands inside the block of command '%s'", |
463 | 0 | block->identifier); |
464 | 0 | return -1; |
465 | 0 | } |
466 | | |
467 | 0 | result = sieve_parse_arguments(parser, command, 1); |
468 | | |
469 | | /* Check whether the command is properly terminated |
470 | | (i.e. with ; or a new block) |
471 | | */ |
472 | 0 | if (result > 0 && |
473 | 0 | sieve_lexer_token_type(lexer) != STT_SEMICOLON && |
474 | 0 | sieve_lexer_token_type(lexer) != STT_LCURLY) { |
475 | |
|
476 | 0 | sieve_parser_error(parser, |
477 | 0 | "expected end of command ';' or the beginning of a compound block '{', " |
478 | 0 | "but found %s", |
479 | 0 | sieve_lexer_token_description(lexer)); |
480 | 0 | result = 0; |
481 | 0 | } |
482 | | |
483 | | /* Try to recover from parse errors to reacquire a defined state |
484 | | */ |
485 | 0 | if (result == 0) |
486 | 0 | result = sieve_parser_recover(parser, STT_SEMICOLON); |
487 | | |
488 | | /* Don't bother to continue if we are not in a defined state */ |
489 | 0 | if (result <= 0) |
490 | 0 | return result; |
491 | | |
492 | 0 | switch (sieve_lexer_token_type(lexer)) { |
493 | | /* End of the command */ |
494 | 0 | case STT_SEMICOLON: |
495 | 0 | sieve_lexer_skip_token(lexer); |
496 | 0 | break; |
497 | | /* Command has a block {...} */ |
498 | 0 | case STT_LCURLY: |
499 | 0 | sieve_lexer_skip_token(lexer); |
500 | | |
501 | | /* Check current depth first */ |
502 | 0 | if ((depth + 1) > SIEVE_MAX_BLOCK_NESTING) { |
503 | 0 | sieve_parser_error(parser, |
504 | 0 | "cannot nest command blocks deeper than %u levels", |
505 | 0 | SIEVE_MAX_BLOCK_NESTING); |
506 | 0 | result = sieve_parser_recover(parser, STT_RCURLY); |
507 | |
|
508 | 0 | if (result > 0) |
509 | 0 | sieve_lexer_skip_token(lexer); |
510 | 0 | break; |
511 | 0 | } |
512 | | |
513 | 0 | command->block = TRUE; |
514 | |
|
515 | 0 | if ((result = sieve_parse_commands(parser, command, depth + 1)) > 0) { |
516 | 0 | if (sieve_lexer_token_type(lexer) != STT_RCURLY) { |
517 | 0 | sieve_parser_error(parser, |
518 | 0 | "expected end of compound block '}', " |
519 | 0 | "but found %s", |
520 | 0 | sieve_lexer_token_description(lexer)); |
521 | 0 | result = sieve_parser_recover(parser, STT_RCURLY); |
522 | 0 | } else { |
523 | 0 | sieve_lexer_skip_token(lexer); |
524 | 0 | } |
525 | 0 | } else { |
526 | 0 | if (result < 0) |
527 | 0 | return result; |
528 | | |
529 | 0 | if ((result = sieve_parser_recover(parser, STT_RCURLY)) > 0) |
530 | 0 | sieve_lexer_skip_token(lexer); |
531 | 0 | } |
532 | | |
533 | 0 | break; |
534 | | |
535 | 0 | default: |
536 | | /* Recovered previously, so this cannot happen */ |
537 | 0 | i_unreached(); |
538 | 0 | } |
539 | 0 | } |
540 | | |
541 | 0 | return result; |
542 | 0 | } |
543 | | |
544 | | bool sieve_parser_run(struct sieve_parser *parser, struct sieve_ast **ast) |
545 | 0 | { |
546 | 0 | if (parser->ast != NULL) |
547 | 0 | sieve_ast_unref(&parser->ast); |
548 | | |
549 | | /* Create AST object if none is provided */ |
550 | 0 | if (*ast == NULL) |
551 | 0 | *ast = sieve_ast_create(parser->script); |
552 | 0 | else |
553 | 0 | sieve_ast_ref(*ast); |
554 | |
|
555 | 0 | parser->ast = *ast; |
556 | | |
557 | | /* Scan first token */ |
558 | 0 | sieve_lexer_skip_token(parser->lexer); |
559 | | |
560 | | /* Parse */ |
561 | 0 | if (sieve_parse_commands(parser, sieve_ast_root(parser->ast), 1) > 0 && |
562 | 0 | parser->valid) { |
563 | | /* Parsed right to EOF ? */ |
564 | 0 | if (sieve_lexer_token_type(parser->lexer) != STT_EOF) { |
565 | 0 | sieve_parser_error(parser, |
566 | 0 | "unexpected %s found at (the presumed) end of file", |
567 | 0 | sieve_lexer_token_description(parser->lexer)); |
568 | 0 | parser->valid = FALSE; |
569 | 0 | } |
570 | 0 | } else { |
571 | 0 | parser->valid = FALSE; |
572 | 0 | } |
573 | | |
574 | | /* Clean up AST if parse failed */ |
575 | 0 | if (!parser->valid) { |
576 | 0 | parser->ast = NULL; |
577 | 0 | sieve_ast_unref(ast); |
578 | 0 | } |
579 | |
|
580 | 0 | return parser->valid; |
581 | 0 | } |
582 | | |
583 | | /* Error recovery: |
584 | | To continue parsing after an error it is important to find the next |
585 | | parsible item in the stream. The recover function skips over the remaining |
586 | | garbage after an error. It tries to find the end of the failed syntax |
587 | | structure and takes nesting of structures into account. |
588 | | */ |
589 | | |
590 | | /* Assign useful names to priorities for readability */ |
591 | | enum sieve_grammatical_prio { |
592 | | SGP_BLOCK = 3, |
593 | | SGP_COMMAND = 2, |
594 | | SGP_TEST_LIST = 1, |
595 | | SGP_STRING_LIST = 0, |
596 | | |
597 | | SGP_OTHER = -1 |
598 | | }; |
599 | | |
600 | | static inline enum sieve_grammatical_prio |
601 | | __get_token_priority(enum sieve_token_type token) |
602 | 0 | { |
603 | 0 | switch (token) { |
604 | 0 | case STT_LCURLY: |
605 | 0 | case STT_RCURLY: |
606 | 0 | return SGP_BLOCK; |
607 | 0 | case STT_SEMICOLON: |
608 | 0 | return SGP_COMMAND; |
609 | 0 | case STT_LBRACKET: |
610 | 0 | case STT_RBRACKET: |
611 | 0 | return SGP_TEST_LIST; |
612 | 0 | case STT_LSQUARE: |
613 | 0 | case STT_RSQUARE: |
614 | 0 | return SGP_STRING_LIST; |
615 | 0 | default: |
616 | 0 | break; |
617 | 0 | } |
618 | | |
619 | 0 | return SGP_OTHER; |
620 | 0 | } |
621 | | |
622 | | static int |
623 | | sieve_parser_recover(struct sieve_parser *parser, |
624 | | enum sieve_token_type end_token) |
625 | 0 | { |
626 | | /* The tokens that begin/end a specific block/command/list in order |
627 | | of ascending grammatical priority. |
628 | | */ |
629 | 0 | static const enum sieve_token_type begin_tokens[4] = { |
630 | 0 | STT_LSQUARE, STT_LBRACKET, STT_NONE, STT_LCURLY }; |
631 | 0 | static const enum sieve_token_type end_tokens[4] = { |
632 | 0 | STT_RSQUARE, STT_RBRACKET, STT_SEMICOLON, STT_RCURLY}; |
633 | 0 | const struct sieve_lexer *lexer = parser->lexer; |
634 | 0 | int nesting = 1; |
635 | 0 | enum sieve_grammatical_prio end_priority = |
636 | 0 | __get_token_priority(end_token); |
637 | |
|
638 | 0 | i_assert(end_priority != SGP_OTHER); |
639 | | |
640 | 0 | while (sieve_lexer_token_type(lexer) != STT_EOF && |
641 | 0 | __get_token_priority(sieve_lexer_token_type(lexer)) |
642 | 0 | <= end_priority) { |
643 | 0 | if (sieve_lexer_token_type(lexer) == |
644 | 0 | begin_tokens[end_priority]) { |
645 | 0 | nesting++; |
646 | 0 | sieve_lexer_skip_token(lexer); |
647 | 0 | continue; |
648 | 0 | } |
649 | 0 | if (sieve_lexer_token_type(lexer) == |
650 | 0 | end_tokens[end_priority]) { |
651 | 0 | nesting--; |
652 | |
|
653 | 0 | if (nesting == 0) { |
654 | | /* Next character is the end */ |
655 | 0 | return 1; |
656 | 0 | } |
657 | 0 | } |
658 | 0 | sieve_lexer_skip_token(lexer); |
659 | 0 | } |
660 | | |
661 | | /* Special case: COMMAND */ |
662 | 0 | if (end_token == STT_SEMICOLON && |
663 | 0 | sieve_lexer_token_type(lexer) == STT_LCURLY) { |
664 | 0 | return 1; |
665 | 0 | } |
666 | | |
667 | | /* End not found before eof or end of surrounding grammatical structure |
668 | | */ |
669 | 0 | return 0; |
670 | 0 | } |