Coverage Report

Created: 2026-04-12 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pigeonhole/src/lib-sieve/sieve-parser.c
Line
Count
Source
1
/* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
2
 */
3
4
#include "lib.h"
5
#include "istream.h"
6
#include "failures.h"
7
8
#include "sieve-common.h"
9
#include "sieve-limits.h"
10
#include "sieve-script.h"
11
#include "sieve-lexer.h"
12
#include "sieve-parser.h"
13
#include "sieve-error.h"
14
#include "sieve-ast.h"
15
16
/*
17
 * Forward declarations
18
 */
19
20
static int
21
sieve_parser_recover(struct sieve_parser *parser,
22
         enum sieve_token_type end_token);
23
24
/*
25
 * Parser object
26
 */
27
28
struct sieve_parser {
29
  pool_t pool;
30
31
  bool valid;
32
33
  struct sieve_script *script;
34
35
  struct sieve_error_handler *ehandler;
36
37
  const struct sieve_lexer *lexer;
38
  struct sieve_ast *ast;
39
};
40
41
struct sieve_parser *
42
sieve_parser_create(struct sieve_script *script,
43
        struct sieve_error_handler *ehandler,
44
        enum sieve_error *error_code_r)
45
0
{
46
0
  struct sieve_parser *parser;
47
0
  const struct sieve_lexer *lexer;
48
49
0
  lexer = sieve_lexer_create(script, ehandler, error_code_r);
50
0
  if (lexer != NULL) {
51
0
    pool_t pool = pool_alloconly_create("sieve_parser", 4096);
52
53
0
    parser = p_new(pool, struct sieve_parser, 1);
54
0
    parser->pool = pool;
55
0
    parser->valid = TRUE;
56
57
0
    parser->ehandler = ehandler;
58
0
    sieve_error_handler_ref(ehandler);
59
60
0
    parser->script = script;
61
0
    sieve_script_ref(script);
62
63
0
    parser->lexer = lexer;
64
0
    parser->ast = NULL;
65
66
0
    return parser;
67
0
  }
68
69
0
  return NULL;
70
0
}
71
72
void sieve_parser_free(struct sieve_parser **parser)
73
0
{
74
0
  if ((*parser)->ast != NULL)
75
0
    sieve_ast_unref(&(*parser)->ast);
76
77
0
  sieve_lexer_free(&(*parser)->lexer);
78
0
  sieve_script_unref(&(*parser)->script);
79
80
0
  sieve_error_handler_unref(&(*parser)->ehandler);
81
82
0
  pool_unref(&(*parser)->pool);
83
84
0
  *parser = NULL;
85
0
}
86
87
/*
88
 * Internal error handling
89
 */
90
91
inline static void ATTR_FORMAT(4, 5)
92
sieve_parser_error(struct sieve_parser *parser,
93
       const char *csrc_filename, unsigned int csrc_linenum,
94
       const char *fmt, ...)
95
0
{
96
0
  struct sieve_error_params params = {
97
0
    .log_type = LOG_TYPE_ERROR,
98
0
    .csrc = {
99
0
      .filename = csrc_filename,
100
0
      .linenum = csrc_linenum,
101
0
    },
102
0
  };
103
0
  va_list args;
104
105
0
  va_start(args, fmt);
106
107
  /* Don't report a parse error if the lexer complained already */
108
0
  if (sieve_lexer_token_type(parser->lexer) != STT_ERROR) {
109
0
    T_BEGIN {
110
0
      params.location = sieve_error_script_location(
111
0
        parser->script,
112
0
        sieve_lexer_token_line(parser->lexer));
113
0
      sieve_logv(parser->ehandler, &params, fmt, args);
114
0
    } T_END;
115
0
  }
116
117
0
  parser->valid = FALSE;
118
119
0
  va_end(args);
120
0
}
121
#define sieve_parser_error(parser, ...) \
122
0
  sieve_parser_error(parser, __FILE__, __LINE__, __VA_ARGS__)
123
124
/*
125
 * Sieve grammar parsing
126
 */
127
128
/* sieve_parse_arguments():
129
130
   Parses both command arguments and sub-tests:
131
     arguments = *argument [test / test-list]
132
     argument = string-list / number / tag
133
     string = quoted-string / multi-line   [[implicitly handled in lexer]]
134
     string-list = "[" string *("," string) "]" / string         ;; if
135
       there is only a single string, the brackets are optional
136
     test-list = "(" test *("," test) ")"
137
     test = identifier arguments
138
 */
139
static int
140
sieve_parse_arguments(struct sieve_parser *parser, struct sieve_ast_node *node,
141
          unsigned int depth)
142
0
{
143
0
  const struct sieve_lexer *lexer = parser->lexer;
144
0
  struct sieve_ast_node *test = NULL;
145
0
  bool test_present = TRUE;
146
0
  bool arg_present = TRUE;
147
0
  int result = 1; /* Indicates whether the parser is in a defined, not
148
                      necessarily error-free state */
149
150
  /* Parse arguments */
151
0
  while (arg_present && result > 0) {
152
0
    struct sieve_ast_argument *arg;
153
154
0
    if (!parser->valid &&
155
0
        !sieve_errors_more_allowed(parser->ehandler)) {
156
0
      result = 0;
157
0
      break;
158
0
    }
159
160
0
    switch (sieve_lexer_token_type(lexer)) {
161
    /* String list */
162
0
    case STT_LSQUARE:
163
      /* Create stinglist object */
164
0
      arg = sieve_ast_argument_stringlist_create(
165
0
        node, sieve_lexer_token_line(parser->lexer));
166
0
      if (arg == NULL) break;
167
0
      sieve_lexer_skip_token(lexer);
168
169
0
      if (sieve_lexer_token_type(lexer) == STT_STRING) {
170
0
        bool add_failed = FALSE;
171
172
        /* Add the string to the list */
173
0
        if (!sieve_ast_stringlist_add(
174
0
          arg, sieve_lexer_token_str(lexer),
175
0
          sieve_lexer_token_line(parser->lexer)))
176
0
          add_failed = TRUE;
177
0
        sieve_lexer_skip_token(lexer);
178
179
0
        while (!add_failed &&
180
0
               sieve_lexer_token_type(lexer) == STT_COMMA) {
181
0
          sieve_lexer_skip_token(lexer);
182
183
          /* Check parser status */
184
0
          if (!parser->valid &&
185
0
              !sieve_errors_more_allowed(parser->ehandler)) {
186
0
            result = sieve_parser_recover(parser, STT_RSQUARE);
187
0
            break;
188
0
          }
189
190
0
          if (sieve_lexer_token_type(lexer) == STT_STRING) {
191
            /* Add the string to the list */
192
0
            if (!sieve_ast_stringlist_add(
193
0
              arg, sieve_lexer_token_str(lexer),
194
0
              sieve_lexer_token_line(parser->lexer)))
195
0
              add_failed = TRUE;
196
197
0
            sieve_lexer_skip_token(lexer);
198
0
          } else {
199
0
            sieve_parser_error(parser,
200
0
              "expecting string after ',' in string list, "
201
0
              "but found %s",
202
0
              sieve_lexer_token_description(lexer));
203
0
            result = sieve_parser_recover(parser, STT_RSQUARE);
204
0
            break;
205
0
          }
206
0
        }
207
208
0
        if (add_failed) {
209
0
          sieve_parser_error(parser,
210
0
            "failed to accept more items in string list");
211
0
          return -1;
212
0
        }
213
0
      } else {
214
0
        sieve_parser_error(parser,
215
0
          "expecting string after '[' in string list, "
216
0
          "but found %s",
217
0
          sieve_lexer_token_description(lexer));
218
0
        result = sieve_parser_recover(parser, STT_RSQUARE);
219
0
      }
220
221
      /* Finish the string list */
222
0
      if (sieve_lexer_token_type(lexer) == STT_RSQUARE) {
223
0
        sieve_lexer_skip_token(lexer);
224
0
      } else {
225
0
        sieve_parser_error(parser,
226
0
          "expecting ',' or end of string list ']', "
227
0
          "but found %s",
228
0
          sieve_lexer_token_description(lexer));
229
230
0
        if ((result = sieve_parser_recover(parser, STT_RSQUARE)) > 0)
231
0
          sieve_lexer_skip_token(lexer);
232
0
      }
233
0
      break;
234
    /* Single string */
235
0
    case STT_STRING:
236
0
      arg = sieve_ast_argument_string_create(
237
0
        node, sieve_lexer_token_str(lexer),
238
0
        sieve_lexer_token_line(parser->lexer));
239
0
      sieve_lexer_skip_token(lexer);
240
0
      break;
241
    /* Number */
242
0
    case STT_NUMBER:
243
0
      arg = sieve_ast_argument_number_create(
244
0
        node, sieve_lexer_token_int(lexer),
245
0
        sieve_lexer_token_line(parser->lexer));
246
0
      sieve_lexer_skip_token(lexer);
247
0
      break;
248
    /* Tag */
249
0
    case STT_TAG:
250
0
      arg = sieve_ast_argument_tag_create(
251
0
        node, sieve_lexer_token_ident(lexer),
252
0
        sieve_lexer_token_line(parser->lexer));
253
0
      sieve_lexer_skip_token(lexer);
254
0
      break;
255
    /* End of argument list, continue with tests */
256
0
    default:
257
0
      arg_present = FALSE;
258
0
      break;
259
0
    }
260
261
0
    if (arg_present && arg == NULL) {
262
0
      sieve_parser_error(parser,
263
0
        "failed to accept more arguments for command '%s'",
264
0
        node->identifier);
265
0
      return -1;
266
0
    }
267
268
0
    if (sieve_ast_argument_count(node) > SIEVE_MAX_COMMAND_ARGUMENTS) {
269
0
      sieve_parser_error(parser,
270
0
        "too many arguments for command '%s'",
271
0
        node->identifier);
272
0
      return 0;
273
0
    }
274
0
  }
275
276
0
  if (result <= 0)
277
0
    return result; /* Defer recovery to caller */
278
279
  /* --> [ test / test-list ]
280
     test-list = "(" test *("," test) ")"
281
     test = identifier arguments
282
   */
283
0
  switch (sieve_lexer_token_type(lexer)) {
284
  /* Single test */
285
0
  case STT_IDENTIFIER:
286
0
    if ((depth + 1) > SIEVE_MAX_TEST_NESTING) {
287
0
      sieve_parser_error(parser,
288
0
        "cannot nest tests deeper than %u levels",
289
0
        SIEVE_MAX_TEST_NESTING);
290
0
      return 0;
291
0
    }
292
293
0
    test = sieve_ast_test_create(
294
0
      node, sieve_lexer_token_ident(lexer),
295
0
      sieve_lexer_token_line(parser->lexer));
296
0
    sieve_lexer_skip_token(lexer);
297
298
    /* Theoretically, test can be NULL */
299
0
    if (test == NULL)
300
0
      break;
301
302
    /* Parse test arguments, which may include more tests (recurse) */
303
0
    if (sieve_parse_arguments(parser, test, depth + 1) <= 0) {
304
0
      return 0; /* Defer recovery to caller */
305
0
    }
306
307
0
    break;
308
309
  /* Test list */
310
0
  case STT_LBRACKET:
311
0
    sieve_lexer_skip_token(lexer);
312
313
0
    if (depth+1 > SIEVE_MAX_TEST_NESTING) {
314
0
      sieve_parser_error(parser,
315
0
        "cannot nest tests deeper than %u levels",
316
0
        SIEVE_MAX_TEST_NESTING);
317
0
      result = sieve_parser_recover(parser, STT_RBRACKET);
318
319
0
      if (result > 0)
320
0
        sieve_lexer_skip_token(lexer);
321
0
      return result;
322
0
    }
323
324
0
    node->test_list = TRUE;
325
326
    /* Test starts with identifier */
327
0
    if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
328
0
      test = sieve_ast_test_create(
329
0
        node, sieve_lexer_token_ident(lexer),
330
0
        sieve_lexer_token_line(parser->lexer));
331
0
      sieve_lexer_skip_token(lexer);
332
333
0
      if (test == NULL)
334
0
        break;
335
336
      /* Parse test arguments, which may include more tests (recurse) */
337
0
      if ((result = sieve_parse_arguments(parser, test, depth+1)) > 0) {
338
339
        /* More tests ? */
340
0
        while (sieve_lexer_token_type(lexer) == STT_COMMA) {
341
0
          sieve_lexer_skip_token(lexer);
342
343
          /* Check parser status */
344
0
          if (!parser->valid &&
345
0
              !sieve_errors_more_allowed(parser->ehandler)) {
346
0
            result = sieve_parser_recover(parser, STT_RBRACKET);
347
0
            break;
348
0
          }
349
350
          /* Test starts with identifier */
351
0
          if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
352
0
            test = sieve_ast_test_create(
353
0
              node, sieve_lexer_token_ident(lexer),
354
0
              sieve_lexer_token_line(parser->lexer));
355
0
            sieve_lexer_skip_token(lexer);
356
357
0
            if (test == NULL)
358
0
              break;
359
360
            /* Parse test arguments, which may include more tests (recurse) */
361
0
            if ((result = sieve_parse_arguments(parser, test, depth+1)) <= 0) {
362
0
              if (result < 0)
363
0
                return result;
364
0
              result = sieve_parser_recover(parser, STT_RBRACKET);
365
0
              break;
366
0
            }
367
0
          } else {
368
0
            sieve_parser_error(parser,
369
0
              "expecting test identifier after ',' in test list, "
370
0
              "but found %s",
371
0
              sieve_lexer_token_description(lexer));
372
0
            result = sieve_parser_recover(parser, STT_RBRACKET);
373
0
            break;
374
0
          }
375
0
        }
376
0
        if (test == NULL)
377
0
          break;
378
0
      } else {
379
0
        if (result < 0)
380
0
          return result;
381
382
0
        result = sieve_parser_recover(parser, STT_RBRACKET);
383
0
      }
384
0
    } else {
385
0
      sieve_parser_error(parser,
386
0
        "expecting test identifier after '(' in test list, "
387
0
        "but found %s",
388
0
        sieve_lexer_token_description(lexer));
389
390
0
      result = sieve_parser_recover(parser, STT_RBRACKET);
391
0
    }
392
393
    /* The next token should be a ')', indicating the end of the
394
       test list
395
         --> previous sieve_parser_recover calls try to restore this
396
       situation after parse errors.
397
     */
398
0
    if (sieve_lexer_token_type(lexer) == STT_RBRACKET) {
399
0
      sieve_lexer_skip_token(lexer);
400
0
    } else {
401
0
      sieve_parser_error(parser,
402
0
        "expecting ',' or end of test list ')', "
403
0
        "but found %s",
404
0
        sieve_lexer_token_description(lexer));
405
406
      /* Recover function tries to make next token equal to
407
         ')'. If it succeeds we need to skip it.
408
       */
409
0
      if ((result = sieve_parser_recover(parser, STT_RBRACKET)) > 0)
410
0
        sieve_lexer_skip_token(lexer);
411
0
    }
412
0
    break;
413
414
0
  default:
415
    /* Not an error: test / test-list is optional
416
         --> any errors are detected by the caller
417
     */
418
0
    test_present = FALSE;
419
0
    break;
420
0
  }
421
422
0
  if (test_present && test == NULL) {
423
0
    sieve_parser_error(parser,
424
0
      "failed to accept more tests for command '%s'",
425
0
      node->identifier);
426
0
    return -1;
427
0
  }
428
429
0
  return result;
430
0
}
431
432
/* commands = *command
433
   command = identifier arguments ( ";" / block )
434
   block = "{" commands "}"
435
 */
436
static int
437
sieve_parse_commands(struct sieve_parser *parser, struct sieve_ast_node *block,
438
         unsigned int depth)
439
0
{
440
0
  const struct sieve_lexer *lexer = parser->lexer;
441
0
  int result = 1;
442
443
0
  while (result > 0 &&
444
0
         sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
445
0
    struct sieve_ast_node *command;
446
447
    /* Check parser status */
448
0
    if (!parser->valid &&
449
0
        !sieve_errors_more_allowed(parser->ehandler)) {
450
0
      result = sieve_parser_recover(parser, STT_SEMICOLON);
451
0
      break;
452
0
    }
453
454
    /* Create command node */
455
0
    command = sieve_ast_command_create(
456
0
      block, sieve_lexer_token_ident(lexer),
457
0
      sieve_lexer_token_line(parser->lexer));
458
0
    sieve_lexer_skip_token(lexer);
459
460
0
    if (command == NULL) {
461
0
      sieve_parser_error(parser,
462
0
        "failed to accept more commands inside the block of command '%s'",
463
0
        block->identifier);
464
0
      return -1;
465
0
    }
466
467
0
    result = sieve_parse_arguments(parser, command, 1);
468
469
    /* Check whether the command is properly terminated
470
       (i.e. with ; or a new block)
471
     */
472
0
    if (result > 0 &&
473
0
        sieve_lexer_token_type(lexer) != STT_SEMICOLON &&
474
0
        sieve_lexer_token_type(lexer) != STT_LCURLY) {
475
476
0
      sieve_parser_error(parser,
477
0
        "expected end of command ';' or the beginning of a compound block '{', "
478
0
        "but found %s",
479
0
        sieve_lexer_token_description(lexer));
480
0
      result = 0;
481
0
    }
482
483
    /* Try to recover from parse errors to reacquire a defined state
484
     */
485
0
    if (result == 0)
486
0
      result = sieve_parser_recover(parser, STT_SEMICOLON);
487
488
    /* Don't bother to continue if we are not in a defined state */
489
0
    if (result <= 0)
490
0
      return result;
491
492
0
    switch (sieve_lexer_token_type(lexer)) {
493
    /* End of the command */
494
0
    case STT_SEMICOLON:
495
0
      sieve_lexer_skip_token(lexer);
496
0
      break;
497
    /* Command has a block {...} */
498
0
    case STT_LCURLY:
499
0
      sieve_lexer_skip_token(lexer);
500
501
      /* Check current depth first */
502
0
      if ((depth + 1) > SIEVE_MAX_BLOCK_NESTING) {
503
0
        sieve_parser_error(parser,
504
0
          "cannot nest command blocks deeper than %u levels",
505
0
          SIEVE_MAX_BLOCK_NESTING);
506
0
        result = sieve_parser_recover(parser, STT_RCURLY);
507
508
0
        if (result > 0)
509
0
          sieve_lexer_skip_token(lexer);
510
0
        break;
511
0
      }
512
513
0
      command->block = TRUE;
514
515
0
      if ((result = sieve_parse_commands(parser, command, depth + 1)) > 0) {
516
0
        if (sieve_lexer_token_type(lexer) != STT_RCURLY) {
517
0
          sieve_parser_error(parser,
518
0
            "expected end of compound block '}', "
519
0
            "but found %s",
520
0
            sieve_lexer_token_description(lexer));
521
0
          result = sieve_parser_recover(parser, STT_RCURLY);
522
0
        } else {
523
0
          sieve_lexer_skip_token(lexer);
524
0
        }
525
0
      } else {
526
0
        if (result < 0)
527
0
          return result;
528
529
0
        if ((result = sieve_parser_recover(parser, STT_RCURLY)) > 0)
530
0
          sieve_lexer_skip_token(lexer);
531
0
      }
532
533
0
      break;
534
535
0
    default:
536
      /* Recovered previously, so this cannot happen */
537
0
      i_unreached();
538
0
    }
539
0
  }
540
541
0
  return result;
542
0
}
543
544
bool sieve_parser_run(struct sieve_parser *parser, struct sieve_ast **ast)
545
0
{
546
0
  if (parser->ast != NULL)
547
0
    sieve_ast_unref(&parser->ast);
548
549
  /* Create AST object if none is provided */
550
0
  if (*ast == NULL)
551
0
    *ast = sieve_ast_create(parser->script);
552
0
  else
553
0
    sieve_ast_ref(*ast);
554
555
0
  parser->ast = *ast;
556
557
  /* Scan first token */
558
0
  sieve_lexer_skip_token(parser->lexer);
559
560
  /* Parse */
561
0
  if (sieve_parse_commands(parser, sieve_ast_root(parser->ast), 1) > 0 &&
562
0
      parser->valid) {
563
    /* Parsed right to EOF ? */
564
0
    if (sieve_lexer_token_type(parser->lexer) != STT_EOF) {
565
0
      sieve_parser_error(parser,
566
0
        "unexpected %s found at (the presumed) end of file",
567
0
        sieve_lexer_token_description(parser->lexer));
568
0
      parser->valid = FALSE;
569
0
    }
570
0
  } else {
571
0
    parser->valid = FALSE;
572
0
  }
573
574
  /* Clean up AST if parse failed */
575
0
  if (!parser->valid) {
576
0
    parser->ast = NULL;
577
0
    sieve_ast_unref(ast);
578
0
  }
579
580
0
  return parser->valid;
581
0
}
582
583
/* Error recovery:
584
     To continue parsing after an error it is important to find the next
585
     parsible item in the stream. The recover function skips over the remaining
586
     garbage after an error. It tries  to find the end of the failed syntax
587
     structure and takes nesting of structures into account.
588
 */
589
590
/* Assign useful names to priorities for readability */
591
enum sieve_grammatical_prio {
592
  SGP_BLOCK = 3,
593
  SGP_COMMAND = 2,
594
  SGP_TEST_LIST = 1,
595
  SGP_STRING_LIST = 0,
596
597
  SGP_OTHER = -1
598
};
599
600
static inline enum sieve_grammatical_prio
601
__get_token_priority(enum sieve_token_type token)
602
0
{
603
0
  switch (token) {
604
0
  case STT_LCURLY:
605
0
  case STT_RCURLY:
606
0
    return SGP_BLOCK;
607
0
  case STT_SEMICOLON:
608
0
    return SGP_COMMAND;
609
0
  case STT_LBRACKET:
610
0
  case STT_RBRACKET:
611
0
    return SGP_TEST_LIST;
612
0
  case STT_LSQUARE:
613
0
  case STT_RSQUARE:
614
0
    return SGP_STRING_LIST;
615
0
  default:
616
0
    break;
617
0
  }
618
619
0
  return SGP_OTHER;
620
0
}
621
622
static int
623
sieve_parser_recover(struct sieve_parser *parser,
624
         enum sieve_token_type end_token)
625
0
{
626
  /* The tokens that begin/end a specific block/command/list in order
627
     of ascending grammatical priority.
628
   */
629
0
  static const enum sieve_token_type begin_tokens[4] = {
630
0
    STT_LSQUARE, STT_LBRACKET, STT_NONE, STT_LCURLY };
631
0
  static const enum sieve_token_type end_tokens[4] = {
632
0
    STT_RSQUARE, STT_RBRACKET, STT_SEMICOLON, STT_RCURLY};
633
0
  const struct sieve_lexer *lexer = parser->lexer;
634
0
  int nesting = 1;
635
0
  enum sieve_grammatical_prio end_priority =
636
0
    __get_token_priority(end_token);
637
638
0
  i_assert(end_priority != SGP_OTHER);
639
640
0
  while (sieve_lexer_token_type(lexer) != STT_EOF &&
641
0
         __get_token_priority(sieve_lexer_token_type(lexer))
642
0
    <= end_priority) {
643
0
    if (sieve_lexer_token_type(lexer) ==
644
0
      begin_tokens[end_priority]) {
645
0
      nesting++;
646
0
      sieve_lexer_skip_token(lexer);
647
0
      continue;
648
0
    }
649
0
    if (sieve_lexer_token_type(lexer) ==
650
0
      end_tokens[end_priority]) {
651
0
      nesting--;
652
653
0
      if (nesting == 0) {
654
        /* Next character is the end */
655
0
        return 1;
656
0
      }
657
0
    }
658
0
    sieve_lexer_skip_token(lexer);
659
0
  }
660
661
  /* Special case: COMMAND */
662
0
  if (end_token == STT_SEMICOLON &&
663
0
      sieve_lexer_token_type(lexer) == STT_LCURLY) {
664
0
    return 1;
665
0
  }
666
667
  /* End not found before eof or end of surrounding grammatical structure
668
   */
669
0
  return 0;
670
0
}