Coverage Report

Created: 2024-02-11 06:14

/src/postfix/postfix/src/util/dict_regexp.c
Line
Count
Source (jump to first uncovered line)
1
/*++
2
/* NAME
3
/*  dict_regexp 3
4
/* SUMMARY
5
/*  dictionary manager interface to REGEXP regular expression library
6
/* SYNOPSIS
7
/*  #include <dict_regexp.h>
8
/*
9
/*  DICT  *dict_regexp_open(name, dummy, dict_flags)
10
/*  const char *name;
11
/*  int dummy;
12
/*  int dict_flags;
13
/* DESCRIPTION
14
/*  dict_regexp_open() opens the named file and compiles the contained
15
/*  regular expressions. The result object can be used to match strings
16
/*  against the table.
17
/* SEE ALSO
18
/*  dict(3) generic dictionary manager
19
/*  regexp_table(5) regular expression table configuration
20
/* AUTHOR(S)
21
/*  LaMont Jones
22
/*  lamont@hp.com
23
/*
24
/*  Based on PCRE dictionary contributed by Andrew McNamara
25
/*  andrewm@connect.com.au
26
/*  connect.com.au Pty. Ltd.
27
/*  Level 3, 213 Miller St
28
/*  North Sydney, NSW, Australia
29
/*
30
/*  Heavily rewritten by Wietse Venema
31
/*  IBM T.J. Watson Research
32
/*  P.O. Box 704
33
/*  Yorktown Heights, NY 10598, USA
34
/*
35
/*  Wietse Venema
36
/*  Google, Inc.
37
/*  111 8th Avenue
38
/*  New York, NY 10011, USA
39
/*--*/
40
41
/* System library. */
42
43
#include "sys_defs.h"
44
45
#ifdef HAS_POSIX_REGEXP
46
47
#include <sys/stat.h>
48
#include <stdlib.h>
49
#include <unistd.h>
50
#include <string.h>
51
#include <ctype.h>
52
#include <regex.h>
53
#ifdef STRCASECMP_IN_STRINGS_H
54
#include <strings.h>
55
#endif
56
57
/* Utility library. */
58
59
#include "mymalloc.h"
60
#include "msg.h"
61
#include "safe.h"
62
#include "vstream.h"
63
#include "vstring.h"
64
#include "stringops.h"
65
#include "readlline.h"
66
#include "dict.h"
67
#include "dict_regexp.h"
68
#include "mac_parse.h"
69
#include "warn_stat.h"
70
#include "mvect.h"
71
72
 /*
73
  * Support for IF/ENDIF based on an idea by Bert Driehuis.
74
  */
75
0
/* Opcodes stored in the "op" member of every compiled rule. */
#define DICT_REGEXP_OP_MATCH    (1)     /* Match this regexp */
#define DICT_REGEXP_OP_IF       (2)     /* Increase if/endif nesting on match */
#define DICT_REGEXP_OP_ENDIF    (3)     /* Decrease if/endif nesting on match */
78
79
 /*
80
  * Regular expression before compiling.
81
  */
82
typedef struct {
    /* Pattern text; points into the caller's line buffer. */
    char   *regexp;
    /* Flag bits that will be handed to regcomp(). */
    int     options;
    /* Non-zero for a positive match, zero for a negated ("!") match. */
    int     match;
} DICT_REGEXP_PATTERN;
87
88
 /*
89
  * Compiled generic rule, and subclasses that derive from it.
90
  */
91
typedef struct DICT_REGEXP_RULE {
    /* One of DICT_REGEXP_OP_MATCH, DICT_REGEXP_OP_IF, DICT_REGEXP_OP_ENDIF. */
    int     op;
    /* Line in the map file that produced this rule (used in diagnostics). */
    int     lineno;
    /* Following rule in the dictionary's singly-linked list, or null. */
    struct DICT_REGEXP_RULE *next;
} DICT_REGEXP_RULE;
96
97
typedef struct {
98
    DICT_REGEXP_RULE rule;    /* generic part */
99
    regex_t *first_exp;     /* compiled primary pattern */
100
    int     first_match;    /* positive or negative match */
101
    regex_t *second_exp;    /* compiled secondary pattern */
102
    int     second_match;   /* positive or negative match */
103
    char   *replacement;    /* replacement text */
104
    size_t  max_sub;      /* largest $number in replacement */
105
} DICT_REGEXP_MATCH_RULE;
106
107
typedef struct {
108
    DICT_REGEXP_RULE rule;    /* generic members */
109
    regex_t *expr;      /* the condition */
110
    int     match;      /* positive or negative match */
111
    struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */
112
} DICT_REGEXP_IF_RULE;
113
114
 /*
115
  * Regexp map.
116
  */
117
typedef struct {
118
    DICT    dict;     /* generic members */
119
    regmatch_t *pmatch;     /* matched substring info */
120
    DICT_REGEXP_RULE *head;   /* first rule */
121
    VSTRING *expansion_buf;   /* lookup result */
122
} DICT_REGEXP;
123
124
 /*
125
  * Macros to make dense code more readable.
126
  */
127
/* Passed to regexec() when no substring information is wanted. */
#define NULL_SUBSTITUTIONS      (0)                     /* nmatch argument */
#define NULL_MATCH_RESULT       ((regmatch_t *) 0)      /* pmatch argument */
129
130
 /*
131
  * Context for $number expansion callback.
132
  */
133
typedef struct {
134
    DICT_REGEXP *dict_regexp;   /* the dictionary handle */
135
    DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */
136
    const char *lookup_string;    /* matched text */
137
} DICT_REGEXP_EXPAND_CONTEXT;
138
139
 /*
140
  * Context for $number pre-scan callback.
141
  */
142
typedef struct {
    /* Name of the regexp map, for diagnostics. */
    const char *mapname;
    /* Line number within the map file, for diagnostics. */
    int     lineno;
    /* Largest $number seen so far in the replacement text. */
    size_t  max_sub;
    /* Constant result with $$ already replaced by $; null if not constant. */
    char   *literal;
} DICT_REGEXP_PRESCAN_CONTEXT;
148
149
 /*
150
  * Compatibility.
151
  */
152
/* Supply MAC_PARSE_OK when the included mac_parse.h does not define it. */
#if !defined(MAC_PARSE_OK)
#define MAC_PARSE_OK    0
#endif
155
156
/* dict_regexp_expand - replace $number with substring from matched text */
157
158
static int dict_regexp_expand(int type, VSTRING *buf, void *ptr)
159
0
{
160
0
    DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
161
0
    DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
162
0
    DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
163
0
    regmatch_t *pmatch;
164
0
    size_t  n;
165
166
    /*
167
     * Replace $number by the corresponding substring from the matched text.
168
     * We pre-scanned the replacement text at compile time, so any out of
169
     * range $number means that something impossible has happened.
170
     */
171
0
    if (type == MAC_PARSE_VARNAME) {
172
0
  n = atoi(vstring_str(buf));
173
0
  if (n < 1 || n > match_rule->max_sub)
174
0
      msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
175
0
          dict_regexp->dict.name, match_rule->rule.lineno,
176
0
          vstring_str(buf));
177
0
  pmatch = dict_regexp->pmatch + n;
178
0
  if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
179
0
      return (MAC_PARSE_UNDEF);    /* empty or not matched */
180
0
  vstring_strncat(dict_regexp->expansion_buf,
181
0
      ctxt->lookup_string + pmatch->rm_so,
182
0
      pmatch->rm_eo - pmatch->rm_so);
183
0
  return (MAC_PARSE_OK);
184
0
    }
185
186
    /*
187
     * Straight text - duplicate with no substitution.
188
     */
189
0
    else {
190
0
  vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
191
0
  return (MAC_PARSE_OK);
192
0
    }
193
0
}
194
195
/*
 * dict_regexp_regerror - log a warning for a regcomp()/regexec() error
 *
 * mapname and lineno identify the offending rule, error is the code returned
 * by the regex library, and expr is the expression it was returned for.
 */

static void dict_regexp_regerror(const char *mapname, int lineno, int error,
                                 const regex_t *expr)
{
    char    text[256];

    /* Let the regex library turn the code into readable text. */
    (void) regerror(error, expr, text, sizeof(text));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
205
206
 /*
207
  * Inlined to reduce function call overhead in the time-critical loop.
208
  */
209
/*
 * Run regexec() and fold in the rule's polarity. Stores the regexec() result
 * in err. Evaluates to (match) when the expression matched, to !(match) when
 * it did not, and to 0 after logging a warning for any other result.
 */
#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
    ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \
     ((err) == 0 ? (match) : \
      (err) == REG_NOMATCH ? !(match) : \
      (dict_regexp_regerror((map), (line), (err), (expr)), 0)))
214
215
/* dict_regexp_lookup - match string and perform optional substitution */
216
217
static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
218
0
{
219
0
    DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
220
0
    DICT_REGEXP_RULE *rule;
221
0
    DICT_REGEXP_IF_RULE *if_rule;
222
0
    DICT_REGEXP_MATCH_RULE *match_rule;
223
0
    DICT_REGEXP_EXPAND_CONTEXT expand_context;
224
0
    int     error;
225
226
0
    dict->error = 0;
227
228
0
    if (msg_verbose)
229
0
  msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
230
231
    /*
232
     * Optionally fold the key.
233
     */
234
0
    if (dict->flags & DICT_FLAG_FOLD_MUL) {
235
0
  if (dict->fold_buf == 0)
236
0
      dict->fold_buf = vstring_alloc(10);
237
0
  vstring_strcpy(dict->fold_buf, lookup_string);
238
0
  lookup_string = lowercase(vstring_str(dict->fold_buf));
239
0
    }
240
0
    for (rule = dict_regexp->head; rule; rule = rule->next) {
241
242
0
  switch (rule->op) {
243
244
      /*
245
       * Search for the first matching primary expression. Limit the
246
       * overhead for substring substitution to the bare minimum.
247
       */
248
0
  case DICT_REGEXP_OP_MATCH:
249
0
      match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
250
0
      if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
251
0
             match_rule->first_exp,
252
0
             match_rule->first_match,
253
0
             lookup_string,
254
0
             match_rule->max_sub > 0 ?
255
0
             match_rule->max_sub + 1 : 0,
256
0
             dict_regexp->pmatch))
257
0
    continue;
258
0
      if (match_rule->second_exp
259
0
    && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
260
0
          match_rule->second_exp,
261
0
          match_rule->second_match,
262
0
          lookup_string,
263
0
          NULL_SUBSTITUTIONS,
264
0
          NULL_MATCH_RESULT))
265
0
    continue;
266
267
      /*
268
       * Skip $number substitutions when the replacement text contains
269
       * no $number strings, as learned during the compile time
270
       * pre-scan. The pre-scan already replaced $$ by $.
271
       */
272
0
      if (match_rule->max_sub == 0)
273
0
    return (match_rule->replacement);
274
275
      /*
276
       * Perform $number substitutions on the replacement text. We
277
       * pre-scanned the replacement text at compile time. Any macro
278
       * expansion errors at this point mean something impossible has
279
       * happened.
280
       */
281
0
      if (!dict_regexp->expansion_buf)
282
0
    dict_regexp->expansion_buf = vstring_alloc(10);
283
0
      VSTRING_RESET(dict_regexp->expansion_buf);
284
0
      expand_context.lookup_string = lookup_string;
285
0
      expand_context.match_rule = match_rule;
286
0
      expand_context.dict_regexp = dict_regexp;
287
288
0
      if (mac_parse(match_rule->replacement, dict_regexp_expand,
289
0
        (void *) &expand_context) & MAC_PARSE_ERROR)
290
0
    msg_panic("regexp map %s, line %d: bad replacement syntax",
291
0
        dict->name, rule->lineno);
292
0
      VSTRING_TERMINATE(dict_regexp->expansion_buf);
293
0
      return (vstring_str(dict_regexp->expansion_buf));
294
295
      /*
296
       * Conditional.
297
       */
298
0
  case DICT_REGEXP_OP_IF:
299
0
      if_rule = (DICT_REGEXP_IF_RULE *) rule;
300
0
      if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
301
0
             if_rule->expr, if_rule->match, lookup_string,
302
0
            NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
303
0
    continue;
304
      /* An IF without matching ENDIF has no "endif" rule. */
305
0
      if ((rule = if_rule->endif_rule) == 0)
306
0
    return (0);
307
      /* FALLTHROUGH */
308
309
      /*
310
       * ENDIF after IF.
311
       */
312
0
  case DICT_REGEXP_OP_ENDIF:
313
0
      continue;
314
315
0
  default:
316
0
      msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
317
0
  }
318
0
    }
319
0
    return (0);
320
0
}
321
322
/* dict_regexp_close - close regexp dictionary */
323
324
static void dict_regexp_close(DICT *dict)
325
0
{
326
0
    DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
327
0
    DICT_REGEXP_RULE *rule;
328
0
    DICT_REGEXP_RULE *next;
329
0
    DICT_REGEXP_MATCH_RULE *match_rule;
330
0
    DICT_REGEXP_IF_RULE *if_rule;
331
332
0
    for (rule = dict_regexp->head; rule; rule = next) {
333
0
  next = rule->next;
334
0
  switch (rule->op) {
335
0
  case DICT_REGEXP_OP_MATCH:
336
0
      match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
337
0
      if (match_rule->first_exp) {
338
0
    regfree(match_rule->first_exp);
339
0
    myfree((void *) match_rule->first_exp);
340
0
      }
341
0
      if (match_rule->second_exp) {
342
0
    regfree(match_rule->second_exp);
343
0
    myfree((void *) match_rule->second_exp);
344
0
      }
345
0
      if (match_rule->replacement)
346
0
    myfree((void *) match_rule->replacement);
347
0
      break;
348
0
  case DICT_REGEXP_OP_IF:
349
0
      if_rule = (DICT_REGEXP_IF_RULE *) rule;
350
0
      if (if_rule->expr) {
351
0
    regfree(if_rule->expr);
352
0
    myfree((void *) if_rule->expr);
353
0
      }
354
0
      break;
355
0
  case DICT_REGEXP_OP_ENDIF:
356
0
      break;
357
0
  default:
358
0
      msg_panic("dict_regexp_close: unknown operation %d", rule->op);
359
0
  }
360
0
  myfree((void *) rule);
361
0
    }
362
0
    if (dict_regexp->pmatch)
363
0
  myfree((void *) dict_regexp->pmatch);
364
0
    if (dict_regexp->expansion_buf)
365
0
  vstring_free(dict_regexp->expansion_buf);
366
0
    if (dict->fold_buf)
367
0
  vstring_free(dict->fold_buf);
368
0
    dict_free(dict);
369
0
}
370
371
/* dict_regexp_get_pat - extract one pattern with options from rule */
372
373
static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
374
                     DICT_REGEXP_PATTERN *pat)
375
0
{
376
0
    char   *p = *bufp;
377
0
    char    re_delim;
378
379
    /*
380
     * Process negation operators.
381
     */
382
0
    pat->match = 1;
383
0
    for (;;) {
384
0
  if (*p == '!')
385
0
      pat->match = !pat->match;
386
0
  else if (!ISSPACE(*p))
387
0
      break;
388
0
  p++;
389
0
    }
390
0
    if (*p == 0) {
391
0
  msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
392
0
     mapname, lineno);
393
0
  return (0);
394
0
    }
395
396
    /*
397
     * Search for the closing delimiter, handling backslash escape.
398
     */
399
0
    re_delim = *p++;
400
0
    pat->regexp = p;
401
0
    while (*p) {
402
0
  if (*p == '\\') {
403
0
      if (p[1])
404
0
    p++;
405
0
      else
406
0
    break;
407
0
  } else if (*p == re_delim) {
408
0
      break;
409
0
  }
410
0
  ++p;
411
0
    }
412
0
    if (!*p) {
413
0
  msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
414
0
     "skipping this rule", mapname, lineno, re_delim);
415
0
  return (0);
416
0
    }
417
0
    *p++ = 0;         /* null terminate */
418
419
    /*
420
     * Search for options.
421
     */
422
0
    pat->options = REG_EXTENDED | REG_ICASE;
423
0
    while (*p && !ISSPACE(*p) && *p != '!') {
424
0
  switch (*p) {
425
0
  case 'i':
426
0
      pat->options ^= REG_ICASE;
427
0
      break;
428
0
  case 'm':
429
0
      pat->options ^= REG_NEWLINE;
430
0
      break;
431
0
  case 'x':
432
0
      pat->options ^= REG_EXTENDED;
433
0
      break;
434
0
  default:
435
0
      msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
436
0
         "skipping this rule", mapname, lineno, *p);
437
0
      return (0);
438
0
  }
439
0
  ++p;
440
0
    }
441
0
    *bufp = p;
442
0
    return (1);
443
0
}
444
445
/* dict_regexp_get_pats - get the primary and second patterns and flags */
446
447
static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
448
                DICT_REGEXP_PATTERN *first_pat,
449
                DICT_REGEXP_PATTERN *second_pat)
450
0
{
451
452
    /*
453
     * Get the primary and optional secondary patterns and their flags.
454
     */
455
0
    if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
456
0
  return (0);
457
0
    if (**p == '!') {
458
#if 0
459
  static int bitrot_warned = 0;
460
461
  if (bitrot_warned == 0) {
462
      msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
463
     " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
464
         mapname, lineno);
465
      bitrot_warned = 1;
466
  }
467
#endif
468
0
  if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
469
0
      return (0);
470
0
    } else {
471
0
  second_pat->regexp = 0;
472
0
    }
473
0
    return (1);
474
0
}
475
476
/* dict_regexp_prescan - find largest $number in replacement text */
477
478
static int dict_regexp_prescan(int type, VSTRING *buf, void *context)
479
0
{
480
0
    DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
481
0
    size_t  n;
482
483
    /*
484
     * Keep a copy of literal text (with $$ already replaced by $) if and
485
     * only if the replacement text contains no $number expression. This way
486
     * we can avoid having to scan the replacement text at lookup time.
487
     */
488
0
    if (type == MAC_PARSE_VARNAME) {
489
0
  if (ctxt->literal) {
490
0
      myfree(ctxt->literal);
491
0
      ctxt->literal = 0;
492
0
  }
493
0
  if (!alldig(vstring_str(buf))) {
494
0
      msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
495
0
         ctxt->mapname, ctxt->lineno, vstring_str(buf));
496
0
      return (MAC_PARSE_ERROR);
497
0
  }
498
0
  n = atoi(vstring_str(buf));
499
0
  if (n < 1) {
500
0
      msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
501
0
         ctxt->mapname, ctxt->lineno, vstring_str(buf));
502
0
      return (MAC_PARSE_ERROR);
503
0
  }
504
0
  if (n > ctxt->max_sub)
505
0
      ctxt->max_sub = n;
506
0
    } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
507
0
  if (ctxt->literal)
508
0
      msg_panic("regexp map %s, line %d: multiple literals but no $number",
509
0
          ctxt->mapname, ctxt->lineno);
510
0
  ctxt->literal = mystrdup(vstring_str(buf));
511
0
    }
512
0
    return (MAC_PARSE_OK);
513
0
}
514
515
/* dict_regexp_compile_pat - compile one pattern */
516
517
static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
518
                  DICT_REGEXP_PATTERN *pat)
519
0
{
520
0
    int     error;
521
0
    regex_t *expr;
522
523
0
    expr = (regex_t *) mymalloc(sizeof(*expr));
524
0
    error = regcomp(expr, pat->regexp, pat->options);
525
0
    if (error != 0) {
526
0
  dict_regexp_regerror(mapname, lineno, error, expr);
527
0
  myfree((void *) expr);
528
0
  return (0);
529
0
    }
530
0
    return (expr);
531
0
}
532
533
/* dict_regexp_rule_alloc - fill in a generic rule structure */
534
535
static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size)
536
0
{
537
0
    DICT_REGEXP_RULE *rule;
538
539
0
    rule = (DICT_REGEXP_RULE *) mymalloc(size);
540
0
    rule->op = op;
541
0
    rule->lineno = lineno;
542
0
    rule->next = 0;
543
544
0
    return (rule);
545
0
}
546
547
/* dict_regexp_parseline - parse one rule */
548
549
static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname,
550
                       int lineno, char *line,
551
                         int nesting)
552
0
{
553
0
    char   *p;
554
555
0
    p = line;
556
557
    /*
558
     * An ordinary rule takes one or two patterns and replacement text.
559
     */
560
0
    if (!ISALNUM(*p)) {
561
0
  DICT_REGEXP_PATTERN first_pat;
562
0
  DICT_REGEXP_PATTERN second_pat;
563
0
  DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
564
0
  regex_t *first_exp = 0;
565
0
  regex_t *second_exp;
566
0
  DICT_REGEXP_MATCH_RULE *match_rule;
567
568
  /*
569
   * Get the primary and the optional secondary patterns.
570
   */
571
0
  if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
572
0
      return (0);
573
574
  /*
575
   * Get the replacement text.
576
   */
577
0
  while (*p && ISSPACE(*p))
578
0
      ++p;
579
0
  if (!*p) {
580
0
      msg_warn("regexp map %s, line %d: no replacement text: "
581
0
         "using empty string", mapname, lineno);
582
0
  }
583
584
  /*
585
   * Find the highest-numbered $number in the replacement text. We can
586
   * speed up pattern matching 1) by passing hints to the regexp
587
   * compiler, setting the REG_NOSUB flag when the replacement text
588
   * contains no $number string; 2) by passing hints to the regexp
589
   * execution code, limiting the amount of text that is made available
590
   * for substitution.
591
   */
592
0
  prescan_context.mapname = mapname;
593
0
  prescan_context.lineno = lineno;
594
0
  prescan_context.max_sub = 0;
595
0
  prescan_context.literal = 0;
596
597
  /*
598
   * The optimizer will eliminate code duplication and/or dead code.
599
   */
600
0
#define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
601
0
  if (first_exp) { \
602
0
      regfree(first_exp); \
603
0
      myfree((void *) first_exp); \
604
0
  } \
605
0
  if (prescan_context.literal) \
606
0
      myfree(prescan_context.literal); \
607
0
  return (rval); \
608
0
    } while (0)
609
610
0
  if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
611
0
      VSTRING *base64_buf;
612
0
      char   *err;
613
614
0
      if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
615
0
    err = dict_file_get_error(dict);
616
0
    msg_warn("regexp map %s, line %d: %s: skipping this rule",
617
0
       mapname, lineno, err);
618
0
    myfree(err);
619
0
    CREATE_MATCHOP_ERROR_RETURN(0);
620
0
      }
621
0
      p = vstring_str(base64_buf);
622
0
  }
623
0
  if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context)
624
0
      & MAC_PARSE_ERROR) {
625
0
      msg_warn("regexp map %s, line %d: bad replacement syntax: "
626
0
         "skipping this rule", mapname, lineno);
627
0
      CREATE_MATCHOP_ERROR_RETURN(0);
628
0
  }
629
630
  /*
631
   * Compile the primary and the optional secondary pattern. Speed up
632
   * execution when no matched text needs to be substituted into the
633
   * result string, or when the highest numbered substring is less than
634
   * the total number of () subpatterns.
635
   */
636
0
  if (prescan_context.max_sub == 0)
637
0
      first_pat.options |= REG_NOSUB;
638
0
  if (prescan_context.max_sub > 0 && first_pat.match == 0) {
639
0
      msg_warn("regexp map %s, line %d: $number found in negative match "
640
0
       "replacement text: skipping this rule", mapname, lineno);
641
0
      CREATE_MATCHOP_ERROR_RETURN(0);
642
0
  }
643
0
  if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
644
0
      msg_warn("regexp map %s, line %d: "
645
0
         "regular expression substitution is not allowed: "
646
0
         "skipping this rule", mapname, lineno);
647
0
      CREATE_MATCHOP_ERROR_RETURN(0);
648
0
  }
649
0
  if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
650
0
             &first_pat)) == 0)
651
0
      CREATE_MATCHOP_ERROR_RETURN(0);
652
0
  if (prescan_context.max_sub > first_exp->re_nsub) {
653
0
      msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
654
0
         "skipping this rule", mapname, lineno,
655
0
         (int) prescan_context.max_sub);
656
0
      CREATE_MATCHOP_ERROR_RETURN(0);
657
0
  }
658
0
  if (second_pat.regexp != 0) {
659
0
      second_pat.options |= REG_NOSUB;
660
0
      if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
661
0
                  &second_pat)) == 0)
662
0
    CREATE_MATCHOP_ERROR_RETURN(0);
663
0
  } else {
664
0
      second_exp = 0;
665
0
  }
666
0
  match_rule = (DICT_REGEXP_MATCH_RULE *)
667
0
      dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno,
668
0
           sizeof(DICT_REGEXP_MATCH_RULE));
669
0
  match_rule->first_exp = first_exp;
670
0
  match_rule->first_match = first_pat.match;
671
0
  match_rule->max_sub = prescan_context.max_sub;
672
0
  match_rule->second_exp = second_exp;
673
0
  match_rule->second_match = second_pat.match;
674
0
  if (prescan_context.literal)
675
0
      match_rule->replacement = prescan_context.literal;
676
0
  else
677
0
      match_rule->replacement = mystrdup(p);
678
0
  return ((DICT_REGEXP_RULE *) match_rule);
679
0
    }
680
681
    /*
682
     * The IF operator takes one pattern but no replacement text.
683
     */
684
0
    else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
685
0
  DICT_REGEXP_PATTERN pattern;
686
0
  regex_t *expr;
687
0
  DICT_REGEXP_IF_RULE *if_rule;
688
689
0
  p += 2;
690
0
  while (*p && ISSPACE(*p))
691
0
      p++;
692
0
  if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
693
0
      return (0);
694
0
  while (*p && ISSPACE(*p))
695
0
      ++p;
696
0
  if (*p) {
697
0
      msg_warn("regexp map %s, line %d: ignoring extra text after"
698
0
         " IF statement: \"%s\"", mapname, lineno, p);
699
0
      msg_warn("regexp map %s, line %d: do not prepend whitespace"
700
0
         " to statements between IF and ENDIF", mapname, lineno);
701
0
  }
702
0
  if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
703
0
      return (0);
704
0
  if_rule = (DICT_REGEXP_IF_RULE *)
705
0
      dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno,
706
0
           sizeof(DICT_REGEXP_IF_RULE));
707
0
  if_rule->expr = expr;
708
0
  if_rule->match = pattern.match;
709
0
  if_rule->endif_rule = 0;
710
0
  return ((DICT_REGEXP_RULE *) if_rule);
711
0
    }
712
713
    /*
714
     * The ENDIF operator takes no patterns and no replacement text.
715
     */
716
0
    else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
717
0
  DICT_REGEXP_RULE *rule;
718
719
0
  p += 5;
720
0
  if (nesting == 0) {
721
0
      msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
722
0
         mapname, lineno);
723
0
      return (0);
724
0
  }
725
0
  while (*p && ISSPACE(*p))
726
0
      ++p;
727
0
  if (*p)
728
0
      msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
729
0
         mapname, lineno);
730
0
  rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno,
731
0
              sizeof(DICT_REGEXP_RULE));
732
0
  return (rule);
733
0
    }
734
735
    /*
736
     * Unrecognized input.
737
     */
738
0
    else {
739
0
  msg_warn("regexp map %s, line %d: ignoring unrecognized request",
740
0
     mapname, lineno);
741
0
  return (0);
742
0
    }
743
0
}
744
745
/* dict_regexp_open - load and compile a file containing regular expressions */
746
747
DICT   *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
748
0
{
749
0
    const char myname[] = "dict_regexp_open";
750
0
    DICT_REGEXP *dict_regexp;
751
0
    VSTREAM *map_fp = 0;
752
0
    struct stat st;
753
0
    VSTRING *why = 0;
754
0
    VSTRING *line_buffer = 0;
755
0
    DICT_REGEXP_RULE *rule;
756
0
    DICT_REGEXP_RULE *last_rule = 0;
757
0
    int     lineno;
758
0
    int     last_line = 0;
759
0
    size_t  max_sub = 0;
760
0
    int     nesting = 0;
761
0
    char   *p;
762
0
    DICT_REGEXP_RULE **rule_stack = 0;
763
0
    MVECT   mvect;
764
765
    /*
766
     * Let the optimizer worry about eliminating redundant code.
767
     */
768
0
#define DICT_REGEXP_OPEN_RETURN(d) do { \
769
0
  DICT *__d = (d); \
770
0
  if (line_buffer != 0) \
771
0
      vstring_free(line_buffer); \
772
0
  if (map_fp != 0) \
773
0
      vstream_fclose(map_fp); \
774
0
  if (why != 0) \
775
0
     vstring_free(why); \
776
0
  return (__d); \
777
0
    } while (0)
778
779
    /*
780
     * Sanity checks.
781
     */
782
0
    if (open_flags != O_RDONLY)
783
0
  DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP,
784
0
              mapname, open_flags, dict_flags,
785
0
          "%s:%s map requires O_RDONLY access mode",
786
0
                 DICT_TYPE_REGEXP, mapname));
787
788
    /*
789
     * Open the configuration file.
790
     */
791
0
    if ((map_fp = dict_stream_open(DICT_TYPE_REGEXP, mapname, O_RDONLY,
792
0
           dict_flags, &st, &why)) == 0)
793
0
  DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname,
794
0
                 open_flags, dict_flags,
795
0
                 "%s", vstring_str(why)));
796
0
    line_buffer = vstring_alloc(100);
797
798
0
    dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
799
0
               sizeof(*dict_regexp));
800
0
    dict_regexp->dict.lookup = dict_regexp_lookup;
801
0
    dict_regexp->dict.close = dict_regexp_close;
802
0
    dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
803
0
    if (dict_flags & DICT_FLAG_FOLD_MUL)
804
0
  dict_regexp->dict.fold_buf = vstring_alloc(10);
805
0
    dict_regexp->head = 0;
806
0
    dict_regexp->pmatch = 0;
807
0
    dict_regexp->expansion_buf = 0;
808
0
    dict_regexp->dict.owner.uid = st.st_uid;
809
0
    dict_regexp->dict.owner.status = (st.st_uid != 0);
810
811
    /*
812
     * Parse the regexp table.
813
     */
814
0
    while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
815
0
  p = vstring_str(line_buffer);
816
0
  trimblanks(p, 0)[0] = 0;
817
0
  if (*p == 0)
818
0
      continue;
819
0
  rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno,
820
0
             p, nesting);
821
0
  if (rule == 0)
822
0
      continue;
823
0
  if (rule->op == DICT_REGEXP_OP_MATCH) {
824
0
      if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
825
0
    max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
826
0
  } else if (rule->op == DICT_REGEXP_OP_IF) {
827
0
      if (rule_stack == 0)
828
0
    rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect,
829
0
             sizeof(*rule_stack), nesting + 1,
830
0
            (MVECT_FN) 0, (MVECT_FN) 0);
831
0
      else
832
0
    rule_stack =
833
0
        (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1);
834
0
      rule_stack[nesting] = rule;
835
0
      nesting++;
836
0
  } else if (rule->op == DICT_REGEXP_OP_ENDIF) {
837
0
      DICT_REGEXP_IF_RULE *if_rule;
838
839
0
      if (nesting-- <= 0)
840
    /* Already handled in dict_regexp_parseline(). */
841
0
    msg_panic("%s: ENDIF without IF", myname);
842
0
      if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF)
843
0
    msg_panic("%s: unexpected rule stack element type %d",
844
0
        myname, rule_stack[nesting]->op);
845
0
      if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting];
846
0
      if_rule->endif_rule = rule;
847
0
  }
848
0
  if (last_rule == 0)
849
0
      dict_regexp->head = rule;
850
0
  else
851
0
      last_rule->next = rule;
852
0
  last_rule = rule;
853
0
    }
854
855
0
    while (nesting-- > 0)
856
0
  msg_warn("regexp map %s, line %d: IF has no matching ENDIF",
857
0
     mapname, rule_stack[nesting]->lineno);
858
859
0
    if (rule_stack)
860
0
  (void) mvect_free(&mvect);
861
862
    /*
863
     * Allocate space for only as many matched substrings as used in the
864
     * replacement text.
865
     */
866
0
    if (max_sub > 0)
867
0
  dict_regexp->pmatch =
868
0
      (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
869
870
0
    dict_file_purge_buffers(&dict_regexp->dict);
871
0
    DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict));
872
0
}
873
874
#endif