/src/postfix/postfix/src/util/dict_regexp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*++ |
2 | | /* NAME |
3 | | /* dict_regexp 3 |
4 | | /* SUMMARY |
5 | | /* dictionary manager interface to REGEXP regular expression library |
6 | | /* SYNOPSIS |
7 | | /* #include <dict_regexp.h> |
8 | | /* |
9 | | /* DICT *dict_regexp_open(name, dummy, dict_flags) |
10 | | /* const char *name; |
11 | | /* int dummy; |
12 | | /* int dict_flags; |
13 | | /* DESCRIPTION |
14 | | /* dict_regexp_open() opens the named file and compiles the contained |
15 | | /* regular expressions. The result object can be used to match strings |
16 | | /* against the table. |
17 | | /* SEE ALSO |
18 | | /* dict(3) generic dictionary manager |
19 | | /* regexp_table(5) regular expression table configuration |
20 | | /* AUTHOR(S) |
21 | | /* LaMont Jones |
22 | | /* lamont@hp.com |
23 | | /* |
24 | | /* Based on PCRE dictionary contributed by Andrew McNamara |
25 | | /* andrewm@connect.com.au |
26 | | /* connect.com.au Pty. Ltd. |
27 | | /* Level 3, 213 Miller St |
28 | | /* North Sydney, NSW, Australia |
29 | | /* |
30 | | /* Heavily rewritten by Wietse Venema |
31 | | /* IBM T.J. Watson Research |
32 | | /* P.O. Box 704 |
33 | | /* Yorktown Heights, NY 10598, USA |
34 | | /* |
35 | | /* Wietse Venema |
36 | | /* Google, Inc. |
37 | | /* 111 8th Avenue |
38 | | /* New York, NY 10011, USA |
39 | | /*--*/ |
40 | | |
41 | | /* System library. */ |
42 | | |
43 | | #include "sys_defs.h" |
44 | | |
45 | | #ifdef HAS_POSIX_REGEXP |
46 | | |
47 | | #include <sys/stat.h> |
48 | | #include <stdlib.h> |
49 | | #include <unistd.h> |
50 | | #include <string.h> |
51 | | #include <ctype.h> |
52 | | #include <regex.h> |
53 | | #ifdef STRCASECMP_IN_STRINGS_H |
54 | | #include <strings.h> |
55 | | #endif |
56 | | |
57 | | /* Utility library. */ |
58 | | |
59 | | #include "mymalloc.h" |
60 | | #include "msg.h" |
61 | | #include "safe.h" |
62 | | #include "vstream.h" |
63 | | #include "vstring.h" |
64 | | #include "stringops.h" |
65 | | #include "readlline.h" |
66 | | #include "dict.h" |
67 | | #include "dict_regexp.h" |
68 | | #include "mac_parse.h" |
69 | | #include "warn_stat.h" |
70 | | #include "mvect.h" |
71 | | |
72 | | /* |
73 | | * Support for IF/ENDIF based on an idea by Bert Driehuis. |
74 | | */ |
75 | 0 | #define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */ |
76 | 0 | #define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */ |
77 | 0 | #define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */ |
78 | | |
79 | | /* |
80 | | * Regular expression before compiling. |
81 | | */ |
82 | | typedef struct { |
83 | | char *regexp; /* regular expression */ |
84 | | int options; /* regcomp() options */ |
85 | | int match; /* positive or negative match */ |
86 | | } DICT_REGEXP_PATTERN; |
87 | | |
88 | | /* |
89 | | * Compiled generic rule, and subclasses that derive from it. |
90 | | */ |
91 | | typedef struct DICT_REGEXP_RULE { |
92 | | int op; /* DICT_REGEXP_OP_MATCH/IF/ENDIF */ |
93 | | int lineno; /* source file line number */ |
94 | | struct DICT_REGEXP_RULE *next; /* next rule in dict */ |
95 | | } DICT_REGEXP_RULE; |
96 | | |
97 | | typedef struct { |
98 | | DICT_REGEXP_RULE rule; /* generic part */ |
99 | | regex_t *first_exp; /* compiled primary pattern */ |
100 | | int first_match; /* positive or negative match */ |
101 | | regex_t *second_exp; /* compiled secondary pattern */ |
102 | | int second_match; /* positive or negative match */ |
103 | | char *replacement; /* replacement text */ |
104 | | size_t max_sub; /* largest $number in replacement */ |
105 | | } DICT_REGEXP_MATCH_RULE; |
106 | | |
107 | | typedef struct { |
108 | | DICT_REGEXP_RULE rule; /* generic members */ |
109 | | regex_t *expr; /* the condition */ |
110 | | int match; /* positive or negative match */ |
111 | | struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */ |
112 | | } DICT_REGEXP_IF_RULE; |
113 | | |
114 | | /* |
115 | | * Regexp map. |
116 | | */ |
117 | | typedef struct { |
118 | | DICT dict; /* generic members */ |
119 | | regmatch_t *pmatch; /* matched substring info */ |
120 | | DICT_REGEXP_RULE *head; /* first rule */ |
121 | | VSTRING *expansion_buf; /* lookup result */ |
122 | | } DICT_REGEXP; |
123 | | |
124 | | /* |
125 | | * Macros to make dense code more readable. |
126 | | */ |
127 | | #define NULL_SUBSTITUTIONS (0) |
128 | | #define NULL_MATCH_RESULT ((regmatch_t *) 0) |
129 | | |
130 | | /* |
131 | | * Context for $number expansion callback. |
132 | | */ |
133 | | typedef struct { |
134 | | DICT_REGEXP *dict_regexp; /* the dictionary handle */ |
135 | | DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */ |
136 | | const char *lookup_string; /* matched text */ |
137 | | } DICT_REGEXP_EXPAND_CONTEXT; |
138 | | |
139 | | /* |
140 | | * Context for $number pre-scan callback. |
141 | | */ |
142 | | typedef struct { |
143 | | const char *mapname; /* name of regexp map */ |
144 | | int lineno; /* where in file */ |
145 | | size_t max_sub; /* largest $number seen */ |
146 | | char *literal; /* constant result, $$ -> $ */ |
147 | | } DICT_REGEXP_PRESCAN_CONTEXT; |
148 | | |
149 | | /* |
150 | | * Compatibility. |
151 | | */ |
152 | | #ifndef MAC_PARSE_OK |
153 | | #define MAC_PARSE_OK 0 |
154 | | #endif |
155 | | |
156 | | /* dict_regexp_expand - replace $number with substring from matched text */ |
157 | | |
158 | | static int dict_regexp_expand(int type, VSTRING *buf, void *ptr) |
159 | 0 | { |
160 | 0 | DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr; |
161 | 0 | DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule; |
162 | 0 | DICT_REGEXP *dict_regexp = ctxt->dict_regexp; |
163 | 0 | regmatch_t *pmatch; |
164 | 0 | size_t n; |
165 | | |
166 | | /* |
167 | | * Replace $number by the corresponding substring from the matched text. |
168 | | * We pre-scanned the replacement text at compile time, so any out of |
169 | | * range $number means that something impossible has happened. |
170 | | */ |
171 | 0 | if (type == MAC_PARSE_VARNAME) { |
172 | 0 | n = atoi(vstring_str(buf)); |
173 | 0 | if (n < 1 || n > match_rule->max_sub) |
174 | 0 | msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"", |
175 | 0 | dict_regexp->dict.name, match_rule->rule.lineno, |
176 | 0 | vstring_str(buf)); |
177 | 0 | pmatch = dict_regexp->pmatch + n; |
178 | 0 | if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo) |
179 | 0 | return (MAC_PARSE_UNDEF); /* empty or not matched */ |
180 | 0 | vstring_strncat(dict_regexp->expansion_buf, |
181 | 0 | ctxt->lookup_string + pmatch->rm_so, |
182 | 0 | pmatch->rm_eo - pmatch->rm_so); |
183 | 0 | return (MAC_PARSE_OK); |
184 | 0 | } |
185 | | |
186 | | /* |
187 | | * Straight text - duplicate with no substitution. |
188 | | */ |
189 | 0 | else { |
190 | 0 | vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf)); |
191 | 0 | return (MAC_PARSE_OK); |
192 | 0 | } |
193 | 0 | } |
194 | | |
/* dict_regexp_regerror - log a regcomp()/regexec() error as a warning */

static void dict_regexp_regerror(const char *mapname, int lineno, int error,
                                 const regex_t *expr)
{
    char    text[256];

    /* Have the regexp library render the error code as text. */
    (void) regerror(error, expr, text, sizeof(text));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
205 | | |
206 | | /* |
207 | | * Inlined to reduce function call overhead in the time-critical loop. |
208 | | */ |
209 | | #define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \ |
210 | 0 | ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \ |
211 | 0 | ((err) == REG_NOMATCH ? !(match) : \ |
212 | 0 | (err) == 0 ? (match) : \ |
213 | 0 | (dict_regexp_regerror((map), (line), (err), (expr)), 0))) |
214 | | |
215 | | /* dict_regexp_lookup - match string and perform optional substitution */ |
216 | | |
217 | | static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string) |
218 | 0 | { |
219 | 0 | DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; |
220 | 0 | DICT_REGEXP_RULE *rule; |
221 | 0 | DICT_REGEXP_IF_RULE *if_rule; |
222 | 0 | DICT_REGEXP_MATCH_RULE *match_rule; |
223 | 0 | DICT_REGEXP_EXPAND_CONTEXT expand_context; |
224 | 0 | int error; |
225 | |
|
226 | 0 | dict->error = 0; |
227 | |
|
228 | 0 | if (msg_verbose) |
229 | 0 | msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string); |
230 | | |
231 | | /* |
232 | | * Optionally fold the key. |
233 | | */ |
234 | 0 | if (dict->flags & DICT_FLAG_FOLD_MUL) { |
235 | 0 | if (dict->fold_buf == 0) |
236 | 0 | dict->fold_buf = vstring_alloc(10); |
237 | 0 | vstring_strcpy(dict->fold_buf, lookup_string); |
238 | 0 | lookup_string = lowercase(vstring_str(dict->fold_buf)); |
239 | 0 | } |
240 | 0 | for (rule = dict_regexp->head; rule; rule = rule->next) { |
241 | |
|
242 | 0 | switch (rule->op) { |
243 | | |
244 | | /* |
245 | | * Search for the first matching primary expression. Limit the |
246 | | * overhead for substring substitution to the bare minimum. |
247 | | */ |
248 | 0 | case DICT_REGEXP_OP_MATCH: |
249 | 0 | match_rule = (DICT_REGEXP_MATCH_RULE *) rule; |
250 | 0 | if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, |
251 | 0 | match_rule->first_exp, |
252 | 0 | match_rule->first_match, |
253 | 0 | lookup_string, |
254 | 0 | match_rule->max_sub > 0 ? |
255 | 0 | match_rule->max_sub + 1 : 0, |
256 | 0 | dict_regexp->pmatch)) |
257 | 0 | continue; |
258 | 0 | if (match_rule->second_exp |
259 | 0 | && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, |
260 | 0 | match_rule->second_exp, |
261 | 0 | match_rule->second_match, |
262 | 0 | lookup_string, |
263 | 0 | NULL_SUBSTITUTIONS, |
264 | 0 | NULL_MATCH_RESULT)) |
265 | 0 | continue; |
266 | | |
267 | | /* |
268 | | * Skip $number substitutions when the replacement text contains |
269 | | * no $number strings, as learned during the compile time |
270 | | * pre-scan. The pre-scan already replaced $$ by $. |
271 | | */ |
272 | 0 | if (match_rule->max_sub == 0) |
273 | 0 | return (match_rule->replacement); |
274 | | |
275 | | /* |
276 | | * Perform $number substitutions on the replacement text. We |
277 | | * pre-scanned the replacement text at compile time. Any macro |
278 | | * expansion errors at this point mean something impossible has |
279 | | * happened. |
280 | | */ |
281 | 0 | if (!dict_regexp->expansion_buf) |
282 | 0 | dict_regexp->expansion_buf = vstring_alloc(10); |
283 | 0 | VSTRING_RESET(dict_regexp->expansion_buf); |
284 | 0 | expand_context.lookup_string = lookup_string; |
285 | 0 | expand_context.match_rule = match_rule; |
286 | 0 | expand_context.dict_regexp = dict_regexp; |
287 | |
|
288 | 0 | if (mac_parse(match_rule->replacement, dict_regexp_expand, |
289 | 0 | (void *) &expand_context) & MAC_PARSE_ERROR) |
290 | 0 | msg_panic("regexp map %s, line %d: bad replacement syntax", |
291 | 0 | dict->name, rule->lineno); |
292 | 0 | VSTRING_TERMINATE(dict_regexp->expansion_buf); |
293 | 0 | return (vstring_str(dict_regexp->expansion_buf)); |
294 | | |
295 | | /* |
296 | | * Conditional. |
297 | | */ |
298 | 0 | case DICT_REGEXP_OP_IF: |
299 | 0 | if_rule = (DICT_REGEXP_IF_RULE *) rule; |
300 | 0 | if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, |
301 | 0 | if_rule->expr, if_rule->match, lookup_string, |
302 | 0 | NULL_SUBSTITUTIONS, NULL_MATCH_RESULT)) |
303 | 0 | continue; |
304 | | /* An IF without matching ENDIF has no "endif" rule. */ |
305 | 0 | if ((rule = if_rule->endif_rule) == 0) |
306 | 0 | return (0); |
307 | | /* FALLTHROUGH */ |
308 | | |
309 | | /* |
310 | | * ENDIF after IF. |
311 | | */ |
312 | 0 | case DICT_REGEXP_OP_ENDIF: |
313 | 0 | continue; |
314 | | |
315 | 0 | default: |
316 | 0 | msg_panic("dict_regexp_lookup: impossible operation %d", rule->op); |
317 | 0 | } |
318 | 0 | } |
319 | 0 | return (0); |
320 | 0 | } |
321 | | |
322 | | /* dict_regexp_close - close regexp dictionary */ |
323 | | |
324 | | static void dict_regexp_close(DICT *dict) |
325 | 0 | { |
326 | 0 | DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; |
327 | 0 | DICT_REGEXP_RULE *rule; |
328 | 0 | DICT_REGEXP_RULE *next; |
329 | 0 | DICT_REGEXP_MATCH_RULE *match_rule; |
330 | 0 | DICT_REGEXP_IF_RULE *if_rule; |
331 | |
|
332 | 0 | for (rule = dict_regexp->head; rule; rule = next) { |
333 | 0 | next = rule->next; |
334 | 0 | switch (rule->op) { |
335 | 0 | case DICT_REGEXP_OP_MATCH: |
336 | 0 | match_rule = (DICT_REGEXP_MATCH_RULE *) rule; |
337 | 0 | if (match_rule->first_exp) { |
338 | 0 | regfree(match_rule->first_exp); |
339 | 0 | myfree((void *) match_rule->first_exp); |
340 | 0 | } |
341 | 0 | if (match_rule->second_exp) { |
342 | 0 | regfree(match_rule->second_exp); |
343 | 0 | myfree((void *) match_rule->second_exp); |
344 | 0 | } |
345 | 0 | if (match_rule->replacement) |
346 | 0 | myfree((void *) match_rule->replacement); |
347 | 0 | break; |
348 | 0 | case DICT_REGEXP_OP_IF: |
349 | 0 | if_rule = (DICT_REGEXP_IF_RULE *) rule; |
350 | 0 | if (if_rule->expr) { |
351 | 0 | regfree(if_rule->expr); |
352 | 0 | myfree((void *) if_rule->expr); |
353 | 0 | } |
354 | 0 | break; |
355 | 0 | case DICT_REGEXP_OP_ENDIF: |
356 | 0 | break; |
357 | 0 | default: |
358 | 0 | msg_panic("dict_regexp_close: unknown operation %d", rule->op); |
359 | 0 | } |
360 | 0 | myfree((void *) rule); |
361 | 0 | } |
362 | 0 | if (dict_regexp->pmatch) |
363 | 0 | myfree((void *) dict_regexp->pmatch); |
364 | 0 | if (dict_regexp->expansion_buf) |
365 | 0 | vstring_free(dict_regexp->expansion_buf); |
366 | 0 | if (dict->fold_buf) |
367 | 0 | vstring_free(dict->fold_buf); |
368 | 0 | dict_free(dict); |
369 | 0 | } |
370 | | |
371 | | /* dict_regexp_get_pat - extract one pattern with options from rule */ |
372 | | |
373 | | static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp, |
374 | | DICT_REGEXP_PATTERN *pat) |
375 | 0 | { |
376 | 0 | char *p = *bufp; |
377 | 0 | char re_delim; |
378 | | |
379 | | /* |
380 | | * Process negation operators. |
381 | | */ |
382 | 0 | pat->match = 1; |
383 | 0 | for (;;) { |
384 | 0 | if (*p == '!') |
385 | 0 | pat->match = !pat->match; |
386 | 0 | else if (!ISSPACE(*p)) |
387 | 0 | break; |
388 | 0 | p++; |
389 | 0 | } |
390 | 0 | if (*p == 0) { |
391 | 0 | msg_warn("regexp map %s, line %d: no regexp: skipping this rule", |
392 | 0 | mapname, lineno); |
393 | 0 | return (0); |
394 | 0 | } |
395 | | |
396 | | /* |
397 | | * Search for the closing delimiter, handling backslash escape. |
398 | | */ |
399 | 0 | re_delim = *p++; |
400 | 0 | pat->regexp = p; |
401 | 0 | while (*p) { |
402 | 0 | if (*p == '\\') { |
403 | 0 | if (p[1]) |
404 | 0 | p++; |
405 | 0 | else |
406 | 0 | break; |
407 | 0 | } else if (*p == re_delim) { |
408 | 0 | break; |
409 | 0 | } |
410 | 0 | ++p; |
411 | 0 | } |
412 | 0 | if (!*p) { |
413 | 0 | msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": " |
414 | 0 | "skipping this rule", mapname, lineno, re_delim); |
415 | 0 | return (0); |
416 | 0 | } |
417 | 0 | *p++ = 0; /* null terminate */ |
418 | | |
419 | | /* |
420 | | * Search for options. |
421 | | */ |
422 | 0 | pat->options = REG_EXTENDED | REG_ICASE; |
423 | 0 | while (*p && !ISSPACE(*p) && *p != '!') { |
424 | 0 | switch (*p) { |
425 | 0 | case 'i': |
426 | 0 | pat->options ^= REG_ICASE; |
427 | 0 | break; |
428 | 0 | case 'm': |
429 | 0 | pat->options ^= REG_NEWLINE; |
430 | 0 | break; |
431 | 0 | case 'x': |
432 | 0 | pat->options ^= REG_EXTENDED; |
433 | 0 | break; |
434 | 0 | default: |
435 | 0 | msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": " |
436 | 0 | "skipping this rule", mapname, lineno, *p); |
437 | 0 | return (0); |
438 | 0 | } |
439 | 0 | ++p; |
440 | 0 | } |
441 | 0 | *bufp = p; |
442 | 0 | return (1); |
443 | 0 | } |
444 | | |
445 | | /* dict_regexp_get_pats - get the primary and second patterns and flags */ |
446 | | |
447 | | static int dict_regexp_get_pats(const char *mapname, int lineno, char **p, |
448 | | DICT_REGEXP_PATTERN *first_pat, |
449 | | DICT_REGEXP_PATTERN *second_pat) |
450 | 0 | { |
451 | | |
452 | | /* |
453 | | * Get the primary and optional secondary patterns and their flags. |
454 | | */ |
455 | 0 | if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0) |
456 | 0 | return (0); |
457 | 0 | if (**p == '!') { |
458 | | #if 0 |
459 | | static int bitrot_warned = 0; |
460 | | |
461 | | if (bitrot_warned == 0) { |
462 | | msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away," |
463 | | " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead", |
464 | | mapname, lineno); |
465 | | bitrot_warned = 1; |
466 | | } |
467 | | #endif |
468 | 0 | if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0) |
469 | 0 | return (0); |
470 | 0 | } else { |
471 | 0 | second_pat->regexp = 0; |
472 | 0 | } |
473 | 0 | return (1); |
474 | 0 | } |
475 | | |
476 | | /* dict_regexp_prescan - find largest $number in replacement text */ |
477 | | |
478 | | static int dict_regexp_prescan(int type, VSTRING *buf, void *context) |
479 | 0 | { |
480 | 0 | DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context; |
481 | 0 | size_t n; |
482 | | |
483 | | /* |
484 | | * Keep a copy of literal text (with $$ already replaced by $) if and |
485 | | * only if the replacement text contains no $number expression. This way |
486 | | * we can avoid having to scan the replacement text at lookup time. |
487 | | */ |
488 | 0 | if (type == MAC_PARSE_VARNAME) { |
489 | 0 | if (ctxt->literal) { |
490 | 0 | myfree(ctxt->literal); |
491 | 0 | ctxt->literal = 0; |
492 | 0 | } |
493 | 0 | if (!alldig(vstring_str(buf))) { |
494 | 0 | msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"", |
495 | 0 | ctxt->mapname, ctxt->lineno, vstring_str(buf)); |
496 | 0 | return (MAC_PARSE_ERROR); |
497 | 0 | } |
498 | 0 | n = atoi(vstring_str(buf)); |
499 | 0 | if (n < 1) { |
500 | 0 | msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"", |
501 | 0 | ctxt->mapname, ctxt->lineno, vstring_str(buf)); |
502 | 0 | return (MAC_PARSE_ERROR); |
503 | 0 | } |
504 | 0 | if (n > ctxt->max_sub) |
505 | 0 | ctxt->max_sub = n; |
506 | 0 | } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { |
507 | 0 | if (ctxt->literal) |
508 | 0 | msg_panic("regexp map %s, line %d: multiple literals but no $number", |
509 | 0 | ctxt->mapname, ctxt->lineno); |
510 | 0 | ctxt->literal = mystrdup(vstring_str(buf)); |
511 | 0 | } |
512 | 0 | return (MAC_PARSE_OK); |
513 | 0 | } |
514 | | |
515 | | /* dict_regexp_compile_pat - compile one pattern */ |
516 | | |
517 | | static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno, |
518 | | DICT_REGEXP_PATTERN *pat) |
519 | 0 | { |
520 | 0 | int error; |
521 | 0 | regex_t *expr; |
522 | |
|
523 | 0 | expr = (regex_t *) mymalloc(sizeof(*expr)); |
524 | 0 | error = regcomp(expr, pat->regexp, pat->options); |
525 | 0 | if (error != 0) { |
526 | 0 | dict_regexp_regerror(mapname, lineno, error, expr); |
527 | 0 | myfree((void *) expr); |
528 | 0 | return (0); |
529 | 0 | } |
530 | 0 | return (expr); |
531 | 0 | } |
532 | | |
533 | | /* dict_regexp_rule_alloc - fill in a generic rule structure */ |
534 | | |
535 | | static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size) |
536 | 0 | { |
537 | 0 | DICT_REGEXP_RULE *rule; |
538 | |
|
539 | 0 | rule = (DICT_REGEXP_RULE *) mymalloc(size); |
540 | 0 | rule->op = op; |
541 | 0 | rule->lineno = lineno; |
542 | 0 | rule->next = 0; |
543 | |
|
544 | 0 | return (rule); |
545 | 0 | } |
546 | | |
547 | | /* dict_regexp_parseline - parse one rule */ |
548 | | |
549 | | static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname, |
550 | | int lineno, char *line, |
551 | | int nesting) |
552 | 0 | { |
553 | 0 | char *p; |
554 | |
|
555 | 0 | p = line; |
556 | | |
557 | | /* |
558 | | * An ordinary rule takes one or two patterns and replacement text. |
559 | | */ |
560 | 0 | if (!ISALNUM(*p)) { |
561 | 0 | DICT_REGEXP_PATTERN first_pat; |
562 | 0 | DICT_REGEXP_PATTERN second_pat; |
563 | 0 | DICT_REGEXP_PRESCAN_CONTEXT prescan_context; |
564 | 0 | regex_t *first_exp = 0; |
565 | 0 | regex_t *second_exp; |
566 | 0 | DICT_REGEXP_MATCH_RULE *match_rule; |
567 | | |
568 | | /* |
569 | | * Get the primary and the optional secondary patterns. |
570 | | */ |
571 | 0 | if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat)) |
572 | 0 | return (0); |
573 | | |
574 | | /* |
575 | | * Get the replacement text. |
576 | | */ |
577 | 0 | while (*p && ISSPACE(*p)) |
578 | 0 | ++p; |
579 | 0 | if (!*p) { |
580 | 0 | msg_warn("regexp map %s, line %d: no replacement text: " |
581 | 0 | "using empty string", mapname, lineno); |
582 | 0 | } |
583 | | |
584 | | /* |
585 | | * Find the highest-numbered $number in the replacement text. We can |
586 | | * speed up pattern matching 1) by passing hints to the regexp |
587 | | * compiler, setting the REG_NOSUB flag when the replacement text |
588 | | * contains no $number string; 2) by passing hints to the regexp |
589 | | * execution code, limiting the amount of text that is made available |
590 | | * for substitution. |
591 | | */ |
592 | 0 | prescan_context.mapname = mapname; |
593 | 0 | prescan_context.lineno = lineno; |
594 | 0 | prescan_context.max_sub = 0; |
595 | 0 | prescan_context.literal = 0; |
596 | | |
597 | | /* |
598 | | * The optimizer will eliminate code duplication and/or dead code. |
599 | | */ |
600 | 0 | #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ |
601 | 0 | if (first_exp) { \ |
602 | 0 | regfree(first_exp); \ |
603 | 0 | myfree((void *) first_exp); \ |
604 | 0 | } \ |
605 | 0 | if (prescan_context.literal) \ |
606 | 0 | myfree(prescan_context.literal); \ |
607 | 0 | return (rval); \ |
608 | 0 | } while (0) |
609 | |
|
610 | 0 | if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) { |
611 | 0 | VSTRING *base64_buf; |
612 | 0 | char *err; |
613 | |
|
614 | 0 | if ((base64_buf = dict_file_to_b64(dict, p)) == 0) { |
615 | 0 | err = dict_file_get_error(dict); |
616 | 0 | msg_warn("regexp map %s, line %d: %s: skipping this rule", |
617 | 0 | mapname, lineno, err); |
618 | 0 | myfree(err); |
619 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
620 | 0 | } |
621 | 0 | p = vstring_str(base64_buf); |
622 | 0 | } |
623 | 0 | if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context) |
624 | 0 | & MAC_PARSE_ERROR) { |
625 | 0 | msg_warn("regexp map %s, line %d: bad replacement syntax: " |
626 | 0 | "skipping this rule", mapname, lineno); |
627 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
628 | 0 | } |
629 | | |
630 | | /* |
631 | | * Compile the primary and the optional secondary pattern. Speed up |
632 | | * execution when no matched text needs to be substituted into the |
633 | | * result string, or when the highest numbered substring is less than |
634 | | * the total number of () subpatterns. |
635 | | */ |
636 | 0 | if (prescan_context.max_sub == 0) |
637 | 0 | first_pat.options |= REG_NOSUB; |
638 | 0 | if (prescan_context.max_sub > 0 && first_pat.match == 0) { |
639 | 0 | msg_warn("regexp map %s, line %d: $number found in negative match " |
640 | 0 | "replacement text: skipping this rule", mapname, lineno); |
641 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
642 | 0 | } |
643 | 0 | if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) { |
644 | 0 | msg_warn("regexp map %s, line %d: " |
645 | 0 | "regular expression substitution is not allowed: " |
646 | 0 | "skipping this rule", mapname, lineno); |
647 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
648 | 0 | } |
649 | 0 | if ((first_exp = dict_regexp_compile_pat(mapname, lineno, |
650 | 0 | &first_pat)) == 0) |
651 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
652 | 0 | if (prescan_context.max_sub > first_exp->re_nsub) { |
653 | 0 | msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": " |
654 | 0 | "skipping this rule", mapname, lineno, |
655 | 0 | (int) prescan_context.max_sub); |
656 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
657 | 0 | } |
658 | 0 | if (second_pat.regexp != 0) { |
659 | 0 | second_pat.options |= REG_NOSUB; |
660 | 0 | if ((second_exp = dict_regexp_compile_pat(mapname, lineno, |
661 | 0 | &second_pat)) == 0) |
662 | 0 | CREATE_MATCHOP_ERROR_RETURN(0); |
663 | 0 | } else { |
664 | 0 | second_exp = 0; |
665 | 0 | } |
666 | 0 | match_rule = (DICT_REGEXP_MATCH_RULE *) |
667 | 0 | dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno, |
668 | 0 | sizeof(DICT_REGEXP_MATCH_RULE)); |
669 | 0 | match_rule->first_exp = first_exp; |
670 | 0 | match_rule->first_match = first_pat.match; |
671 | 0 | match_rule->max_sub = prescan_context.max_sub; |
672 | 0 | match_rule->second_exp = second_exp; |
673 | 0 | match_rule->second_match = second_pat.match; |
674 | 0 | if (prescan_context.literal) |
675 | 0 | match_rule->replacement = prescan_context.literal; |
676 | 0 | else |
677 | 0 | match_rule->replacement = mystrdup(p); |
678 | 0 | return ((DICT_REGEXP_RULE *) match_rule); |
679 | 0 | } |
680 | | |
681 | | /* |
682 | | * The IF operator takes one pattern but no replacement text. |
683 | | */ |
684 | 0 | else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { |
685 | 0 | DICT_REGEXP_PATTERN pattern; |
686 | 0 | regex_t *expr; |
687 | 0 | DICT_REGEXP_IF_RULE *if_rule; |
688 | |
|
689 | 0 | p += 2; |
690 | 0 | while (*p && ISSPACE(*p)) |
691 | 0 | p++; |
692 | 0 | if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern)) |
693 | 0 | return (0); |
694 | 0 | while (*p && ISSPACE(*p)) |
695 | 0 | ++p; |
696 | 0 | if (*p) { |
697 | 0 | msg_warn("regexp map %s, line %d: ignoring extra text after" |
698 | 0 | " IF statement: \"%s\"", mapname, lineno, p); |
699 | 0 | msg_warn("regexp map %s, line %d: do not prepend whitespace" |
700 | 0 | " to statements between IF and ENDIF", mapname, lineno); |
701 | 0 | } |
702 | 0 | if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0) |
703 | 0 | return (0); |
704 | 0 | if_rule = (DICT_REGEXP_IF_RULE *) |
705 | 0 | dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno, |
706 | 0 | sizeof(DICT_REGEXP_IF_RULE)); |
707 | 0 | if_rule->expr = expr; |
708 | 0 | if_rule->match = pattern.match; |
709 | 0 | if_rule->endif_rule = 0; |
710 | 0 | return ((DICT_REGEXP_RULE *) if_rule); |
711 | 0 | } |
712 | | |
713 | | /* |
714 | | * The ENDIF operator takes no patterns and no replacement text. |
715 | | */ |
716 | 0 | else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { |
717 | 0 | DICT_REGEXP_RULE *rule; |
718 | |
|
719 | 0 | p += 5; |
720 | 0 | if (nesting == 0) { |
721 | 0 | msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF", |
722 | 0 | mapname, lineno); |
723 | 0 | return (0); |
724 | 0 | } |
725 | 0 | while (*p && ISSPACE(*p)) |
726 | 0 | ++p; |
727 | 0 | if (*p) |
728 | 0 | msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF", |
729 | 0 | mapname, lineno); |
730 | 0 | rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno, |
731 | 0 | sizeof(DICT_REGEXP_RULE)); |
732 | 0 | return (rule); |
733 | 0 | } |
734 | | |
735 | | /* |
736 | | * Unrecognized input. |
737 | | */ |
738 | 0 | else { |
739 | 0 | msg_warn("regexp map %s, line %d: ignoring unrecognized request", |
740 | 0 | mapname, lineno); |
741 | 0 | return (0); |
742 | 0 | } |
743 | 0 | } |
744 | | |
745 | | /* dict_regexp_open - load and compile a file containing regular expressions */ |
746 | | |
747 | | DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags) |
748 | 0 | { |
749 | 0 | const char myname[] = "dict_regexp_open"; |
750 | 0 | DICT_REGEXP *dict_regexp; |
751 | 0 | VSTREAM *map_fp = 0; |
752 | 0 | struct stat st; |
753 | 0 | VSTRING *why = 0; |
754 | 0 | VSTRING *line_buffer = 0; |
755 | 0 | DICT_REGEXP_RULE *rule; |
756 | 0 | DICT_REGEXP_RULE *last_rule = 0; |
757 | 0 | int lineno; |
758 | 0 | int last_line = 0; |
759 | 0 | size_t max_sub = 0; |
760 | 0 | int nesting = 0; |
761 | 0 | char *p; |
762 | 0 | DICT_REGEXP_RULE **rule_stack = 0; |
763 | 0 | MVECT mvect; |
764 | | |
765 | | /* |
766 | | * Let the optimizer worry about eliminating redundant code. |
767 | | */ |
768 | 0 | #define DICT_REGEXP_OPEN_RETURN(d) do { \ |
769 | 0 | DICT *__d = (d); \ |
770 | 0 | if (line_buffer != 0) \ |
771 | 0 | vstring_free(line_buffer); \ |
772 | 0 | if (map_fp != 0) \ |
773 | 0 | vstream_fclose(map_fp); \ |
774 | 0 | if (why != 0) \ |
775 | 0 | vstring_free(why); \ |
776 | 0 | return (__d); \ |
777 | 0 | } while (0) |
778 | | |
779 | | /* |
780 | | * Sanity checks. |
781 | | */ |
782 | 0 | if (open_flags != O_RDONLY) |
783 | 0 | DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, |
784 | 0 | mapname, open_flags, dict_flags, |
785 | 0 | "%s:%s map requires O_RDONLY access mode", |
786 | 0 | DICT_TYPE_REGEXP, mapname)); |
787 | | |
788 | | /* |
789 | | * Open the configuration file. |
790 | | */ |
791 | 0 | if ((map_fp = dict_stream_open(DICT_TYPE_REGEXP, mapname, O_RDONLY, |
792 | 0 | dict_flags, &st, &why)) == 0) |
793 | 0 | DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname, |
794 | 0 | open_flags, dict_flags, |
795 | 0 | "%s", vstring_str(why))); |
796 | 0 | line_buffer = vstring_alloc(100); |
797 | |
|
798 | 0 | dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname, |
799 | 0 | sizeof(*dict_regexp)); |
800 | 0 | dict_regexp->dict.lookup = dict_regexp_lookup; |
801 | 0 | dict_regexp->dict.close = dict_regexp_close; |
802 | 0 | dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN; |
803 | 0 | if (dict_flags & DICT_FLAG_FOLD_MUL) |
804 | 0 | dict_regexp->dict.fold_buf = vstring_alloc(10); |
805 | 0 | dict_regexp->head = 0; |
806 | 0 | dict_regexp->pmatch = 0; |
807 | 0 | dict_regexp->expansion_buf = 0; |
808 | 0 | dict_regexp->dict.owner.uid = st.st_uid; |
809 | 0 | dict_regexp->dict.owner.status = (st.st_uid != 0); |
810 | | |
811 | | /* |
812 | | * Parse the regexp table. |
813 | | */ |
814 | 0 | while (readllines(line_buffer, map_fp, &last_line, &lineno)) { |
815 | 0 | p = vstring_str(line_buffer); |
816 | 0 | trimblanks(p, 0)[0] = 0; |
817 | 0 | if (*p == 0) |
818 | 0 | continue; |
819 | 0 | rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno, |
820 | 0 | p, nesting); |
821 | 0 | if (rule == 0) |
822 | 0 | continue; |
823 | 0 | if (rule->op == DICT_REGEXP_OP_MATCH) { |
824 | 0 | if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub) |
825 | 0 | max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub; |
826 | 0 | } else if (rule->op == DICT_REGEXP_OP_IF) { |
827 | 0 | if (rule_stack == 0) |
828 | 0 | rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect, |
829 | 0 | sizeof(*rule_stack), nesting + 1, |
830 | 0 | (MVECT_FN) 0, (MVECT_FN) 0); |
831 | 0 | else |
832 | 0 | rule_stack = |
833 | 0 | (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1); |
834 | 0 | rule_stack[nesting] = rule; |
835 | 0 | nesting++; |
836 | 0 | } else if (rule->op == DICT_REGEXP_OP_ENDIF) { |
837 | 0 | DICT_REGEXP_IF_RULE *if_rule; |
838 | |
|
839 | 0 | if (nesting-- <= 0) |
840 | | /* Already handled in dict_regexp_parseline(). */ |
841 | 0 | msg_panic("%s: ENDIF without IF", myname); |
842 | 0 | if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF) |
843 | 0 | msg_panic("%s: unexpected rule stack element type %d", |
844 | 0 | myname, rule_stack[nesting]->op); |
845 | 0 | if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting]; |
846 | 0 | if_rule->endif_rule = rule; |
847 | 0 | } |
848 | 0 | if (last_rule == 0) |
849 | 0 | dict_regexp->head = rule; |
850 | 0 | else |
851 | 0 | last_rule->next = rule; |
852 | 0 | last_rule = rule; |
853 | 0 | } |
854 | | |
855 | 0 | while (nesting-- > 0) |
856 | 0 | msg_warn("regexp map %s, line %d: IF has no matching ENDIF", |
857 | 0 | mapname, rule_stack[nesting]->lineno); |
858 | |
|
859 | 0 | if (rule_stack) |
860 | 0 | (void) mvect_free(&mvect); |
861 | | |
862 | | /* |
863 | | * Allocate space for only as many matched substrings as used in the |
864 | | * replacement text. |
865 | | */ |
866 | 0 | if (max_sub > 0) |
867 | 0 | dict_regexp->pmatch = |
868 | 0 | (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1)); |
869 | |
|
870 | 0 | dict_file_purge_buffers(&dict_regexp->dict); |
871 | 0 | DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict)); |
872 | 0 | } |
873 | | |
874 | | #endif |