/src/pigeonhole/src/lib-sieve/plugins/regex/mcht-regex.c
Line | Count | Source |
1 | | /* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file |
2 | | */ |
3 | | |
4 | | /* Match-type ':regex' |
5 | | */ |
6 | | |
7 | | #include "lib.h" |
8 | | #include "mempool.h" |
9 | | #include "buffer.h" |
10 | | #include "array.h" |
11 | | #include "str.h" |
12 | | #include "str-sanitize.h" |
13 | | |
14 | | #include "sieve-common.h" |
15 | | #include "sieve-limits.h" |
16 | | #include "sieve-ast.h" |
17 | | #include "sieve-stringlist.h" |
18 | | #include "sieve-commands.h" |
19 | | #include "sieve-validator.h" |
20 | | #include "sieve-interpreter.h" |
21 | | #include "sieve-comparators.h" |
22 | | #include "sieve-match-types.h" |
23 | | #include "sieve-match.h" |
24 | | |
25 | | #include "ext-regex-common.h" |
26 | | #include "dregex.h" |
27 | | |
28 | | #include <sys/types.h> |
29 | | #include <ctype.h> |
30 | | |
31 | | /* |
32 | | * Configuration |
33 | | */ |
34 | | |
/* Alias for the generic Sieve match-value limit; presumably intended as the
   maximum number of regex substitutions (match values).
   NOTE(review): nothing in the visible part of this file references this
   macro - confirm whether it is still needed. */
#define MCHT_REGEX_MAX_SUBSTITUTIONS SIEVE_MAX_MATCH_VALUES
36 | | |
37 | | /* |
38 | | * Match type |
39 | | */ |
40 | | |
41 | | static bool |
42 | | mcht_regex_validate_context(struct sieve_validator *valdtr, |
43 | | struct sieve_ast_argument *arg, |
44 | | struct sieve_match_type_context *ctx, |
45 | | struct sieve_ast_argument *key_arg); |
46 | | |
47 | | static void mcht_regex_match_init(struct sieve_match_context *mctx); |
48 | | static int |
49 | | mcht_regex_match_keys(struct sieve_match_context *mctx, |
50 | | const char *val, size_t val_size, |
51 | | struct sieve_stringlist *key_list); |
52 | | static void mcht_regex_match_deinit(struct sieve_match_context *mctx); |
53 | | |
54 | | const struct sieve_match_type_def regex_match_type = { |
55 | | SIEVE_OBJECT("regex", ®ex_match_type_operand, 0), |
56 | | .validate_context = mcht_regex_validate_context, |
57 | | .match_init = mcht_regex_match_init, |
58 | | .match_keys = mcht_regex_match_keys, |
59 | | .match_deinit = mcht_regex_match_deinit, |
60 | | }; |
61 | | |
62 | | /* |
63 | | * Match type validation |
64 | | */ |
65 | | |
66 | | static int |
67 | | mcht_regex_validate_regexp(struct sieve_validator *valdtr, |
68 | | struct sieve_match_type_context *mtctx ATTR_UNUSED, |
69 | | struct sieve_ast_argument *key, int cflags) |
70 | 0 | { |
71 | 0 | int ret; |
72 | 0 | const char *dregex_str = sieve_ast_argument_strc(key); |
73 | 0 | const char *error; |
74 | |
|
75 | 0 | struct dregex_code *code = dregex_code_create(); |
76 | 0 | ret = dregex_code_compile(code, dregex_str, cflags, &error); |
77 | 0 | dregex_code_free(&code); |
78 | |
|
79 | 0 | if (ret != 0) { |
80 | 0 | sieve_argument_validate_error( |
81 | 0 | valdtr, key, |
82 | 0 | "invalid regular expression '%s' for regex match: %s", |
83 | 0 | str_sanitize(dregex_str, 128), |
84 | 0 | error); |
85 | 0 | return -1; |
86 | 0 | } |
87 | | |
88 | 0 | return 1; |
89 | 0 | } |
90 | | |
/* State passed as the opaque context pointer to
   mcht_regex_validate_key_argument() while the key string list is being
   mapped during validation. */
struct _regex_key_context {
	/* Validator that receives error reports */
	struct sieve_validator *valdtr;
	/* Match type context of the command being validated */
	struct sieve_match_type_context *mtctx;
	/* Compile flags used for the trial compilation of each key */
	int cflags;
};
96 | | |
97 | | static int |
98 | | mcht_regex_validate_key_argument(void *context, struct sieve_ast_argument *key) |
99 | 0 | { |
100 | 0 | struct _regex_key_context *keyctx = (struct _regex_key_context *)context; |
101 | | |
102 | | /* FIXME: We can currently only handle string literal argument, so |
103 | | variables are not allowed. |
104 | | */ |
105 | 0 | if (sieve_argument_is_string_literal(key)) { |
106 | 0 | return mcht_regex_validate_regexp(keyctx->valdtr, keyctx->mtctx, |
107 | 0 | key, keyctx->cflags); |
108 | 0 | } |
109 | 0 | return 1; |
110 | 0 | } |
111 | | |
112 | | static bool |
113 | | mcht_regex_validate_context(struct sieve_validator *valdtr, |
114 | | struct sieve_ast_argument *arg ATTR_UNUSED, |
115 | | struct sieve_match_type_context *mtctx, |
116 | | struct sieve_ast_argument *key_arg) |
117 | 0 | { |
118 | 0 | const struct sieve_comparator *cmp = mtctx->comparator; |
119 | 0 | int cflags = DREGEX_NOSUB; |
120 | 0 | struct _regex_key_context keyctx; |
121 | 0 | struct sieve_ast_argument *kitem; |
122 | |
|
123 | 0 | if (cmp != NULL) { |
124 | 0 | if (sieve_comparator_is(cmp, i_ascii_casemap_comparator)) |
125 | 0 | cflags |= DREGEX_ICASE | DREGEX_ASCII_ONLY; |
126 | 0 | else if (sieve_comparator_is(cmp, i_octet_comparator)) |
127 | 0 | cflags |= DREGEX_ASCII_ONLY; |
128 | 0 | else if (sieve_comparator_is(cmp, i_unicode_casemap_comparator)) |
129 | 0 | cflags |= DREGEX_ICASE; |
130 | 0 | else { |
131 | 0 | sieve_argument_validate_error( |
132 | 0 | valdtr, mtctx->argument, |
133 | 0 | "regex match type only supports " |
134 | 0 | "i;octet, i;ascii-casemap and i;unicode-casemap comparators"); |
135 | 0 | return FALSE; |
136 | 0 | } |
137 | 0 | } |
138 | | |
139 | | /* Validate regular expression keys */ |
140 | | |
141 | 0 | keyctx.valdtr = valdtr; |
142 | 0 | keyctx.mtctx = mtctx; |
143 | 0 | keyctx.cflags = cflags; |
144 | |
|
145 | 0 | kitem = key_arg; |
146 | 0 | if (sieve_ast_stringlist_map(&kitem, &keyctx, |
147 | 0 | mcht_regex_validate_key_argument) <= 0) |
148 | 0 | return FALSE; |
149 | | |
150 | 0 | return TRUE; |
151 | 0 | } |
152 | | |
153 | | /* |
154 | | * Match type implementation |
155 | | */ |
156 | | |
/* One entry of the per-match cache of regex keys. */
struct mcht_regex_key {
	/* Compiled expression; only assigned when compilation succeeded */
	struct dregex_code *regexp;
	/* 1 = compiled and usable, -1 = unsupported comparator or compile
	   failure; only entries with status > 0 are used for matching */
	int status;
};
161 | | |
162 | | struct mcht_regex_context { |
163 | | ARRAY(struct mcht_regex_key) reg_expressions; |
164 | | ARRAY_TYPE(const_string) pmatch; |
165 | | bool all_compiled:1; |
166 | | bool capture_groups; |
167 | | }; |
168 | | |
169 | | static void mcht_regex_match_init(struct sieve_match_context *mctx) |
170 | 0 | { |
171 | 0 | pool_t pool = mctx->pool; |
172 | 0 | struct mcht_regex_context *ctx; |
173 | | |
174 | | /* Create context */ |
175 | 0 | ctx = p_new(pool, struct mcht_regex_context, 1); |
176 | | |
177 | | /* Create storage for match values if match values are requested */ |
178 | 0 | ctx->capture_groups = sieve_match_values_are_enabled(mctx->runenv); |
179 | | |
180 | | /* Assign context */ |
181 | 0 | mctx->data = ctx; |
182 | 0 | } |
183 | | |
184 | | static int |
185 | | mcht_regex_match_key(struct sieve_match_context *mctx, const char *val, |
186 | | struct dregex_code *code) |
187 | 0 | { |
188 | 0 | struct mcht_regex_context *ctx = |
189 | 0 | (struct mcht_regex_context *)mctx->data; |
190 | 0 | const char *error; |
191 | 0 | int ret; |
192 | 0 | ARRAY_TYPE(const_string) pmatch; |
193 | 0 | if (ctx->capture_groups) |
194 | 0 | t_array_init(&pmatch, 8); |
195 | | |
196 | | /* Execute regex */ |
197 | |
|
198 | 0 | if (!ctx->capture_groups) |
199 | 0 | ret = dregex_code_match(code, val, &error); |
200 | 0 | else |
201 | 0 | ret = dregex_code_match_groups(code, val, &pmatch, &error); |
202 | | |
203 | | /* Handle match values if necessary */ |
204 | |
|
205 | 0 | if (ret > 0) { |
206 | 0 | if (ctx->capture_groups && array_count(&pmatch) > 0) { |
207 | 0 | struct sieve_match_values *mvalues; |
208 | 0 | string_t *subst = t_str_new(32); |
209 | 0 | const char *mvalue; |
210 | | |
211 | | /* Start new list of match values */ |
212 | 0 | mvalues = sieve_match_values_start(mctx->runenv); |
213 | |
|
214 | 0 | i_assert(mvalues != NULL); |
215 | | |
216 | 0 | array_foreach_elem(&pmatch, mvalue) { |
217 | 0 | str_append(subst, mvalue); |
218 | 0 | sieve_match_values_add(mvalues, subst); |
219 | 0 | str_truncate(subst, 0); |
220 | 0 | } |
221 | | |
222 | | /* Substitute the new match values */ |
223 | 0 | sieve_match_values_commit(mctx->runenv, &mvalues); |
224 | 0 | } |
225 | 0 | return 1; |
226 | 0 | } |
227 | 0 | return 0; |
228 | 0 | } |
229 | | |
230 | | static int |
231 | | mcht_regex_match_keys(struct sieve_match_context *mctx, |
232 | | const char *val, size_t val_size ATTR_UNUSED, |
233 | | struct sieve_stringlist *key_list) |
234 | 0 | { |
235 | 0 | const struct sieve_runtime_env *renv = mctx->runenv; |
236 | 0 | bool trace = sieve_runtime_trace_active(renv, SIEVE_TRLVL_MATCHING); |
237 | 0 | struct mcht_regex_context *ctx = |
238 | 0 | (struct mcht_regex_context *)mctx->data; |
239 | 0 | const struct sieve_comparator *cmp = mctx->comparator; |
240 | 0 | int match; |
241 | |
|
242 | 0 | if (!ctx->all_compiled) { |
243 | 0 | string_t *key_item = NULL; |
244 | 0 | unsigned int i; |
245 | 0 | int ret; |
246 | | |
247 | | /* Regular expressions still need to be compiled */ |
248 | |
|
249 | 0 | if (!array_is_created(&ctx->reg_expressions)) |
250 | 0 | p_array_init(&ctx->reg_expressions, mctx->pool, 16); |
251 | |
|
252 | 0 | i = 0; |
253 | 0 | match = 0; |
254 | 0 | while (match == 0 && |
255 | 0 | (ret = sieve_stringlist_next_item(key_list, &key_item)) > 0) { |
256 | |
|
257 | 0 | T_BEGIN { |
258 | 0 | struct mcht_regex_key *rkey; |
259 | |
|
260 | 0 | if (i >= array_count(&ctx->reg_expressions)) { |
261 | 0 | int cflags = 0; |
262 | |
|
263 | 0 | rkey = array_append_space(&ctx->reg_expressions); |
264 | | |
265 | | /* Configure case-sensitivity according to comparator */ |
266 | 0 | if (sieve_comparator_is(cmp, i_octet_comparator)) |
267 | 0 | cflags |= DREGEX_ASCII_ONLY; |
268 | 0 | else if (sieve_comparator_is(cmp, i_ascii_casemap_comparator)) |
269 | 0 | cflags |= (DREGEX_ICASE | DREGEX_ASCII_ONLY); |
270 | 0 | else if (sieve_comparator_is(cmp, i_unicode_casemap_comparator)) |
271 | 0 | cflags |= DREGEX_ICASE; |
272 | 0 | else |
273 | 0 | rkey->status = -1; /* Not supported */ |
274 | |
|
275 | 0 | if (rkey->status >= 0) { |
276 | 0 | const char *dregex_str = str_c(key_item); |
277 | 0 | const char *error; |
278 | 0 | int rxret; |
279 | | |
280 | | /* Indicate whether match values need to be produced */ |
281 | 0 | if (!ctx->capture_groups) |
282 | 0 | cflags |= DREGEX_NOSUB; |
283 | |
|
284 | 0 | struct dregex_code *code = dregex_code_create(); |
285 | | /* Compile regular expression */ |
286 | 0 | rxret = dregex_code_compile(code, dregex_str, cflags, &error); |
287 | 0 | if (rxret != 0) { |
288 | 0 | sieve_runtime_error(renv, NULL, |
289 | 0 | "invalid regular expression '%s' for regex match: %s", |
290 | 0 | str_sanitize(dregex_str, 128), |
291 | 0 | error); |
292 | 0 | rkey->status = -1; |
293 | 0 | dregex_code_free(&code); |
294 | 0 | } else { |
295 | 0 | rkey->status = 1; |
296 | 0 | rkey->regexp = code; |
297 | 0 | } |
298 | 0 | } |
299 | 0 | } else { |
300 | 0 | rkey = array_idx_modifiable(&ctx->reg_expressions, i); |
301 | 0 | } |
302 | |
|
303 | 0 | if (rkey->status > 0) { |
304 | 0 | match = mcht_regex_match_key( |
305 | 0 | mctx, val, rkey->regexp); |
306 | |
|
307 | 0 | if (trace) { |
308 | 0 | sieve_runtime_trace(renv, 0, |
309 | 0 | "with regex '%s' [id=%u] => %d", |
310 | 0 | str_sanitize(str_c(key_item), 80), |
311 | 0 | i, match); |
312 | 0 | } |
313 | 0 | } |
314 | 0 | } T_END; |
315 | | |
316 | 0 | i++; |
317 | 0 | } |
318 | | |
319 | 0 | if (ret == 0) { |
320 | 0 | ctx->all_compiled = TRUE; |
321 | 0 | } else if (ret < 0) { |
322 | 0 | mctx->exec_status = key_list->exec_status; |
323 | 0 | match = -1; |
324 | 0 | } |
325 | |
|
326 | 0 | } else { |
327 | 0 | const struct mcht_regex_key *rkeys; |
328 | 0 | unsigned int i, count; |
329 | | |
330 | | /* Regular expressions are compiled */ |
331 | |
|
332 | 0 | rkeys = array_get(&ctx->reg_expressions, &count); |
333 | |
|
334 | 0 | i = 0; |
335 | 0 | match = 0; |
336 | 0 | while (match == 0 && i < count) { |
337 | 0 | if (rkeys[i].status > 0) { |
338 | 0 | match = mcht_regex_match_key( |
339 | 0 | mctx, val, rkeys[i].regexp); |
340 | |
|
341 | 0 | if (trace) { |
342 | 0 | sieve_runtime_trace(renv, 0, |
343 | 0 | "with compiled regex [id=%u] => %d", |
344 | 0 | i, match); |
345 | 0 | } |
346 | 0 | } |
347 | |
|
348 | 0 | i++; |
349 | 0 | } |
350 | 0 | } |
351 | | |
352 | 0 | return match; |
353 | 0 | } |
354 | | |
355 | | void mcht_regex_match_deinit(struct sieve_match_context *mctx) |
356 | 0 | { |
357 | 0 | struct mcht_regex_context *ctx = |
358 | 0 | (struct mcht_regex_context *)mctx->data; |
359 | 0 | struct mcht_regex_key *rkeys; |
360 | 0 | unsigned int count, i; |
361 | | |
362 | | /* Clean up compiled regular expressions */ |
363 | 0 | if (array_is_created(&ctx->reg_expressions)) { |
364 | 0 | rkeys = array_get_modifiable(&ctx->reg_expressions, &count); |
365 | 0 | for (i = 0; i < count; i++) |
366 | 0 | dregex_code_free(&rkeys[i].regexp); |
367 | 0 | } |
368 | 0 | } |