/src/pigeonhole/src/lib-sieve/mcht-matches.c
Line | Count | Source |
1 | | /* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file |
2 | | */ |
3 | | |
4 | | /* Match-type ':matches' |
5 | | */ |
6 | | |
7 | | #include "lib.h" |
8 | | #include "str.h" |
9 | | |
10 | | #include "sieve-match-types.h" |
11 | | #include "sieve-comparators.h" |
12 | | #include "sieve-interpreter.h" |
13 | | #include "sieve-match.h" |
14 | | |
15 | | #include <string.h> |
16 | | #include <stdio.h> |
17 | | |
18 | | /* |
19 | | * Forward declarations |
20 | | */ |
21 | | |
22 | | static int |
23 | | mcht_matches_match_key(struct sieve_match_context *mctx, |
24 | | const char *val, size_t val_size, |
25 | | const char *key, size_t key_size); |
26 | | |
27 | | /* |
28 | | * Match-type object |
29 | | */ |
30 | | |
31 | | const struct sieve_match_type_def matches_match_type = { |
32 | | SIEVE_OBJECT("matches", &match_type_operand, SIEVE_MATCH_TYPE_MATCHES), |
33 | | .validate_context = sieve_match_substring_validate_context, |
34 | | .match_key = mcht_matches_match_key |
35 | | }; |
36 | | |
37 | | /* |
38 | | * Match-type implementation |
39 | | */ |
40 | | |
41 | | /* Quick 'n dirty debug */ |
42 | | //#define MATCH_DEBUG |
43 | | #ifdef MATCH_DEBUG |
44 | | #define debug_printf(...) printf ("match debug: " __VA_ARGS__) |
45 | | #else |
46 | | #define debug_printf(...) |
47 | | #endif |
48 | | |
49 | | /* FIXME: Naive implementation, substitute this with dovecot src/lib/str-find.c |
50 | | * |
51 | | * The inner loop polls the interpreter CPU time limit periodically so that a |
52 | | * single O(N*M) search on a large value cannot run for many times the |
53 | | * configured sieve_max_cpu_time. Returns 1 on match, 0 on exhaustion, or -1 |
54 | | * when the CPU time limit was exceeded (mctx->exec_status is set). |
55 | | */ |
56 | 0 | #define SIEVE_MATCHES_CPU_CHECK_INTERVAL 4096 |
57 | | |
58 | | static int |
59 | | _string_find(struct sieve_match_context *mctx, |
60 | | const struct sieve_comparator *cmp, |
61 | | const char **valp, const char *vend, |
62 | | const char **keyp, const char *kend, |
63 | | unsigned int *counter) |
64 | 0 | { |
65 | 0 | while ((*valp < vend) && (*keyp < kend)) { |
66 | 0 | if (!cmp->def->char_match(cmp, valp, vend, keyp, kend)) |
67 | 0 | (*valp)++; |
68 | |
|
69 | 0 | if (++(*counter) >= SIEVE_MATCHES_CPU_CHECK_INTERVAL) { |
70 | 0 | *counter = 0; |
71 | 0 | if (sieve_runtime_cpu_limit_exceeded(mctx->runenv)) { |
72 | 0 | sieve_runtime_error( |
73 | 0 | mctx->runenv, NULL, |
74 | 0 | "execution exceeded CPU time limit"); |
75 | 0 | mctx->exec_status = |
76 | 0 | SIEVE_EXEC_RESOURCE_LIMIT; |
77 | 0 | return -1; |
78 | 0 | } |
79 | 0 | } |
80 | 0 | } |
81 | | |
82 | 0 | return (*keyp == kend ? 1 : 0); |
83 | 0 | } |
84 | | |
85 | | static char |
86 | | _scan_key_section(string_t *section, const char **wcardp, const char *key_end) |
87 | 0 | { |
88 | | /* Find next wildcard and resolve escape sequences */ |
89 | 0 | str_truncate(section, 0); |
90 | 0 | while (*wcardp < key_end && **wcardp != '*' && **wcardp != '?') { |
91 | 0 | if (**wcardp == '\\') { |
92 | 0 | (*wcardp)++; |
93 | 0 | if (*wcardp == key_end) { |
94 | 0 | str_append_c(section,'\\'); |
95 | 0 | break; |
96 | 0 | } |
97 | 0 | } |
98 | 0 | str_append_c(section, **wcardp); |
99 | 0 | (*wcardp)++; |
100 | 0 | } |
101 | | |
102 | | /* Record wildcard character or \0 */ |
103 | 0 | if (*wcardp < key_end) { |
104 | 0 | return **wcardp; |
105 | 0 | } |
106 | | |
107 | 0 | i_assert(*wcardp == key_end); |
108 | 0 | return '\0'; |
109 | 0 | } |
110 | | |
111 | | static int |
112 | | mcht_matches_match_key(struct sieve_match_context *mctx, |
113 | | const char *val, size_t val_size, |
114 | | const char *key, size_t key_size) |
115 | 0 | { |
116 | 0 | const struct sieve_comparator *cmp = mctx->comparator; |
117 | 0 | struct sieve_match_values *mvalues; |
118 | 0 | string_t *mvalue = NULL, *mchars = NULL; |
119 | 0 | string_t *section, *subsection; |
120 | 0 | const char *vend, *kend, *vp, *kp, *wp, *pvp; |
121 | 0 | bool backtrack = FALSE; /* TRUE: match of '?'-connected sections failed |
122 | | */ |
123 | 0 | char wcard = '\0'; /* Current wildcard */ |
124 | 0 | char next_wcard = '\0'; /* Next widlcard */ |
125 | 0 | unsigned int key_offset = 0; |
126 | 0 | unsigned int counter = 0; |
127 | |
|
128 | 0 | if (cmp->def == NULL || cmp->def->char_match == NULL) |
129 | 0 | return 0; |
130 | | |
131 | | /* Key sections */ |
132 | 0 | section = t_str_new(32); /* Section (after beginning or *) */ |
133 | 0 | subsection = t_str_new(32); /* Sub-section (after ?) */ |
134 | | |
135 | | /* Mark end of value and key */ |
136 | 0 | vend = (const char *) val + val_size; |
137 | 0 | kend = (const char *) key + key_size; |
138 | | |
139 | | /* Initialize pointers */ |
140 | 0 | vp = val; /* Value pointer */ |
141 | 0 | kp = key; /* Key pointer */ |
142 | 0 | wp = key; /* Wildcard (key) pointer */ |
143 | | |
144 | | /* Start match values list if requested */ |
145 | 0 | if ((mvalues = sieve_match_values_start(mctx->runenv)) != NULL) { |
146 | | /* Skip ${0} for now; added when match succeeds */ |
147 | 0 | sieve_match_values_add(mvalues, NULL); |
148 | |
|
149 | 0 | mvalue = t_str_new(32); /* Match value (*) */ |
150 | 0 | mchars = t_str_new(32); /* Match characters (.?..?.??) */ |
151 | 0 | } |
152 | | |
153 | | /* Match the pattern: |
154 | | <pattern> = <section>*<section>*<section>... |
155 | | <section> = <sub-section>?<sub-section>?<sub-section>... |
156 | | |
157 | | Escape sequences \? and \* need special attention. |
158 | | */ |
159 | |
|
160 | 0 | debug_printf("=== Start ===\n"); |
161 | 0 | debug_printf(" key: %s\n", t_strdup_until(key, kend)); |
162 | 0 | debug_printf(" value: %s\n", t_strdup_until(val, vend)); |
163 | | |
164 | | /* Loop until either key or value ends */ |
165 | 0 | while (kp < kend && vp < vend) { |
166 | 0 | const char *needle, *nend; |
167 | |
|
168 | 0 | if (++counter >= SIEVE_MATCHES_CPU_CHECK_INTERVAL) { |
169 | 0 | counter = 0; |
170 | 0 | if (sieve_runtime_cpu_limit_exceeded(mctx->runenv)) { |
171 | 0 | sieve_runtime_error( |
172 | 0 | mctx->runenv, NULL, |
173 | 0 | "execution exceeded CPU time limit"); |
174 | 0 | mctx->exec_status = |
175 | 0 | SIEVE_EXEC_RESOURCE_LIMIT; |
176 | 0 | sieve_match_values_abort(&mvalues); |
177 | 0 | return -1; |
178 | 0 | } |
179 | 0 | } |
180 | | |
181 | 0 | if (!backtrack) { |
182 | | /* Search the next '*' wildcard in the key string */ |
183 | |
|
184 | 0 | wcard = next_wcard; |
185 | | |
186 | | /* Find the needle to look for in the string */ |
187 | 0 | key_offset = 0; |
188 | 0 | for (;;) { |
189 | 0 | next_wcard = |
190 | 0 | _scan_key_section(section, &wp, kend); |
191 | |
|
192 | 0 | if (wcard == '\0' || str_len(section) > 0) |
193 | 0 | break; |
194 | 0 | if (next_wcard == '*') |
195 | 0 | break; |
196 | | |
197 | 0 | if (wp < kend) |
198 | 0 | wp++; |
199 | 0 | else |
200 | 0 | break; |
201 | 0 | key_offset++; |
202 | 0 | } |
203 | |
|
204 | 0 | debug_printf("found wildcard '%c' at pos [%d]\n", |
205 | 0 | next_wcard, (int)(wp - key)); |
206 | |
|
207 | 0 | if (mvalues != NULL) |
208 | 0 | str_truncate(mvalue, 0); |
209 | 0 | } else { |
210 | | /* Backtracked; '*' wildcard is retained */ |
211 | 0 | debug_printf("backtracked"); |
212 | 0 | backtrack = FALSE; |
213 | 0 | } |
214 | | |
215 | | /* Determine what we are looking for */ |
216 | 0 | needle = str_c(section); |
217 | 0 | nend = PTR_OFFSET(needle, str_len(section)); |
218 | |
|
219 | 0 | debug_printf(" section needle: '%s'\n", |
220 | 0 | t_strdup_until(needle, nend)); |
221 | 0 | debug_printf(" section key: '%s'\n", |
222 | 0 | t_strdup_until(kp, kend)); |
223 | 0 | debug_printf(" section remnant: '%s'\n", |
224 | 0 | t_strdup_until(wp, kend)); |
225 | 0 | debug_printf(" value remnant: '%s'\n", |
226 | 0 | t_strdup_until(vp, vend)); |
227 | 0 | debug_printf(" key offset: %d\n", key_offset); |
228 | |
|
229 | 0 | pvp = vp; |
230 | 0 | if (next_wcard == '\0') { |
231 | 0 | if (wcard == '\0') { |
232 | | /* No current wildcard; match needs to happen |
233 | | right at the beginning */ |
234 | 0 | debug_printf("next_wcard = NULL && wcard = NUL; " |
235 | 0 | "needle should be equal to value.\n"); |
236 | |
|
237 | 0 | if ((vend - vp) != (nend - needle) || |
238 | 0 | !cmp->def->char_match(cmp, &vp, vend, &needle, nend)) { |
239 | 0 | debug_printf(" key not equal to value\n"); |
240 | 0 | break; |
241 | 0 | } |
242 | |
|
243 | 0 | } else { |
244 | 0 | const char *qp, *qend; |
245 | 0 | size_t slen; |
246 | | |
247 | | /* No more wildcards; find the needle substring |
248 | | at the end of string */ |
249 | 0 | debug_printf("next_wcard = NUL; " |
250 | 0 | "must find needle at end\n"); |
251 | | |
252 | | /* Check if the value is still large enough to |
253 | | contain both the '?'-matched prefix |
254 | | (key_offset chars) and the trailing section. |
255 | | */ |
256 | 0 | slen = str_len(section); |
257 | 0 | if ((vp + slen + key_offset) > vend) { |
258 | 0 | debug_printf(" wont match: " |
259 | 0 | "value is too short\n"); |
260 | 0 | break; |
261 | 0 | } |
262 | | |
263 | | /* Move value pointer to where the needle should |
264 | | be */ |
265 | 0 | vp = vend - slen; |
266 | | |
267 | | /* Record match values */ |
268 | 0 | qend = vp; |
269 | 0 | qp = vp - key_offset; |
270 | | |
271 | | /* Compare needle to end of value string */ |
272 | 0 | if (!cmp->def->char_match(cmp, &vp, vend, |
273 | 0 | &needle, nend)) { |
274 | 0 | debug_printf(" match at end failed\n"); |
275 | 0 | break; |
276 | 0 | } |
277 | | |
278 | | /* Add match values */ |
279 | 0 | if (mvalues != NULL) { |
280 | 0 | i_assert(qp >= pvp); |
281 | 0 | str_append_data(mvalue, pvp, qp - pvp); |
282 | | |
283 | | /* Append '*' match value */ |
284 | 0 | sieve_match_values_add(mvalues, mvalue); |
285 | | |
286 | | /* Append any initial '?' match values |
287 | | */ |
288 | 0 | for (; qp < qend; qp++) { |
289 | 0 | sieve_match_values_add_char( |
290 | 0 | mvalues, *qp); |
291 | 0 | } |
292 | 0 | } |
293 | 0 | } |
294 | | |
295 | | /* Finish match */ |
296 | 0 | kp = kend; |
297 | 0 | vp = vend; |
298 | |
|
299 | 0 | debug_printf(" matched end of value\n"); |
300 | 0 | break; |
301 | 0 | } else { |
302 | | /* Next wildcard found; match needle before next |
303 | | wildcard */ |
304 | | |
305 | | /* Stored value pointer for backtrack */ |
306 | 0 | const char *prv = NULL; |
307 | | /* Stored key pointer for backtrack */ |
308 | 0 | const char *prk = NULL; |
309 | | /* Stored wildcard pointer for backtrack */ |
310 | 0 | const char *prw = NULL; |
311 | 0 | const char *chars; |
312 | | |
313 | | /* Reset '?' match values */ |
314 | 0 | if (mvalues != NULL) |
315 | 0 | str_truncate(mchars, 0); |
316 | |
|
317 | 0 | if (wcard == '\0') { |
318 | | /* No current wildcard; match needs to happen |
319 | | right at the beginning */ |
320 | 0 | debug_printf("wcard = NUL; " |
321 | 0 | "needle should be found at the beginning.\n"); |
322 | 0 | debug_printf(" begin needle: '%s'\n", |
323 | 0 | t_strdup_until(needle, nend)); |
324 | 0 | debug_printf(" begin value: '%s'\n", |
325 | 0 | t_strdup_until(vp, vend)); |
326 | |
|
327 | 0 | if (!cmp->def->char_match(cmp, &vp, vend, &needle, nend)) { |
328 | 0 | debug_printf(" failed to find needle at beginning\n"); |
329 | 0 | break; |
330 | 0 | } |
331 | |
|
332 | 0 | } else { |
333 | | /* Current wildcard present; match needle |
334 | | between current and next wildcard */ |
335 | 0 | debug_printf("wcard != NUL; " |
336 | 0 | "must find needle at an offset (>= %d).\n", |
337 | 0 | key_offset); |
338 | | |
339 | | /* Match may happen at any offset |
340 | | (>= key offset): find substring */ |
341 | 0 | vp += key_offset; |
342 | 0 | if (vp >= vend) { |
343 | 0 | debug_printf(" failed to find needle at an offset\n"); |
344 | 0 | break; |
345 | 0 | } |
346 | 0 | int fres = _string_find(mctx, cmp, &vp, vend, |
347 | 0 | &needle, nend, &counter); |
348 | 0 | if (fres < 0) { |
349 | 0 | sieve_match_values_abort(&mvalues); |
350 | 0 | return -1; |
351 | 0 | } |
352 | 0 | if (fres == 0) { |
353 | 0 | debug_printf(" failed to find needle at an offset\n"); |
354 | 0 | break; |
355 | 0 | } |
356 | | |
357 | 0 | prv = vp - str_len(section); |
358 | 0 | prk = kp; |
359 | 0 | prw = wp; |
360 | | |
361 | | /* Append match values */ |
362 | 0 | if (mvalues != NULL) { |
363 | 0 | const char *qend = vp - str_len(section); |
364 | 0 | const char *qp = qend - key_offset; |
365 | | |
366 | | /* Append '*' match value */ |
367 | 0 | str_append_data(mvalue, pvp, qp - pvp); |
368 | | |
369 | | /* Append any initial '?' match values |
370 | | (those that caused the key offset). |
371 | | */ |
372 | 0 | for (; qp < qend; qp++) |
373 | 0 | str_append_c(mchars, *qp); |
374 | 0 | } |
375 | 0 | } |
376 | | |
377 | | /* Update wildcard and key pointers for next wildcard |
378 | | scan */ |
379 | 0 | if (wp < kend) |
380 | 0 | wp++; |
381 | 0 | kp = wp; |
382 | | |
383 | | /* Scan successive '?' wildcards */ |
384 | 0 | while (next_wcard == '?') { |
385 | 0 | bool match_failed_empty = FALSE; |
386 | |
|
387 | 0 | debug_printf("next_wcard = '?'; " |
388 | 0 | "need to match arbitrary character\n"); |
389 | |
|
390 | 0 | i_assert(vp <= vend); |
391 | 0 | if (vp == vend) |
392 | 0 | match_failed_empty = TRUE; |
393 | 0 | else { |
394 | | /* Add match value */ |
395 | 0 | if (mvalues != NULL) |
396 | 0 | str_append_c(mchars, *vp); |
397 | |
|
398 | 0 | vp++; |
399 | | |
400 | | /* Scan for next '?' wildcard */ |
401 | 0 | next_wcard = _scan_key_section(subsection, &wp, kend); |
402 | 0 | debug_printf("found next wildcard '%c' at pos [%d] " |
403 | 0 | "(fixed match)\n", |
404 | 0 | next_wcard, (int)(wp - key)); |
405 | | |
406 | | /* Determine what we are looking for */ |
407 | 0 | needle = str_c(subsection); |
408 | 0 | nend = needle + str_len(subsection); |
409 | |
|
410 | 0 | debug_printf(" sub key: '%s'\n", |
411 | 0 | t_strdup_until(needle, nend)); |
412 | 0 | debug_printf(" value remnant: '%s'\n", |
413 | 0 | t_strdup_until(vp, vend)); |
414 | 0 | } |
415 | | |
416 | | /* Try matching the needle at fixed position */ |
417 | 0 | if (match_failed_empty || |
418 | 0 | (needle == nend && next_wcard == '\0' && vp < vend) || |
419 | 0 | !cmp->def->char_match(cmp, &vp, vend, &needle, nend)) { |
420 | | |
421 | | /* Match failed: now we have a problem. We need to |
422 | | backtrack to the previous '*' wildcard occurrence |
423 | | and start scanning for the next possible match. |
424 | | */ |
425 | |
|
426 | 0 | debug_printf(" failed fixed match\n"); |
427 | | |
428 | | /* Start backtrack */ |
429 | 0 | if (prv != NULL && prv + 1 < vend) { |
430 | | /* Restore pointers */ |
431 | 0 | vp = prv; |
432 | 0 | kp = prk; |
433 | 0 | wp = prw; |
434 | | |
435 | | /* Skip forward one value character to scan |
436 | | the next possible match */ |
437 | 0 | if (mvalues != NULL) |
438 | 0 | str_append_c(mvalue, *vp); |
439 | 0 | vp++; |
440 | | |
441 | | /* Set wildcard state appropriately */ |
442 | 0 | wcard = '*'; |
443 | 0 | next_wcard = '?'; |
444 | | |
445 | | /* Backtrack */ |
446 | 0 | backtrack = TRUE; |
447 | |
|
448 | 0 | debug_printf(" BACKTRACK\n"); |
449 | 0 | } |
450 | | |
451 | | /* Break '?' wildcard scanning loop */ |
452 | 0 | break; |
453 | 0 | } |
454 | | |
455 | | /* Update wildcard and key pointers for next wildcard scan */ |
456 | 0 | if (wp < kend) |
457 | 0 | wp++; |
458 | 0 | kp = wp; |
459 | 0 | } |
460 | | |
461 | 0 | if (!backtrack) { |
462 | 0 | unsigned int i; |
463 | |
|
464 | 0 | if (next_wcard == '?') { |
465 | 0 | debug_printf("failed to match '?'\n"); |
466 | 0 | break; |
467 | 0 | } |
468 | | |
469 | 0 | if (mvalues != NULL) { |
470 | 0 | if (prv != NULL) |
471 | 0 | sieve_match_values_add(mvalues, mvalue); |
472 | |
|
473 | 0 | chars = (const char *) str_data(mchars); |
474 | |
|
475 | 0 | for (i = 0; i < str_len(mchars); i++) |
476 | 0 | sieve_match_values_add_char(mvalues, chars[i]); |
477 | 0 | } |
478 | |
|
479 | 0 | if (next_wcard != '*') { |
480 | 0 | debug_printf("failed to match at end of string\n"); |
481 | 0 | break; |
482 | 0 | } |
483 | 0 | } |
484 | 0 | } |
485 | | |
486 | | /* Check whether string ends in a wildcard |
487 | | (avoid scanning the rest of the string) |
488 | | */ |
489 | 0 | if (kp == kend && next_wcard == '*') { |
490 | | /* Add the rest of the string as match value */ |
491 | 0 | if (mvalues != NULL) { |
492 | 0 | str_truncate(mvalue, 0); |
493 | 0 | i_assert(vend >= vp); |
494 | 0 | str_append_data(mvalue, vp, vend - vp); |
495 | 0 | sieve_match_values_add(mvalues, mvalue); |
496 | 0 | } |
497 | | |
498 | | /* Finish match */ |
499 | 0 | kp = kend; |
500 | 0 | vp = vend; |
501 | |
|
502 | 0 | debug_printf("key ends with '*'\n"); |
503 | 0 | break; |
504 | 0 | } |
505 | | |
506 | 0 | debug_printf("== Loop ==\n"); |
507 | 0 | } |
508 | | |
509 | | /* Eat away a trailing series of *s */ |
510 | 0 | if (vp == vend) { |
511 | 0 | while (kp < kend && *kp == '*') |
512 | 0 | kp++; |
513 | 0 | } |
514 | | |
515 | | /* By definition, the match is only successful if both value and key |
516 | | pattern are exhausted and we're not still trying to match '?' while |
517 | | the value is empty. |
518 | | */ |
519 | 0 | bool matched = (kp == kend && vp == vend && next_wcard != '?'); |
520 | |
|
521 | 0 | debug_printf("=== Finish ===\n"); |
522 | 0 | debug_printf(" result: %s\n", matched ? "true" : "false"); |
523 | |
|
524 | 0 | if (matched) { |
525 | | /* Activate new match values after successful match */ |
526 | 0 | if (mvalues != NULL) { |
527 | | /* Set ${0} */ |
528 | 0 | string_t *matched = str_new_const( |
529 | 0 | pool_datastack_create(), val, val_size); |
530 | 0 | sieve_match_values_set(mvalues, 0, matched); |
531 | | |
532 | | /* Commit new match values */ |
533 | 0 | sieve_match_values_commit(mctx->runenv, &mvalues); |
534 | 0 | } |
535 | 0 | return 1; |
536 | 0 | } |
537 | | |
538 | | /* No match; drop collected match values */ |
539 | 0 | sieve_match_values_abort(&mvalues); |
540 | 0 | return 0; |
541 | 0 | } |