/src/dovecot/src/lib-imap/imap-match.c
Line | Count | Source |
1 | | /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | /* imap_match_init() logic originates from Cyrus, but the code is fully |
4 | | rewritten. */ |
5 | | |
6 | | #include "lib.h" |
7 | | #include "array.h" |
8 | | #include "unichar.h" |
9 | | #include "imap-match.h" |
10 | | |
11 | | #include <ctype.h> |
12 | | |
13 | | struct imap_match_pattern { |
14 | | const char *pattern; |
15 | | bool inboxcase; |
16 | | }; |
17 | | |
18 | | struct imap_match_glob { |
19 | | pool_t pool; |
20 | | |
21 | | struct imap_match_pattern *patterns; |
22 | | |
23 | | char sep; |
24 | | char patterns_data[FLEXIBLE_ARRAY_MEMBER]; |
25 | | }; |
26 | | |
27 | | struct imap_match_context { |
28 | | const char *inboxcase_end; |
29 | | |
30 | | char sep; |
31 | | bool inboxcase; |
32 | | }; |
33 | | |
34 | | /* name of "INBOX" - must not have repeated substrings */ |
35 | | static const char inbox[] = "INBOX"; |
36 | 0 | #define INBOXLEN (sizeof(inbox) - 1) |
37 | | |
38 | | struct imap_match_glob * |
39 | | imap_match_init(pool_t pool, const char *pattern, |
40 | | bool inboxcase, char separator) |
41 | 0 | { |
42 | 0 | const char *patterns[2]; |
43 | |
|
44 | 0 | patterns[0] = pattern; |
45 | 0 | patterns[1] = NULL; |
46 | 0 | return imap_match_init_multiple(pool, patterns, inboxcase, separator); |
47 | 0 | } |
48 | | |
49 | | static const char *pattern_compress(const char *pattern) |
50 | 0 | { |
51 | 0 | char *dest, *ret; |
52 | |
|
53 | 0 | dest = ret = t_strdup_noconst(pattern); |
54 | | |
55 | | /* @UNSAFE: compress the pattern */ |
56 | 0 | while (*pattern != '\0') { |
57 | 0 | if (*pattern == '*' || *pattern == '%') { |
58 | | /* remove duplicate hierarchy wildcards */ |
59 | 0 | while (*pattern == '%') pattern++; |
60 | | |
61 | | /* "%*" -> "*" */ |
62 | 0 | if (*pattern == '*') { |
63 | | /* remove duplicate wildcards */ |
64 | 0 | while (*pattern == '*' || *pattern == '%') |
65 | 0 | pattern++; |
66 | 0 | *dest++ = '*'; |
67 | 0 | } else { |
68 | 0 | *dest++ = '%'; |
69 | 0 | } |
70 | 0 | } else { |
71 | 0 | *dest++ = *pattern++; |
72 | 0 | } |
73 | 0 | } |
74 | 0 | *dest = '\0'; |
75 | 0 | return ret; |
76 | 0 | } |
77 | | |
78 | | static bool pattern_is_inboxcase(const char *pattern, char separator) |
79 | 0 | { |
80 | 0 | const char *p = pattern, *inboxp = inbox; |
81 | | |
82 | | /* skip over exact matches */ |
83 | 0 | while (*inboxp == i_toupper(*p) && *p != '\0') { |
84 | 0 | inboxp++; p++; |
85 | 0 | } |
86 | 0 | if (*p != '%') { |
87 | 0 | return *p == '*' || *p == separator || |
88 | 0 | (*inboxp == '\0' && *p == '\0'); |
89 | 0 | } |
90 | | |
91 | | /* handle 'I%B%X' style checks */ |
92 | 0 | for (; *p != '\0' && *p != '*' && *p != separator; p++) { |
93 | 0 | if (*p != '%') { |
94 | 0 | inboxp = strchr(inboxp, i_toupper(*p)); |
95 | 0 | if (inboxp == NULL) |
96 | 0 | return FALSE; |
97 | | |
98 | 0 | if (*++inboxp == '\0') { |
99 | | /* now check that it doesn't end with |
100 | | any invalid chars */ |
101 | 0 | if (*++p == '%') p++; |
102 | 0 | if (*p != '\0' && *p != '*' && |
103 | 0 | *p != separator) |
104 | 0 | return FALSE; |
105 | 0 | break; |
106 | 0 | } |
107 | 0 | } |
108 | 0 | } |
109 | 0 | return TRUE; |
110 | 0 | } |
111 | | |
112 | | static struct imap_match_glob * |
113 | | imap_match_init_multiple_real(pool_t pool, const char *const *patterns, |
114 | | bool inboxcase, char separator) |
115 | 0 | { |
116 | 0 | struct imap_match_glob *glob; |
117 | 0 | struct imap_match_pattern *match_patterns; |
118 | 0 | unsigned int i, patterns_count; |
119 | 0 | size_t len, pos, patterns_data_len = 0; |
120 | |
|
121 | 0 | patterns_count = str_array_length(patterns); |
122 | 0 | match_patterns = p_new(pool, struct imap_match_pattern, |
123 | 0 | patterns_count + 1); |
124 | | |
125 | | /* compress the patterns */ |
126 | 0 | for (i = 0; i < patterns_count; i++) { |
127 | 0 | match_patterns[i].pattern = pattern_compress(patterns[i]); |
128 | 0 | match_patterns[i].inboxcase = inboxcase && |
129 | 0 | pattern_is_inboxcase(match_patterns[i].pattern, |
130 | 0 | separator); |
131 | |
|
132 | 0 | patterns_data_len += strlen(match_patterns[i].pattern) + 1; |
133 | 0 | } |
134 | 0 | patterns_count = i; |
135 | | |
136 | | /* now we know how much memory we need */ |
137 | 0 | size_t glob_alloc_size = |
138 | 0 | MALLOC_ADD(sizeof(struct imap_match_glob), patterns_data_len); |
139 | 0 | glob = p_malloc(pool, glob_alloc_size); |
140 | 0 | glob->pool = pool; |
141 | 0 | glob->sep = separator; |
142 | | |
143 | | /* copy pattern strings to our allocated memory */ |
144 | 0 | for (i = 0, pos = 0; i < patterns_count; i++) { |
145 | 0 | len = strlen(match_patterns[i].pattern) + 1; |
146 | 0 | i_assert(pos + len <= patterns_data_len); |
147 | | |
148 | | /* @UNSAFE */ |
149 | 0 | memcpy(glob->patterns_data + pos, |
150 | 0 | match_patterns[i].pattern, len); |
151 | 0 | match_patterns[i].pattern = glob->patterns_data + pos; |
152 | 0 | pos += len; |
153 | 0 | } |
154 | 0 | glob->patterns = match_patterns; |
155 | 0 | return glob; |
156 | 0 | } |
157 | | |
158 | | struct imap_match_glob * |
159 | | imap_match_init_multiple(pool_t pool, const char *const *patterns, |
160 | | bool inboxcase, char separator) |
161 | 0 | { |
162 | 0 | struct imap_match_glob *glob; |
163 | |
|
164 | 0 | if (pool->datastack_pool) { |
165 | 0 | return imap_match_init_multiple_real(pool, patterns, |
166 | 0 | inboxcase, separator); |
167 | 0 | } |
168 | 0 | T_BEGIN { |
169 | 0 | glob = imap_match_init_multiple_real(pool, patterns, |
170 | 0 | inboxcase, separator); |
171 | 0 | } T_END; |
172 | 0 | return glob; |
173 | 0 | } |
174 | | |
175 | | void imap_match_deinit(struct imap_match_glob **glob) |
176 | 0 | { |
177 | 0 | if (glob == NULL || *glob == NULL) |
178 | 0 | return; |
179 | 0 | p_free((*glob)->pool, (*glob)->patterns); |
180 | 0 | p_free((*glob)->pool, *glob); |
181 | 0 | *glob = NULL; |
182 | 0 | } |
183 | | |
184 | | static struct imap_match_glob * |
185 | | imap_match_dup_real(pool_t pool, const struct imap_match_glob *glob) |
186 | 0 | { |
187 | 0 | ARRAY_TYPE(const_string) patterns; |
188 | 0 | const struct imap_match_pattern *p; |
189 | 0 | bool inboxcase = FALSE; |
190 | |
|
191 | 0 | t_array_init(&patterns, 8); |
192 | 0 | for (p = glob->patterns; p->pattern != NULL; p++) { |
193 | 0 | if (p->inboxcase) |
194 | 0 | inboxcase = TRUE; |
195 | 0 | array_push_back(&patterns, &p->pattern); |
196 | 0 | } |
197 | 0 | array_append_zero(&patterns); |
198 | 0 | return imap_match_init_multiple_real(pool, array_front(&patterns), |
199 | 0 | inboxcase, glob->sep); |
200 | 0 | } |
201 | | |
202 | | struct imap_match_glob * |
203 | | imap_match_dup(pool_t pool, const struct imap_match_glob *glob) |
204 | 0 | { |
205 | 0 | struct imap_match_glob *new_glob; |
206 | |
|
207 | 0 | if (pool->datastack_pool) { |
208 | 0 | return imap_match_dup_real(pool, glob); |
209 | 0 | } else { |
210 | 0 | T_BEGIN { |
211 | 0 | new_glob = imap_match_dup_real(pool, glob); |
212 | 0 | } T_END; |
213 | 0 | return new_glob; |
214 | 0 | } |
215 | 0 | } |
216 | | |
217 | | bool imap_match_globs_equal(const struct imap_match_glob *glob1, |
218 | | const struct imap_match_glob *glob2) |
219 | 0 | { |
220 | 0 | const struct imap_match_pattern *p1 = glob1->patterns; |
221 | 0 | const struct imap_match_pattern *p2 = glob2->patterns; |
222 | |
|
223 | 0 | if (glob1->sep != glob2->sep) |
224 | 0 | return FALSE; |
225 | | |
226 | 0 | for (; p1->pattern != NULL && p2->pattern != NULL; p1++, p2++) { |
227 | 0 | if (strcmp(p1->pattern, p2->pattern) != 0) |
228 | 0 | return FALSE; |
229 | 0 | if (p1->inboxcase != p2->inboxcase) |
230 | 0 | return FALSE; |
231 | 0 | } |
232 | 0 | return p1->pattern == p2->pattern; |
233 | 0 | } |
234 | | |
235 | | static inline bool |
236 | | match_gc(struct imap_match_context *ctx, |
237 | | struct uni_gc_scanner *gcsc_data, struct uni_gc_scanner *gcsc_pattern) |
238 | 0 | { |
239 | 0 | const unsigned char *pat_gc, *data_gc; |
240 | 0 | size_t pat_gc_size, data_gc_size; |
241 | |
|
242 | 0 | pat_gc = uni_gc_scan_get(gcsc_pattern, &pat_gc_size); |
243 | 0 | data_gc = uni_gc_scan_get(gcsc_data, &data_gc_size); |
244 | |
|
245 | 0 | if (pat_gc_size != data_gc_size) |
246 | 0 | return FALSE; |
247 | 0 | if (memcmp(data_gc, pat_gc, data_gc_size) == 0) |
248 | 0 | return TRUE; |
249 | 0 | if (data_gc_size != 1) |
250 | 0 | return FALSE; |
251 | 0 | return ((const char *)data_gc < ctx->inboxcase_end && |
252 | 0 | i_toupper(data_gc[0]) == i_toupper(pat_gc[0])); |
253 | 0 | } |
254 | | |
255 | | static enum imap_match_result |
256 | | match_sub(struct imap_match_context *ctx, |
257 | | struct uni_gc_scanner *gcsc_data_p, |
258 | | struct uni_gc_scanner *gcsc_pattern_p) |
259 | 0 | { |
260 | 0 | enum imap_match_result ret, match; |
261 | 0 | struct uni_gc_scanner gcsc_data = *gcsc_data_p; |
262 | 0 | struct uni_gc_scanner gcsc_pattern = *gcsc_pattern_p; |
263 | 0 | const unsigned char *pat_gc_prev = NULL, *data_gc_prev = NULL; |
264 | 0 | size_t pat_gc_prev_size = 0, data_gc_prev_size = 0; |
265 | | |
266 | | /* match all non-wildcards */ |
267 | 0 | while (!uni_gc_scan_at_end(&gcsc_pattern) && |
268 | 0 | !uni_gc_scan_ascii_equals(&gcsc_pattern, '*') && |
269 | 0 | !uni_gc_scan_ascii_equals(&gcsc_pattern, '%')) { |
270 | 0 | if (!match_gc(ctx, &gcsc_data, &gcsc_pattern)) { |
271 | 0 | if (!uni_gc_scan_at_end(&gcsc_data)) |
272 | 0 | return IMAP_MATCH_NO; |
273 | 0 | if (uni_gc_scan_ascii_equals(&gcsc_pattern, ctx->sep)) |
274 | 0 | return IMAP_MATCH_CHILDREN; |
275 | 0 | if (pat_gc_prev_size == 1 && |
276 | 0 | pat_gc_prev[0] == ctx->sep) { |
277 | | /* data="foo/" pattern = "foo/bar/%" */ |
278 | 0 | return IMAP_MATCH_CHILDREN; |
279 | 0 | } |
280 | 0 | return IMAP_MATCH_NO; |
281 | 0 | } |
282 | | |
283 | 0 | pat_gc_prev = uni_gc_scan_get(&gcsc_pattern, &pat_gc_prev_size); |
284 | 0 | data_gc_prev = uni_gc_scan_get(&gcsc_data, &data_gc_prev_size); |
285 | 0 | uni_gc_scan_shift(&gcsc_pattern); |
286 | 0 | uni_gc_scan_shift(&gcsc_data); |
287 | 0 | } |
288 | 0 | if (uni_gc_scan_at_end(&gcsc_data) && |
289 | 0 | data_gc_prev_size == 1 && data_gc_prev[0] == ctx->sep && |
290 | 0 | !uni_gc_scan_at_end(&gcsc_pattern)) { |
291 | | /* data="/" pattern="/%..." */ |
292 | 0 | match = IMAP_MATCH_CHILDREN; |
293 | 0 | } else { |
294 | 0 | match = IMAP_MATCH_NO; |
295 | 0 | } |
296 | 0 | while (uni_gc_scan_ascii_equals(&gcsc_pattern, '%')) { |
297 | 0 | uni_gc_scan_shift(&gcsc_pattern); |
298 | |
|
299 | 0 | if (uni_gc_scan_at_end(&gcsc_pattern)) { |
300 | | /* match, if this is the last hierarchy */ |
301 | 0 | while (!uni_gc_scan_at_end(&gcsc_data) && |
302 | 0 | !uni_gc_scan_ascii_equals(&gcsc_data, ctx->sep)) |
303 | 0 | uni_gc_scan_shift(&gcsc_data); |
304 | 0 | break; |
305 | 0 | } |
306 | | |
307 | | /* skip over this hierarchy */ |
308 | 0 | while (!uni_gc_scan_at_end(&gcsc_data)) { |
309 | 0 | if (match_gc(ctx, &gcsc_data, &gcsc_pattern)) { |
310 | 0 | ret = match_sub(ctx, &gcsc_data, |
311 | 0 | &gcsc_pattern); |
312 | 0 | if (ret == IMAP_MATCH_YES) |
313 | 0 | break; |
314 | | |
315 | 0 | match |= ret; |
316 | 0 | } |
317 | | |
318 | 0 | if (uni_gc_scan_ascii_equals(&gcsc_data, ctx->sep)) |
319 | 0 | break; |
320 | | |
321 | 0 | uni_gc_scan_shift(&gcsc_data); |
322 | 0 | } |
323 | 0 | } |
324 | |
|
325 | 0 | if (!uni_gc_scan_ascii_equals(&gcsc_pattern, '*')) { |
326 | 0 | if (uni_gc_scan_at_end(&gcsc_data) && |
327 | 0 | !uni_gc_scan_at_end(&gcsc_pattern)) { |
328 | 0 | if (uni_gc_scan_ascii_equals(&gcsc_pattern, |
329 | 0 | ctx->sep)) |
330 | 0 | match |= IMAP_MATCH_CHILDREN; |
331 | 0 | return match; |
332 | 0 | } |
333 | | |
334 | 0 | if (!uni_gc_scan_at_end(&gcsc_data)) { |
335 | 0 | if (uni_gc_scan_at_end(&gcsc_pattern) && |
336 | 0 | uni_gc_scan_ascii_equals(&gcsc_data, ctx->sep)) |
337 | 0 | match |= IMAP_MATCH_PARENT; |
338 | 0 | return match; |
339 | 0 | } |
340 | 0 | } |
341 | | |
342 | 0 | *gcsc_data_p = gcsc_data; |
343 | 0 | *gcsc_pattern_p = gcsc_pattern; |
344 | 0 | return IMAP_MATCH_YES; |
345 | 0 | } |
346 | | |
347 | | static enum imap_match_result |
348 | | imap_match_pattern(struct imap_match_context *ctx, |
349 | | const char *data, const char *pattern) |
350 | 0 | { |
351 | 0 | enum imap_match_result ret, match; |
352 | |
|
353 | 0 | ctx->inboxcase_end = data; |
354 | 0 | if (ctx->inboxcase && strncasecmp(data, inbox, INBOXLEN) == 0 && |
355 | 0 | (data[INBOXLEN] == '\0' || data[INBOXLEN] == ctx->sep)) { |
356 | | /* data begins with INBOX/, use case-insensitive comparison |
357 | | for it */ |
358 | 0 | ctx->inboxcase_end += INBOXLEN; |
359 | 0 | } |
360 | |
|
361 | 0 | struct uni_gc_scanner gcsc_data; |
362 | 0 | struct uni_gc_scanner gcsc_pattern; |
363 | |
|
364 | 0 | uni_gc_scanner_init(&gcsc_data, data, strlen(data)); |
365 | 0 | uni_gc_scanner_init(&gcsc_pattern, pattern, strlen(pattern)); |
366 | |
|
367 | 0 | if (!uni_gc_scan_ascii_equals(&gcsc_pattern, '*')) { |
368 | | /* handle the pattern up to the first '*' */ |
369 | 0 | ret = match_sub(ctx, &gcsc_data, &gcsc_pattern); |
370 | 0 | if (ret != IMAP_MATCH_YES || |
371 | 0 | uni_gc_scan_at_end(&gcsc_pattern)) |
372 | 0 | return ret; |
373 | 0 | } |
374 | | |
375 | 0 | match = IMAP_MATCH_CHILDREN; |
376 | 0 | while (uni_gc_scan_ascii_equals(&gcsc_pattern, '*')) { |
377 | 0 | uni_gc_scan_shift(&gcsc_pattern); |
378 | |
|
379 | 0 | if (uni_gc_scan_at_end(&gcsc_pattern)) |
380 | 0 | return IMAP_MATCH_YES; |
381 | | |
382 | 0 | while (!uni_gc_scan_at_end(&gcsc_data)) { |
383 | 0 | if (match_gc(ctx, &gcsc_data, &gcsc_pattern)) { |
384 | 0 | ret = match_sub(ctx, &gcsc_data, &gcsc_pattern); |
385 | 0 | if (ret == IMAP_MATCH_YES) |
386 | 0 | break; |
387 | 0 | match |= ret; |
388 | 0 | } |
389 | 0 | uni_gc_scan_shift(&gcsc_data); |
390 | 0 | } |
391 | 0 | } |
392 | | |
393 | 0 | return ((uni_gc_scan_at_end(&gcsc_data) && |
394 | 0 | uni_gc_scan_at_end(&gcsc_pattern)) ? |
395 | 0 | IMAP_MATCH_YES : match); |
396 | 0 | } |
397 | | |
398 | | enum imap_match_result |
399 | | imap_match(struct imap_match_glob *glob, const char *data) |
400 | 0 | { |
401 | 0 | struct imap_match_context ctx; |
402 | 0 | unsigned int i; |
403 | 0 | enum imap_match_result ret, match; |
404 | |
|
405 | 0 | match = IMAP_MATCH_NO; |
406 | 0 | ctx.sep = glob->sep; |
407 | 0 | for (i = 0; glob->patterns[i].pattern != NULL; i++) { |
408 | 0 | ctx.inboxcase = glob->patterns[i].inboxcase; |
409 | |
|
410 | 0 | ret = imap_match_pattern(&ctx, data, glob->patterns[i].pattern); |
411 | 0 | if (ret == IMAP_MATCH_YES) |
412 | 0 | return IMAP_MATCH_YES; |
413 | | |
414 | 0 | match |= ret; |
415 | 0 | } |
416 | | |
417 | 0 | return match; |
418 | 0 | } |