/src/proftpd/src/regexp.c
Line | Count | Source |
1 | | /* |
2 | | * ProFTPD - FTP server daemon |
3 | | * Copyright (c) 1997, 1998 Public Flood Software |
4 | | * Copyright (c) 1999, 2000 MacGyver aka Habeeb J. Dihu <macgyver@tos.net> |
5 | | * Copyright (c) 2001-2021 The ProFTPD Project team |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA. |
20 | | * |
21 | | * As a special exemption, Public Flood Software/MacGyver aka Habeeb J. Dihu |
22 | | * and other respective copyright holders give permission to link this program |
23 | | * with OpenSSL, and distribute the resulting executable, without including |
24 | | * the source code for OpenSSL in the source distribution. |
25 | | */ |
26 | | |
27 | | /* Regex management code. */ |
28 | | |
29 | | #include "conf.h" |
30 | | |
31 | | #ifdef PR_USE_REGEX |
32 | | |
33 | | #if defined(PR_USE_PCRE2) |
34 | | struct regexp_rec { |
35 | | pool *regex_pool; |
36 | | |
37 | | /* Owning module */ |
38 | | module *m; |
39 | | |
40 | | /* Copy of the original regular expression pattern, flags */ |
41 | | const char *pattern; |
42 | | int flags; |
43 | | |
44 | | /* For callers wishing to use POSIX REs */ |
45 | | regex_t *re; |
46 | | |
47 | | /* For callers wishing to use PCRE2 REs */ |
48 | | pcre2_code *pcre2; |
49 | | pcre2_general_context *pcre2_general_ctx; |
50 | | pcre2_match_context *pcre2_match_ctx; |
51 | | |
52 | | PCRE2_UCHAR *pcre2_errstr; |
53 | | PCRE2_SIZE pcre2_errstrsz; |
54 | | }; |
55 | | |
56 | | static uint32_t pcre2_match_limit = 0; |
57 | | static uint32_t pcre2_match_limit_recursion = 0; |
58 | | |
59 | | #elif defined(PR_USE_PCRE) |
60 | | struct regexp_rec { |
61 | | pool *regex_pool; |
62 | | |
63 | | /* Owning module */ |
64 | | module *m; |
65 | | |
66 | | /* Copy of the original regular expression pattern, flags */ |
67 | | const char *pattern; |
68 | | int flags; |
69 | | |
70 | | /* For callers wishing to use POSIX REs */ |
71 | | regex_t *re; |
72 | | |
73 | | /* For callers wishing to use PCRE REs */ |
74 | | pcre *pcre; |
75 | | pcre_extra *pcre_extra; |
76 | | |
77 | | const char *pcre_errstr; |
78 | | }; |
79 | | |
80 | | static unsigned long pcre_match_limit = 0; |
81 | | static unsigned long pcre_match_limit_recursion = 0; |
82 | | |
83 | | #else /* !PR_USE_PCRE2 and !PR_USE_PCRE */ |
84 | | struct regexp_rec { |
85 | | pool *regex_pool; |
86 | | |
87 | | /* Owning module */ |
88 | | module *m; |
89 | | |
90 | | /* Copy of the original regular expression pattern, flags */ |
91 | | const char *pattern; |
92 | | int flags; |
93 | | |
94 | | /* For callers wishing to use POSIX REs */ |
95 | | regex_t *re; |
96 | | }; |
97 | | #endif /* PR_USE_PCRE */ |
98 | | |
99 | | static pool *regexp_pool = NULL; |
100 | | static array_header *regexp_list = NULL; |
101 | | |
102 | | #if defined(PR_USE_PCRE2) || \ |
103 | | defined(PR_USE_PCRE) |
104 | | static int regexp_use_posix = FALSE; |
105 | | #else |
106 | | static int regexp_use_posix = TRUE; |
107 | | #endif /* PR_USE_PCRE */ |
108 | | |
109 | | static const char *trace_channel = "regexp"; |
110 | | |
111 | 0 | static void regexp_free(pr_regex_t *pre) { |
112 | | #if defined(PR_USE_PCRE2) |
113 | | if (pre->pcre2 != NULL) { |
114 | | pcre2_code_free(pre->pcre2); |
115 | | pre->pcre2 = NULL; |
116 | | } |
117 | | |
118 | | if (pre->pcre2_general_ctx != NULL) { |
119 | | pcre2_general_context_free(pre->pcre2_general_ctx); |
120 | | pre->pcre2_general_ctx = NULL; |
121 | | } |
122 | | |
123 | | if (pre->pcre2_match_ctx != NULL) { |
124 | | pcre2_match_context_free(pre->pcre2_match_ctx); |
125 | | pre->pcre2_match_ctx = NULL; |
126 | | } |
127 | | #endif /* PR_USE_PCRE2 */ |
128 | |
|
129 | | #if defined(PR_USE_PCRE) |
130 | | if (pre->pcre != NULL) { |
131 | | # if defined(HAVE_PCRE_PCRE_FREE_STUDY) |
132 | | pcre_free_study(pre->pcre_extra); |
133 | | # endif /* HAVE_PCRE_PCRE_FREE_STUDY */ |
134 | | pre->pcre_extra = NULL; |
135 | | pcre_free(pre->pcre); |
136 | | pre->pcre = NULL; |
137 | | } |
138 | | #endif /* PR_USE_PCRE */ |
139 | |
|
140 | 0 | if (pre->re != NULL) { |
141 | | /* This frees memory associated with this pointer by regcomp(3). */ |
142 | | # if defined(HAVE_PCRE2_PCRE2_REGCOMP) |
143 | | pcre2_regfree(pre->re); |
144 | | # else |
145 | 0 | regfree(pre->re); |
146 | 0 | # endif /* HAVE_PCRE2_PCRE2_REGCOMP */ |
147 | 0 | pre->re = NULL; |
148 | 0 | } |
149 | |
|
150 | 0 | pre->pattern = NULL; |
151 | 0 | destroy_pool(pre->regex_pool); |
152 | 0 | } |
153 | | |
154 | 0 | static void regexp_cleanup(void) { |
155 | | /* Only perform this cleanup if necessary */ |
156 | 0 | if (regexp_pool) { |
157 | 0 | register unsigned int i = 0; |
158 | 0 | pr_regex_t **pres = (pr_regex_t **) regexp_list->elts; |
159 | |
|
160 | 0 | for (i = 0; i < regexp_list->nelts; i++) { |
161 | 0 | if (pres[i] != NULL) { |
162 | 0 | regexp_free(pres[i]); |
163 | 0 | pres[i] = NULL; |
164 | 0 | } |
165 | 0 | } |
166 | |
|
167 | 0 | destroy_pool(regexp_pool); |
168 | 0 | regexp_pool = NULL; |
169 | 0 | regexp_list = NULL; |
170 | 0 | } |
171 | 0 | } |
172 | | |
/* Event listener which releases all tracked regexes; neither argument is
 * consulted.
 */
static void regexp_exit_ev(const void *event_data, void *user_data) {
  (void) event_data;
  (void) user_data;

  regexp_cleanup();
}
176 | | |
/* Event listener which releases all tracked regexes; neither argument is
 * consulted.
 */
static void regexp_restart_ev(const void *event_data, void *user_data) {
  (void) event_data;
  (void) user_data;

  regexp_cleanup();
}
180 | | |
181 | 0 | pr_regex_t *pr_regexp_alloc(module *m) { |
182 | 0 | pr_regex_t *pre = NULL; |
183 | 0 | pool *re_pool = NULL; |
184 | | |
185 | | /* If no regex-tracking list has been allocated, create one. Register a |
186 | | * cleanup handler for this pool, to free up the data in the list. |
187 | | */ |
188 | 0 | if (regexp_pool == NULL) { |
189 | 0 | regexp_pool = make_sub_pool(permanent_pool); |
190 | 0 | pr_pool_tag(regexp_pool, "Regexp Pool"); |
191 | 0 | regexp_list = make_array(regexp_pool, 0, sizeof(pr_regex_t *)); |
192 | 0 | } |
193 | |
|
194 | 0 | re_pool = pr_pool_create_sz(regexp_pool, 128); |
195 | 0 | pr_pool_tag(re_pool, "regexp pool"); |
196 | |
|
197 | 0 | pre = pcalloc(re_pool, sizeof(pr_regex_t)); |
198 | 0 | pre->regex_pool = re_pool; |
199 | 0 | pre->m = m; |
200 | | |
201 | | /* Add this pointer to the array. */ |
202 | 0 | *((pr_regex_t **) push_array(regexp_list)) = pre; |
203 | |
|
204 | 0 | return pre; |
205 | 0 | } |
206 | | |
207 | 0 | void pr_regexp_free(module *m, pr_regex_t *pre) { |
208 | 0 | register unsigned int i = 0; |
209 | 0 | pr_regex_t **pres = NULL; |
210 | |
|
211 | 0 | if (regexp_list == NULL) { |
212 | 0 | return; |
213 | 0 | } |
214 | | |
215 | 0 | pres = (pr_regex_t **) regexp_list->elts; |
216 | |
|
217 | 0 | for (i = 0; i < regexp_list->nelts; i++) { |
218 | 0 | if (pres[i] == NULL) { |
219 | 0 | continue; |
220 | 0 | } |
221 | | |
222 | 0 | if ((pre != NULL && pres[i] == pre) || |
223 | 0 | (m != NULL && pres[i]->m == m)) { |
224 | 0 | regexp_free(pres[i]); |
225 | 0 | pres[i] = NULL; |
226 | 0 | } |
227 | 0 | } |
228 | 0 | } |
229 | | |
230 | | #if defined(PR_USE_PCRE2) |
231 | | static int regexp_compile_pcre2(pr_regex_t *pre, const char *pattern, |
232 | | int flags) { |
233 | | int res; |
234 | | PCRE2_SIZE err_offset; |
235 | | |
236 | | if (pre == NULL || |
237 | | pattern == NULL) { |
238 | | errno = EINVAL; |
239 | | return -1; |
240 | | } |
241 | | |
242 | | pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into PCRE2 regex", |
243 | | pattern); |
244 | | pre->pattern = pstrdup(pre->regex_pool, pattern); |
245 | | pre->flags = flags; |
246 | | |
247 | | pre->pcre2 = pcre2_compile((PCRE2_SPTR) pattern, PCRE2_ZERO_TERMINATED, |
248 | | flags, &res, &err_offset, NULL); |
249 | | if (pre->pcre2 == NULL) { |
250 | | if (pre->pcre2_errstr == NULL) { |
251 | | pre->pcre2_errstrsz = 128; |
252 | | pre->pcre2_errstr = pcalloc(pre->regex_pool, pre->pcre2_errstrsz); |
253 | | } |
254 | | |
255 | | pcre2_get_error_message(res, pre->pcre2_errstr, pre->pcre2_errstrsz); |
256 | | pr_trace_msg(trace_channel, 4, |
257 | | "error compiling pattern '%s' into PCRE2 regex: %s", pattern, |
258 | | pre->pcre2_errstr); |
259 | | return -1; |
260 | | } |
261 | | |
262 | | /* Prepare the JIT compiler as well. */ |
263 | | res = pcre2_jit_compile(pre->pcre2, PCRE2_JIT_COMPLETE); |
264 | | if (res != 0) { |
265 | | if (pre->pcre2_errstr == NULL) { |
266 | | pre->pcre2_errstrsz = 128; |
267 | | pre->pcre2_errstr = pcalloc(pre->regex_pool, pre->pcre2_errstrsz); |
268 | | } |
269 | | |
270 | | pcre2_get_error_message(res, pre->pcre2_errstr, pre->pcre2_errstrsz); |
271 | | pr_trace_msg(trace_channel, 4, |
272 | | "error performing PCRE2 JIT compile for pattern '%s': %s", pattern, |
273 | | pre->pcre2_errstr); |
274 | | } |
275 | | |
276 | | return 0; |
277 | | } |
278 | | #endif /* PR_USE_PCRE2 */ |
279 | | |
280 | | #if defined(PR_USE_PCRE) |
281 | | static int regexp_compile_pcre(pr_regex_t *pre, const char *pattern, |
282 | | int flags) { |
283 | | int err_offset, study_flags = 0; |
284 | | |
285 | | if (pre == NULL || |
286 | | pattern == NULL) { |
287 | | errno = EINVAL; |
288 | | return -1; |
289 | | } |
290 | | |
291 | | pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into PCRE regex", |
292 | | pattern); |
293 | | pre->pattern = pstrdup(pre->regex_pool, pattern); |
294 | | pre->flags = flags; |
295 | | |
296 | | pre->pcre = pcre_compile(pattern, flags, &(pre->pcre_errstr), &err_offset, |
297 | | NULL); |
298 | | if (pre->pcre == NULL) { |
299 | | pr_trace_msg(trace_channel, 4, |
300 | | "error compiling pattern '%s' into PCRE regex: %s", pattern, |
301 | | pre->pcre_errstr); |
302 | | return -1; |
303 | | } |
304 | | |
305 | | /* Study the pattern as well, just in case. */ |
306 | | #ifdef PCRE_STUDY_JIT_COMPILE |
307 | | study_flags = PCRE_STUDY_JIT_COMPILE; |
308 | | #endif /* PCRE_STUDY_JIT_COMPILE */ |
309 | | pr_trace_msg(trace_channel, 9, "studying pattern '%s' for PCRE extra data", |
310 | | pattern); |
311 | | pre->pcre_extra = pcre_study(pre->pcre, study_flags, &(pre->pcre_errstr)); |
312 | | if (pre->pcre_extra == NULL) { |
313 | | if (pre->pcre_errstr != NULL) { |
314 | | pr_trace_msg(trace_channel, 4, |
315 | | "error studying pattern '%s' for PCRE regex: %s", pattern, |
316 | | pre->pcre_errstr); |
317 | | } |
318 | | } |
319 | | |
320 | | return 0; |
321 | | } |
322 | | #endif /* PR_USE_PCRE */ |
323 | | |
324 | 0 | int pr_regexp_compile_posix(pr_regex_t *pre, const char *pattern, int flags) { |
325 | 0 | int res; |
326 | |
|
327 | 0 | if (pre == NULL || |
328 | 0 | pattern == NULL) { |
329 | 0 | errno = EINVAL; |
330 | 0 | return -1; |
331 | 0 | } |
332 | | |
333 | 0 | if (pre->re != NULL) { |
334 | 0 | regfree(pre->re); |
335 | 0 | pre->re = NULL; |
336 | 0 | } |
337 | |
|
338 | 0 | pr_trace_msg(trace_channel, 9, "compiling pattern '%s' into POSIX regex", |
339 | 0 | pattern); |
340 | 0 | pre->pattern = pstrdup(pre->regex_pool, pattern); |
341 | |
|
342 | 0 | #if defined(REG_EXTENDED) |
343 | | /* Enable modern ("extended") POSIX regular expressions by default. */ |
344 | 0 | flags |= REG_EXTENDED; |
345 | 0 | #endif /* REG_EXTENDED */ |
346 | |
|
347 | 0 | pre->flags = flags; |
348 | |
|
349 | 0 | pre->re = pcalloc(pre->regex_pool, sizeof(regex_t)); |
350 | | # if defined(HAVE_PCRE2_PCRE2_REGCOMP) |
351 | | res = pcre2_regcomp(pre->re, pattern, flags); |
352 | | # else |
353 | 0 | res = regcomp(pre->re, pattern, flags); |
354 | 0 | # endif /* HAVE_PCRE2_PCRE2_REGCOMP */ |
355 | |
|
356 | 0 | return res; |
357 | 0 | } |
358 | | |
359 | 0 | int pr_regexp_compile(pr_regex_t *pre, const char *pattern, int flags) { |
360 | | #if defined(PR_USE_PCRE2) || \ |
361 | | defined(PR_USE_PCRE) |
362 | | # if defined(PR_USE_PCRE2) |
363 | | int pcre2_flags = 0; |
364 | | # elif defined(PR_USE_PCRE) |
365 | | int pcre_flags = 0; |
366 | | # endif |
367 | | |
368 | | if (regexp_use_posix == TRUE) { |
369 | | return pr_regexp_compile_posix(pre, pattern, flags); |
370 | | } |
371 | | |
372 | | /* Provide a simple mapping of POSIX regcomp(3) flags to |
373 | | * PCRE pcre_compile() flags. The ProFTPD code tends not to use many |
374 | | * of these flags. |
375 | | */ |
376 | | if (flags & REG_ICASE) { |
377 | | # if defined(PR_USE_PCRE2) |
378 | | pcre2_flags |= PCRE2_CASELESS; |
379 | | # elif defined(PR_USE_PCRE) |
380 | | pcre_flags |= PCRE_CASELESS; |
381 | | # endif |
382 | | } |
383 | | |
384 | | # if defined(PR_USE_PCRE2) |
385 | | return regexp_compile_pcre2(pre, pattern, pcre2_flags); |
386 | | # else |
387 | | return regexp_compile_pcre(pre, pattern, pcre_flags); |
388 | | # endif /* PR_USE_PCRE2 */ |
389 | | #else |
390 | 0 | return pr_regexp_compile_posix(pre, pattern, flags); |
391 | 0 | #endif /* PR_USE_PCRE */ |
392 | 0 | } |
393 | | |
394 | | size_t pr_regexp_error(int errcode, const pr_regex_t *pre, char *buf, |
395 | 0 | size_t bufsz) { |
396 | 0 | size_t res = 0; |
397 | |
|
398 | 0 | if (pre == NULL || |
399 | 0 | buf == NULL || |
400 | 0 | bufsz == 0) { |
401 | 0 | return 0; |
402 | 0 | } |
403 | | |
404 | | #if defined(PR_USE_PCRE2) |
405 | | if (pre->pcre2_errstr != NULL) { |
406 | | sstrncpy(buf, (const char *) pre->pcre2_errstr, bufsz); |
407 | | return strlen((const char *) pre->pcre2_errstr) + 1; |
408 | | } |
409 | | #elif defined(PR_USE_PCRE) |
410 | | if (pre->pcre_errstr != NULL) { |
411 | | sstrncpy(buf, pre->pcre_errstr, bufsz); |
412 | | return strlen(pre->pcre_errstr) + 1; |
413 | | } |
414 | | #endif /* PR_USE_PCRE */ |
415 | | |
416 | 0 | if (pre->re != NULL) { |
417 | | /* Make sure the given buffer is always zeroed out first. */ |
418 | 0 | memset(buf, '\0', bufsz); |
419 | | # if defined(HAVE_PCRE2_PCRE2_REGCOMP) |
420 | | res = pcre2_regerror(errcode, pre->re, buf, bufsz-1); |
421 | | # else |
422 | 0 | res = regerror(errcode, pre->re, buf, bufsz-1); |
423 | 0 | # endif /* HAVE_PCRE2_PCRE2_REGCOMP */ |
424 | 0 | } |
425 | |
|
426 | 0 | return res; |
427 | 0 | } |
428 | | |
429 | 0 | const char *pr_regexp_get_pattern(const pr_regex_t *pre) { |
430 | 0 | if (pre == NULL) { |
431 | 0 | errno = EINVAL; |
432 | 0 | return NULL; |
433 | 0 | } |
434 | | |
435 | 0 | if (pre->pattern == NULL) { |
436 | 0 | errno = ENOENT; |
437 | 0 | return NULL; |
438 | 0 | } |
439 | | |
440 | 0 | return pre->pattern; |
441 | 0 | } |
442 | | |
443 | | #if defined(PR_USE_PCRE2) |
444 | | static int regexp_exec_pcre2(pr_regex_t *pre, const char *text, |
445 | | size_t nmatches, regmatch_t *matches, int flags, unsigned long match_limit, |
446 | | unsigned long match_limit_recursion) { |
447 | | pool *tmp_pool = NULL; |
448 | | int res; |
449 | | uint32_t ovector_count = 0; |
450 | | pcre2_match_data *match_data = NULL; |
451 | | |
452 | | if (pre->pcre2 == NULL) { |
453 | | errno = EINVAL; |
454 | | return -1; |
455 | | } |
456 | | |
457 | | /* Use the default match limits, if set and if the caller did not |
458 | | * explicitly provide limits. |
459 | | */ |
460 | | if (match_limit == 0) { |
461 | | match_limit = pcre2_match_limit; |
462 | | } |
463 | | |
464 | | if (match_limit_recursion == 0) { |
465 | | match_limit_recursion = pcre2_match_limit_recursion; |
466 | | } |
467 | | |
468 | | if (match_limit > 0) { |
469 | | if (pre->pcre2_general_ctx == NULL) { |
470 | | pre->pcre2_general_ctx = pcre2_general_context_create(NULL, NULL, NULL); |
471 | | } |
472 | | |
473 | | if (pre->pcre2_match_ctx == NULL) { |
474 | | pre->pcre2_match_ctx = pcre2_match_context_create(pre->pcre2_general_ctx); |
475 | | } |
476 | | |
477 | | pcre2_set_match_limit(pre->pcre2_match_ctx, match_limit); |
478 | | } |
479 | | |
480 | | if (match_limit_recursion > 0) { |
481 | | if (pre->pcre2_general_ctx == NULL) { |
482 | | pre->pcre2_general_ctx = pcre2_general_context_create(NULL, NULL, NULL); |
483 | | } |
484 | | |
485 | | if (pre->pcre2_match_ctx == NULL) { |
486 | | pre->pcre2_match_ctx = pcre2_match_context_create(pre->pcre2_general_ctx); |
487 | | } |
488 | | |
489 | | pcre2_set_depth_limit(pre->pcre2_match_ctx, match_limit_recursion); |
490 | | } |
491 | | |
492 | | if (nmatches > 0 && |
493 | | matches != NULL) { |
494 | | tmp_pool = make_sub_pool(pre->regex_pool); |
495 | | pr_pool_tag(tmp_pool, "regexp tmp pool"); |
496 | | } |
497 | | |
498 | | pr_trace_msg(trace_channel, 9, |
499 | | "executing PCRE2 regex '%s' against subject '%s'", |
500 | | pr_regexp_get_pattern(pre), text); |
501 | | match_data = pcre2_match_data_create_from_pattern(pre->pcre2, |
502 | | pre->pcre2_general_ctx); |
503 | | res = pcre2_match(pre->pcre2, (PCRE2_SPTR) text, PCRE2_ZERO_TERMINATED, 0, |
504 | | flags, match_data, pre->pcre2_match_ctx); |
505 | | |
506 | | if (res < 0) { |
507 | | if (tmp_pool != NULL) { |
508 | | destroy_pool(tmp_pool); |
509 | | } |
510 | | |
511 | | if (pre->pcre2_errstr == NULL) { |
512 | | pre->pcre2_errstrsz = 128; |
513 | | pre->pcre2_errstr = pcalloc(pre->regex_pool, pre->pcre2_errstrsz); |
514 | | } |
515 | | |
516 | | pcre2_get_error_message(res, pre->pcre2_errstr, pre->pcre2_errstrsz); |
517 | | pr_trace_msg(trace_channel, 9, |
518 | | "PCRE2 regex '%s' failed to match subject '%s': %s", |
519 | | pr_regexp_get_pattern(pre), text, pre->pcre2_errstr); |
520 | | pcre2_match_data_free(match_data); |
521 | | |
522 | | return -1; |
523 | | } |
524 | | |
525 | | pr_trace_msg(trace_channel, 9, |
526 | | "PCRE2 regex '%s' successfully matched subject '%s'", |
527 | | pr_regexp_get_pattern(pre), text); |
528 | | |
529 | | if (nmatches > 0 && |
530 | | matches != NULL) { |
531 | | /* If matches/capture groups are requested, do the processing for them. */ |
532 | | ovector_count = pcre2_get_ovector_count(match_data); |
533 | | } |
534 | | |
535 | | if (ovector_count > 0) { |
536 | | /* Populate the provided POSIX regmatch_t array with the PCRE2 data. */ |
537 | | register uint32_t i; |
538 | | PCRE2_SIZE *ovector = NULL; |
539 | | |
540 | | pr_trace_msg(trace_channel, 9, |
541 | | "PCRE2 regex '%s' captured %lu groups in subject '%s'", |
542 | | pr_regexp_get_pattern(pre), (unsigned long) ovector_count, text); |
543 | | |
544 | | ovector = pcre2_get_ovector_pointer(match_data); |
545 | | |
546 | | for (i = 0; i < ovector_count; i++) { |
547 | | matches[i].rm_so = ovector[i * 2]; |
548 | | matches[i].rm_eo = ovector[(i * 2) + 1]; |
549 | | } |
550 | | |
551 | | /* Ensure the remaining items are set to proper defaults as well. */ |
552 | | for (; i < nmatches; i++) { |
553 | | matches[i].rm_so = matches[i].rm_eo = -1; |
554 | | } |
555 | | } |
556 | | |
557 | | destroy_pool(tmp_pool); |
558 | | pcre2_match_data_free(match_data); |
559 | | |
560 | | if (matches != NULL && |
561 | | pr_trace_get_level(trace_channel) >= 20) { |
562 | | register unsigned int i; |
563 | | |
564 | | for (i = 0; i < nmatches; i++) { |
565 | | int match_len; |
566 | | const char *match_text; |
567 | | |
568 | | if (matches[i].rm_so == -1 || |
569 | | matches[i].rm_eo == -1) { |
570 | | break; |
571 | | } |
572 | | |
573 | | match_text = &(text[matches[i].rm_so]); |
574 | | match_len = matches[i].rm_eo - matches[i].rm_so; |
575 | | |
576 | | pr_trace_msg(trace_channel, 20, |
577 | | "PCRE2 regex '%s' match #%u: %.*s (start %ld, len %d)", |
578 | | pr_regexp_get_pattern(pre), i, (int) match_len, match_text, |
579 | | (long) matches[i].rm_so, match_len); |
580 | | } |
581 | | } |
582 | | |
583 | | return 0; |
584 | | } |
585 | | #endif /* PR_USE_PCRE2 */ |
586 | | |
587 | | #if defined(PR_USE_PCRE) |
588 | | static int regexp_exec_pcre(pr_regex_t *pre, const char *text, |
589 | | size_t nmatches, regmatch_t *matches, int flags, unsigned long match_limit, |
590 | | unsigned long match_limit_recursion) { |
591 | | int res, ovector_count = 0, *ovector = NULL; |
592 | | size_t text_len; |
593 | | pool *tmp_pool = NULL; |
594 | | |
595 | | if (pre->pcre == NULL) { |
596 | | errno = EINVAL; |
597 | | return -1; |
598 | | } |
599 | | |
600 | | text_len = strlen(text); |
601 | | |
602 | | /* Use the default match limits, if set and if the caller did not |
603 | | * explicitly provide limits. |
604 | | */ |
605 | | if (match_limit == 0) { |
606 | | match_limit = pcre_match_limit; |
607 | | } |
608 | | |
609 | | if (match_limit_recursion == 0) { |
610 | | match_limit_recursion = pcre_match_limit_recursion; |
611 | | } |
612 | | |
613 | | if (match_limit > 0) { |
614 | | if (pre->pcre_extra == NULL) { |
615 | | pre->pcre_extra = pcalloc(pre->regex_pool, sizeof(pcre_extra)); |
616 | | } |
617 | | |
618 | | pre->pcre_extra->flags |= PCRE_EXTRA_MATCH_LIMIT; |
619 | | pre->pcre_extra->match_limit = match_limit; |
620 | | } |
621 | | |
622 | | if (match_limit_recursion > 0) { |
623 | | if (pre->pcre_extra == NULL) { |
624 | | pre->pcre_extra = pcalloc(pre->regex_pool, sizeof(pcre_extra)); |
625 | | } |
626 | | |
627 | | pre->pcre_extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
628 | | pre->pcre_extra->match_limit_recursion = match_limit_recursion; |
629 | | } |
630 | | |
631 | | if (nmatches > 0 && |
632 | | matches != NULL) { |
633 | | tmp_pool = make_sub_pool(pre->regex_pool); |
634 | | pr_pool_tag(tmp_pool, "regexp tmp pool"); |
635 | | |
636 | | ovector_count = nmatches; |
637 | | ovector = pcalloc(tmp_pool, sizeof(int) * nmatches * 3); |
638 | | } |
639 | | |
640 | | pr_trace_msg(trace_channel, 9, |
641 | | "executing PCRE regex '%s' against subject '%s'", |
642 | | pr_regexp_get_pattern(pre), text); |
643 | | res = pcre_exec(pre->pcre, pre->pcre_extra, text, text_len, 0, flags, |
644 | | ovector, ovector_count); |
645 | | |
646 | | if (res < 0) { |
647 | | if (tmp_pool != NULL) { |
648 | | destroy_pool(tmp_pool); |
649 | | } |
650 | | |
651 | | if (pr_trace_get_level(trace_channel) >= 9) { |
652 | | const char *reason = "unknown"; |
653 | | |
654 | | switch (res) { |
655 | | case PCRE_ERROR_NOMATCH: |
656 | | reason = "subject did not match pattern"; |
657 | | break; |
658 | | |
659 | | case PCRE_ERROR_NULL: |
660 | | reason = "null regex or subject"; |
661 | | break; |
662 | | |
663 | | case PCRE_ERROR_BADOPTION: |
664 | | reason = "unsupported options bit"; |
665 | | break; |
666 | | |
667 | | case PCRE_ERROR_BADMAGIC: |
668 | | reason = "bad magic number in regex"; |
669 | | break; |
670 | | |
671 | | case PCRE_ERROR_UNKNOWN_OPCODE: |
672 | | case PCRE_ERROR_INTERNAL: |
673 | | reason = "internal PCRE error or corrupted regex"; |
674 | | break; |
675 | | |
676 | | case PCRE_ERROR_NOMEMORY: |
677 | | reason = "not enough memory for backreferences"; |
678 | | break; |
679 | | |
680 | | case PCRE_ERROR_MATCHLIMIT: |
681 | | reason = "match limit reached/exceeded"; |
682 | | break; |
683 | | |
684 | | case PCRE_ERROR_RECURSIONLIMIT: |
685 | | reason = "match limit recursion reached/exceeded"; |
686 | | break; |
687 | | |
688 | | case PCRE_ERROR_BADUTF8: |
689 | | reason = "invalid UTF8 subject used"; |
690 | | break; |
691 | | |
692 | | case PCRE_ERROR_PARTIAL: |
693 | | reason = "subject matched only partially; PCRE_PARTIAL flag not used"; |
694 | | break; |
695 | | } |
696 | | |
697 | | pr_trace_msg(trace_channel, 9, |
698 | | "PCRE regex '%s' failed to match subject '%s': %s", |
699 | | pr_regexp_get_pattern(pre), text, reason); |
700 | | } |
701 | | |
702 | | return res; |
703 | | } |
704 | | |
705 | | pr_trace_msg(trace_channel, 9, |
706 | | "PCRE regex '%s' successfully matched subject '%s'", |
707 | | pr_regexp_get_pattern(pre), text); |
708 | | |
709 | | if (ovector_count > 0) { |
710 | | /* Populate the provided POSIX regmatch_t array with the PCRE data. */ |
711 | | register int i; |
712 | | |
713 | | for (i = 0; i < res; i++) { |
714 | | matches[i].rm_so = ovector[i * 2]; |
715 | | matches[i].rm_eo = ovector[(i * 2) + 1]; |
716 | | } |
717 | | |
718 | | /* Ensure the remaining items are set to proper defaults as well. */ |
719 | | for (; i < nmatches; i++) { |
720 | | matches[i].rm_so = matches[i].rm_eo = -1; |
721 | | } |
722 | | } |
723 | | |
724 | | destroy_pool(tmp_pool); |
725 | | |
726 | | if (matches != NULL && |
727 | | pr_trace_get_level(trace_channel) >= 20) { |
728 | | register unsigned int i; |
729 | | |
730 | | for (i = 0; i < nmatches; i++) { |
731 | | int match_len; |
732 | | const char *match_text; |
733 | | |
734 | | if (matches[i].rm_so == -1 || |
735 | | matches[i].rm_eo == -1) { |
736 | | break; |
737 | | } |
738 | | |
739 | | match_text = &(text[matches[i].rm_so]); |
740 | | match_len = matches[i].rm_eo - matches[i].rm_so; |
741 | | |
742 | | pr_trace_msg(trace_channel, 20, |
743 | | "PCRE regex '%s' match #%u: %.*s (start %ld, len %d)", |
744 | | pr_regexp_get_pattern(pre), i, (int) match_len, match_text, |
745 | | (long) matches[i].rm_so, match_len); |
746 | | } |
747 | | } |
748 | | |
749 | | return 0; |
750 | | } |
751 | | #endif /* PR_USE_PCRE */ |
752 | | |
753 | | static int regexp_exec_posix(pr_regex_t *pre, const char *text, |
754 | 0 | size_t nmatches, regmatch_t *matches, int flags) { |
755 | 0 | int res; |
756 | |
|
757 | 0 | pr_trace_msg(trace_channel, 9, |
758 | 0 | "executing POSIX regex '%s' against subject '%s'", |
759 | 0 | pr_regexp_get_pattern(pre), text); |
760 | | # if defined(HAVE_PCRE2_PCRE2_REGCOMP) |
761 | | res = pcre2_regexec(pre->re, text, nmatches, matches, flags); |
762 | | # else |
763 | 0 | res = regexec(pre->re, text, nmatches, matches, flags); |
764 | 0 | # endif /* HAVE_PCRE2_PCRE2_REGCOMP */ |
765 | 0 | if (res == 0) { |
766 | 0 | pr_trace_msg(trace_channel, 9, |
767 | 0 | "POSIX regex '%s' successfully matched subject '%s'", |
768 | 0 | pr_regexp_get_pattern(pre), text); |
769 | |
|
770 | 0 | if (matches != NULL && |
771 | 0 | pr_trace_get_level(trace_channel) >= 20) { |
772 | 0 | register unsigned int i; |
773 | |
|
774 | 0 | for (i = 0; i < nmatches; i++) { |
775 | 0 | int match_len; |
776 | 0 | const char *match_text; |
777 | |
|
778 | 0 | if (matches[i].rm_so == -1 || |
779 | 0 | matches[i].rm_eo == -1) { |
780 | 0 | break; |
781 | 0 | } |
782 | | |
783 | 0 | match_text = &(text[matches[i].rm_so]); |
784 | 0 | match_len = matches[i].rm_eo - matches[i].rm_so; |
785 | |
|
786 | 0 | pr_trace_msg(trace_channel, 20, |
787 | 0 | "POSIX regex '%s' match #%u: %.*s (start %ld, len %d)", |
788 | 0 | pr_regexp_get_pattern(pre), i, (int) match_len, match_text, |
789 | 0 | (long) matches[i].rm_so, match_len); |
790 | 0 | } |
791 | 0 | } |
792 | |
|
793 | 0 | } else { |
794 | 0 | if (pr_trace_get_level(trace_channel) >= 9) { |
795 | 0 | const char *reason = "subject did not match pattern"; |
796 | | |
797 | | /* NOTE: Expectation of `res` values here are mixed when PCRE |
798 | | * support, and the <pcreposix.h> header, are involved. |
799 | | */ |
800 | |
|
801 | 0 | pr_trace_msg(trace_channel, 9, |
802 | 0 | "POSIX regex '%s' failed to match subject '%s': %s (%d)", |
803 | 0 | pr_regexp_get_pattern(pre), text, reason, res); |
804 | 0 | } |
805 | 0 | } |
806 | |
|
807 | 0 | return res; |
808 | 0 | } |
809 | | |
810 | | int pr_regexp_exec(pr_regex_t *pre, const char *text, size_t nmatches, |
811 | | regmatch_t *matches, int flags, unsigned long match_limit, |
812 | 0 | unsigned long match_limit_recursion) { |
813 | 0 | int res; |
814 | |
|
815 | 0 | if (pre == NULL || |
816 | 0 | text == NULL) { |
817 | 0 | errno = EINVAL; |
818 | 0 | return -1; |
819 | 0 | } |
820 | | |
821 | | #if defined(PR_USE_PCRE2) |
822 | | if (pre->pcre2 != NULL) { |
823 | | |
824 | | /* What if the given pre was compiled via PCRE2, but we are told to only |
825 | | * use POSIX? In this case, we need to compile+exec on demand. |
826 | | */ |
827 | | if (regexp_use_posix == FALSE) { |
828 | | return regexp_exec_pcre2(pre, text, nmatches, matches, flags, match_limit, |
829 | | match_limit_recursion); |
830 | | } |
831 | | |
832 | | res = pr_regexp_compile_posix(pre, pre->pattern, pre->flags); |
833 | | if (res < 0) { |
834 | | return -1; |
835 | | } |
836 | | } |
837 | | |
838 | | #elif defined(PR_USE_PCRE) |
839 | | if (pre->pcre != NULL) { |
840 | | |
841 | | /* What if the given pre was compiled via PCRE, but we are told to only |
842 | | * use POSIX? In this case, we need to compile+exec on demand. |
843 | | */ |
844 | | if (regexp_use_posix == FALSE) { |
845 | | return regexp_exec_pcre(pre, text, nmatches, matches, flags, match_limit, |
846 | | match_limit_recursion); |
847 | | } |
848 | | |
849 | | res = pr_regexp_compile_posix(pre, pre->pattern, pre->flags); |
850 | | if (res < 0) { |
851 | | return -1; |
852 | | } |
853 | | } |
854 | | #endif /* PR_USE_PCRE */ |
855 | 0 | res = regexp_exec_posix(pre, text, nmatches, matches, flags); |
856 | | |
857 | | /* Make sure that we return a negative value to indicate a failed match; |
858 | | * PCRE already does this. |
859 | | */ |
860 | 0 | if (res == REG_NOMATCH) { |
861 | 0 | res = -1; |
862 | 0 | } |
863 | |
|
864 | 0 | return res; |
865 | 0 | } |
866 | | |
/* Sets the default match limits applied when a caller of pr_regexp_exec()
 * does not supply its own.  The values are only recorded in builds with
 * PCRE2 or PCRE support.
 *
 * Always returns 0.
 */
int pr_regexp_set_limits(unsigned long match_limit,
    unsigned long match_limit_recursion) {

#if defined(PR_USE_PCRE2)
  /* The PCRE2 limits are stored as 32-bit values; clamp rather than silently
   * truncate, so that a very large request does not wrap around to a small
   * (or zero, i.e. "unset") limit.
   */
  const unsigned long max_limit = 0xFFFFFFFFUL;

  if (match_limit > max_limit) {
    match_limit = max_limit;
  }

  if (match_limit_recursion > max_limit) {
    match_limit_recursion = max_limit;
  }

  pcre2_match_limit = (uint32_t) match_limit;
  pcre2_match_limit_recursion = (uint32_t) match_limit_recursion;

#elif defined(PR_USE_PCRE)
  pcre_match_limit = match_limit;
  pcre_match_limit_recursion = match_limit_recursion;

#else
  /* No limits to record in POSIX-only builds. */
  (void) match_limit;
  (void) match_limit_recursion;
#endif

  return 0;
}
881 | | |
882 | 0 | int pr_regexp_set_engine(const char *engine) { |
883 | 0 | if (engine == NULL) { |
884 | | /* Restore the default. */ |
885 | | #if defined(PR_USE_PCRE2) || \ |
886 | | defined(PR_USE_PCRE) |
887 | | regexp_use_posix = FALSE; |
888 | | #else |
889 | 0 | regexp_use_posix = TRUE; |
890 | 0 | #endif /* PR_USE_PCRE */ |
891 | 0 | pr_trace_msg(trace_channel, 19, "%s", "restored default regexp engine"); |
892 | 0 | return 0; |
893 | 0 | } |
894 | | |
895 | 0 | if (strcasecmp(engine, "POSIX") != 0 && |
896 | 0 | strcasecmp(engine, "PCRE") != 0 && |
897 | 0 | strcasecmp(engine, "PCRE2") != 0) { |
898 | 0 | errno = EINVAL; |
899 | 0 | return -1; |
900 | 0 | } |
901 | | |
902 | | #if defined(PR_USE_PCRE2) |
903 | | if (strcasecmp(engine, "PCRE") == 0) { |
904 | | errno = ENOSYS; |
905 | | return -1; |
906 | | } |
907 | | |
908 | | /* We already use PCRE2 by default, but are being explicitly requested to |
909 | | * only use POSIX. |
910 | | */ |
911 | | if (strcasecmp(engine, "POSIX") == 0) { |
912 | | if (regexp_use_posix == FALSE) { |
913 | | pr_trace_msg(trace_channel, 19, "%s", |
914 | | "changed regexp engine from PCRE2 to POSIX"); |
915 | | } |
916 | | |
917 | | regexp_use_posix = TRUE; |
918 | | |
919 | | } else { |
920 | | if (regexp_use_posix == TRUE) { |
921 | | pr_trace_msg(trace_channel, 19, "%s", |
922 | | "changed regexp engine from POSIX to PCRE2"); |
923 | | } |
924 | | |
925 | | regexp_use_posix = FALSE; |
926 | | } |
927 | | |
928 | | #elif defined(PR_USE_PCRE) |
929 | | if (strcasecmp(engine, "PCRE2") == 0) { |
930 | | errno = ENOSYS; |
931 | | return -1; |
932 | | } |
933 | | |
934 | | /* We already use PCRE by default, but are being explicitly requested to |
935 | | * only use POSIX. |
936 | | */ |
937 | | if (strcasecmp(engine, "POSIX") == 0) { |
938 | | if (regexp_use_posix == FALSE) { |
939 | | pr_trace_msg(trace_channel, 19, "%s", |
940 | | "changed regexp engine from PCRE to POSIX"); |
941 | | } |
942 | | |
943 | | regexp_use_posix = TRUE; |
944 | | |
945 | | } else { |
946 | | if (regexp_use_posix == TRUE) { |
947 | | pr_trace_msg(trace_channel, 19, "%s", |
948 | | "changed regexp engine from POSIX to PCRE"); |
949 | | } |
950 | | |
951 | | regexp_use_posix = FALSE; |
952 | | } |
953 | | #else |
954 | | /* We only use POSIX, but are being requested to use PCRE/PCRE2. */ |
955 | 0 | if (strcasecmp(engine, "PCRE") == 0 || |
956 | 0 | strcasecmp(engine, "PCRE2") == 0) { |
957 | 0 | errno = ENOSYS; |
958 | 0 | return -1; |
959 | 0 | } |
960 | | |
961 | 0 | regexp_use_posix = TRUE; |
962 | 0 | #endif /* PR_USE_PCRE */ |
963 | |
|
964 | 0 | return 0; |
965 | 0 | } |
966 | | |
967 | 0 | void init_regexp(void) { |
968 | | |
969 | | /* Register a restart handler for the regexp pool, so that when restarting, |
970 | | * regfree(3) is called on each of the regex_t pointers in a |
971 | | * regex_t-tracking array, thus preventing memory leaks on a long-running |
972 | | * daemon. |
973 | | * |
974 | | * This registration is done here so that it only happens once. |
975 | | */ |
976 | 0 | pr_event_register(NULL, "core.restart", regexp_restart_ev, NULL); |
977 | 0 | pr_event_register(NULL, "core.exit", regexp_exit_ev, NULL); |
978 | |
|
979 | | #if defined(PR_USE_PCRE2) |
980 | | pr_log_debug(DEBUG2, "using PCRE2 %d.%d", PCRE2_MAJOR, PCRE2_MINOR); |
981 | | #elif defined(PR_USE_PCRE) |
982 | | pr_log_debug(DEBUG2, "using PCRE %s", pcre_version()); |
983 | | #endif /* PR_USE_PCRE */ |
984 | 0 | } |
985 | | |
986 | | #endif |