/src/freeradius-server/src/lib/util/regex.c
Line | Count | Source |
1 | | /* |
2 | | * This program is free software; you can redistribute it and/or modify |
3 | | * it under the terms of the GNU General Public License as published by |
4 | | * the Free Software Foundation; either version 2 of the License, or |
5 | | * (at your option) any later version. |
6 | | * |
7 | | * This program is distributed in the hope that it will be useful, |
8 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | | * GNU General Public License for more details. |
11 | | * |
12 | | * You should have received a copy of the GNU General Public License |
13 | | * along with this program; if not, write to the Free Software |
14 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA |
15 | | */ |
16 | | |
17 | | /** Wrappers around various regular expression libraries |
18 | | * |
19 | | * @file src/lib/util/regex.c |
20 | | * |
21 | | * @copyright 2014 The FreeRADIUS server project |
22 | | * @copyright 2014 Arran Cudbard-Bell (a.cudbardb@freeradius.org) |
23 | | */ |
24 | | RCSID("$Id: ecec9010400c9be79a070b6c6380a57e34b0e64e $") |
25 | | |
26 | | #ifdef HAVE_REGEX |
27 | | |
28 | | #include <freeradius-devel/util/regex.h> |
29 | | #include <freeradius-devel/util/atexit.h> |
30 | | |
#if defined(HAVE_REGEX_PCRE2) && defined(PCRE2_CONFIG_JIT)
/*
 *	Lower and upper bounds handed to pcre2_jit_stack_create() when the
 *	per-thread JIT stack is allocated.  Either may be overridden by
 *	defining the macro before this point.
 */
#  ifndef FR_PCRE_JIT_STACK_MIN
#    define FR_PCRE_JIT_STACK_MIN	(128 * 1024)
#  endif
#  ifndef FR_PCRE_JIT_STACK_MAX
#    define FR_PCRE_JIT_STACK_MAX	(512 * 1024)
#  endif
#endif
39 | | |
40 | | const fr_sbuff_escape_rules_t regex_escape_rules = { |
41 | | .name = "regex", |
42 | | .chr = '\\', |
43 | | .subs = { |
44 | | ['$'] = '$', |
45 | | ['('] = '(', |
46 | | ['*'] = '*', |
47 | | ['+'] = '+', |
48 | | ['.'] = '.', |
49 | | ['/'] = '/', |
50 | | ['?'] = '?', |
51 | | ['['] = '[', |
52 | | ['\\'] = '\\', |
53 | | ['^'] = '^', |
54 | | ['`'] = '`', |
55 | | ['|'] = '|', |
56 | | ['\a'] = 'a', |
57 | | ['\b'] = 'b', |
58 | | ['\n'] = 'n', |
59 | | ['\r'] = 'r', |
60 | | ['\t'] = 't', |
61 | | ['\v'] = 'v' |
62 | | }, |
63 | | .esc = { |
64 | | SBUFF_CHAR_UNPRINTABLES_LOW, |
65 | | SBUFF_CHAR_UNPRINTABLES_EXTENDED |
66 | | }, |
67 | | .do_utf8 = true, |
68 | | .do_oct = true |
69 | | }; |
70 | | |
71 | | |
72 | | /* |
73 | | *###################################### |
74 | | *# FUNCTIONS FOR LIBPCRE2 # |
75 | | *###################################### |
76 | | */ |
77 | | #ifdef HAVE_REGEX_PCRE2 |
/*
 *	Wrapper functions for libpcre2.  These are much more powerful than the
 *	POSIX wrappers, and are guaranteed to be binary safe for both patterns
 *	and subjects, but they require libpcre2.
 */
83 | | |
84 | | /** Thread local storage for PCRE2 |
85 | | * |
86 | | * Not all this storage is thread local, but it simplifies cleanup if |
87 | | * we bind its lifetime to the thread, and lets us get away with not |
88 | | * having specific init/free functions. |
89 | | */ |
90 | | typedef struct { |
91 | | TALLOC_CTX *alloc_ctx; //!< Context used for any allocations. |
92 | | pcre2_general_context *gcontext; //!< General context. |
93 | | pcre2_compile_context *ccontext; //!< Compile context. |
94 | | pcre2_match_context *mcontext; //!< Match context. |
95 | | #ifdef PCRE2_CONFIG_JIT |
96 | | pcre2_jit_stack *jit_stack; //!< Jit stack for executing jit'd patterns. |
97 | | bool do_jit; //!< Whether we have runtime JIT support. |
98 | | #endif |
99 | | } fr_pcre2_tls_t; |
100 | | |
101 | | /** Thread local storage for pcre2 |
102 | | * |
103 | | */ |
104 | | static _Thread_local fr_pcre2_tls_t *fr_pcre2_tls; |
105 | | |
106 | | /** Talloc wrapper for pcre2 memory allocation |
107 | | * |
108 | | * @param[in] to_alloc How many bytes to alloc. |
109 | | * @param[in] uctx UNUSED. |
110 | | */ |
111 | | static void *_pcre2_talloc(PCRE2_SIZE to_alloc, UNUSED void *uctx) |
112 | | { |
113 | | return talloc_array(fr_pcre2_tls->alloc_ctx, uint8_t, to_alloc); |
114 | | } |
115 | | |
116 | | /** Talloc wrapper for pcre2 memory freeing |
117 | | * |
118 | | * @param[in] to_free Memory to free. |
119 | | * @param[in] uctx UNUSED. |
120 | | */ |
121 | | static void _pcre2_talloc_free(void *to_free, UNUSED void *uctx) |
122 | | { |
123 | | talloc_free(to_free); |
124 | | } |
125 | | |
126 | | /** Free thread local data |
127 | | * |
128 | | * @param[in] tls Thread local data to free. |
129 | | */ |
130 | | static int _pcre2_tls_free(fr_pcre2_tls_t *tls) |
131 | | { |
132 | | if (tls->gcontext) pcre2_general_context_free(tls->gcontext); |
133 | | if (tls->ccontext) pcre2_compile_context_free(tls->ccontext); |
134 | | if (tls->mcontext) pcre2_match_context_free(tls->mcontext); |
135 | | #ifdef PCRE2_CONFIG_JIT |
136 | | if (tls->jit_stack) pcre2_jit_stack_free(tls->jit_stack); |
137 | | #endif |
138 | | |
139 | | return 0; |
140 | | } |
141 | | |
/** Thread exit handler registered by fr_pcre2_tls_init
 *
 * @param[in] arg	The fr_pcre2_tls_t that was registered.
 * @return The result of talloc_free().
 */
static int _pcre2_tls_free_on_exit(void *arg)
{
	int rcode;

	rcode = talloc_free(arg);

	return rcode;
}
146 | | |
147 | | /** Thread local init for pcre2 |
148 | | * |
149 | | */ |
150 | | static int fr_pcre2_tls_init(void) |
151 | | { |
152 | | fr_pcre2_tls_t *tls; |
153 | | |
154 | | if (unlikely(fr_pcre2_tls != NULL)) return 0; |
155 | | |
156 | | fr_pcre2_tls = tls = talloc_zero(NULL, fr_pcre2_tls_t); |
157 | | if (!tls) return -1; |
158 | | talloc_set_destructor(tls, _pcre2_tls_free); |
159 | | |
160 | | tls->gcontext = pcre2_general_context_create(_pcre2_talloc, _pcre2_talloc_free, NULL); |
161 | | if (!tls->gcontext) { |
162 | | fr_strerror_const("Failed allocating general context"); |
163 | | return -1; |
164 | | } |
165 | | |
166 | | tls->ccontext = pcre2_compile_context_create(tls->gcontext); |
167 | | if (!tls->ccontext) { |
168 | | fr_strerror_const("Failed allocating compile context"); |
169 | | error: |
170 | | fr_pcre2_tls = NULL; |
171 | | _pcre2_tls_free(tls); |
172 | | return -1; |
173 | | } |
174 | | |
175 | | tls->mcontext = pcre2_match_context_create(tls->gcontext); |
176 | | if (!tls->mcontext) { |
177 | | fr_strerror_const("Failed allocating match context"); |
178 | | goto error; |
179 | | } |
180 | | |
181 | | #ifdef PCRE2_CONFIG_JIT |
182 | | pcre2_config(PCRE2_CONFIG_JIT, &tls->do_jit); |
183 | | if (tls->do_jit) { |
184 | | tls->jit_stack = pcre2_jit_stack_create(FR_PCRE_JIT_STACK_MIN, FR_PCRE_JIT_STACK_MAX, tls->gcontext); |
185 | | if (!tls->jit_stack) { |
186 | | fr_strerror_const("Failed allocating JIT stack"); |
187 | | goto error; |
188 | | } |
189 | | pcre2_jit_stack_assign(tls->mcontext, NULL, tls->jit_stack); |
190 | | } |
191 | | #endif |
192 | | |
193 | | /* |
194 | | * Free on thread exit |
195 | | */ |
196 | | fr_atexit_thread_local(fr_pcre2_tls, _pcre2_tls_free_on_exit, tls); |
197 | | fr_pcre2_tls = tls; /* Assign to thread local storage */ |
198 | | |
199 | | return 0; |
200 | | } |
201 | | |
202 | | /** Free regex_t structure |
203 | | * |
204 | | * Calls libpcre specific free functions for the expression and study. |
205 | | * |
206 | | * @param preg to free. |
207 | | */ |
208 | | static int _regex_free(regex_t *preg) |
209 | | { |
210 | | if (preg->compiled) pcre2_code_free(preg->compiled); |
211 | | |
212 | | return 0; |
213 | | } |
214 | | |
215 | | /** Wrapper around pcre2_compile |
216 | | * |
217 | | * Allows the rest of the code to do compilations using one function signature. |
218 | | * |
219 | | * @note Compiled expression must be freed with talloc_free. |
220 | | * |
221 | | * @param[out] out Where to write out a pointer to the structure containing |
222 | | * the compiled expression. |
223 | | * @param[in] pattern to compile. |
224 | | * @param[in] len of pattern. |
225 | | * @param[in] flags controlling matching. May be NULL. |
226 | | * @param[in] subcaptures Whether to compile the regular expression to store subcapture |
227 | | * data. |
228 | | * @param[in] runtime If false run the pattern through the PCRE JIT (if available) |
229 | | * to convert it to machine code. This trades startup time (longer) |
230 | | * for runtime performance (better). |
231 | | * @return |
232 | | * - >= 1 on success. |
233 | | * - <= 0 on error. Negative value is offset of parse error. |
234 | | */ |
235 | | ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len, |
236 | | fr_regex_flags_t const *flags, bool subcaptures, bool runtime) |
237 | | { |
238 | | int ret; |
239 | | PCRE2_SIZE offset; |
240 | | uint32_t cflags = 0; |
241 | | regex_t *preg; |
242 | | |
243 | | /* |
244 | | * Check inputs |
245 | | */ |
246 | | *out = NULL; |
247 | | |
248 | | /* |
249 | | * Thread local initialisation |
250 | | */ |
251 | | if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1; |
252 | | |
253 | | if (len == 0) { |
254 | | fr_strerror_const("Empty expression"); |
255 | | return 0; |
256 | | } |
257 | | |
258 | | /* |
259 | | * Options |
260 | | */ |
261 | | if (flags) { |
262 | | /* flags->global implemented by substitution function */ |
263 | | if (flags->ignore_case) cflags |= PCRE2_CASELESS; |
264 | | if (flags->multiline) cflags |= PCRE2_MULTILINE; |
265 | | if (flags->dot_all) cflags |= PCRE2_DOTALL; |
266 | | if (flags->unicode) cflags |= PCRE2_UTF; |
267 | | if (flags->extended) cflags |= PCRE2_EXTENDED; |
268 | | } |
269 | | |
270 | | if (!subcaptures) cflags |= PCRE2_NO_AUTO_CAPTURE; |
271 | | |
272 | | preg = talloc_zero(ctx, regex_t); |
273 | | talloc_set_destructor(preg, _regex_free); |
274 | | |
275 | | preg->compiled = pcre2_compile((PCRE2_SPTR8)pattern, len, |
276 | | cflags, &ret, &offset, fr_pcre2_tls->ccontext); |
277 | | if (!preg->compiled) { |
278 | | PCRE2_UCHAR errbuff[128]; |
279 | | |
280 | | pcre2_get_error_message(ret, errbuff, sizeof(errbuff)); |
281 | | fr_strerror_printf("%s", (char *)errbuff); |
282 | | talloc_free(preg); |
283 | | |
284 | | return -(ssize_t)offset; |
285 | | } |
286 | | |
287 | | if (!runtime) { |
288 | | preg->precompiled = true; |
289 | | |
290 | | #ifdef PCRE2_CONFIG_JIT |
291 | | /* |
292 | | * This is expensive, so only do it for |
293 | | * expressions that are going to be |
294 | | * evaluated repeatedly. |
295 | | */ |
296 | | if (fr_pcre2_tls->do_jit) { |
297 | | ret = pcre2_jit_compile(preg->compiled, PCRE2_JIT_COMPLETE); |
298 | | if (ret < 0) { |
299 | | PCRE2_UCHAR errbuff[128]; |
300 | | |
301 | | pcre2_get_error_message(ret, errbuff, sizeof(errbuff)); |
302 | | fr_strerror_printf("Pattern JIT failed: %s", (char *)errbuff); |
303 | | talloc_free(preg); |
304 | | |
305 | | return 0; |
306 | | } |
307 | | preg->jitd = true; |
308 | | } |
309 | | #endif |
310 | | } |
311 | | |
312 | | *out = preg; |
313 | | |
314 | | return len; |
315 | | } |
316 | | |
317 | | /** Wrapper around pcre2_exec |
318 | | * |
319 | | * @param[in] preg The compiled expression. |
320 | | * @param[in] subject to match. |
321 | | * @param[in] len Length of subject. |
322 | | * @param[in] regmatch Array of match pointers. |
323 | | * @return |
324 | | * - -1 on failure. |
325 | | * - 0 on no match. |
326 | | * - 1 on match. |
327 | | */ |
328 | | int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch) |
329 | | { |
330 | | int ret; |
331 | | uint32_t options = 0; |
332 | | |
333 | | char *our_subject = NULL; |
334 | | bool dup_subject = true; |
335 | | pcre2_match_data *match_data; |
336 | | |
337 | | /* |
338 | | * Thread local initialisation |
339 | | */ |
340 | | if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1; |
341 | | |
342 | | if (regmatch) { |
343 | | #ifdef PCRE2_COPY_MATCHED_SUBJECT |
344 | | /* |
345 | | * This is apparently only supported for pcre2_match |
346 | | * NOT pcre2_jit_match. |
347 | | */ |
348 | | # ifdef PCRE2_CONFIG_JIT |
349 | | if (!preg->jitd) { |
350 | | # endif |
351 | | dup_subject = false; |
352 | | |
353 | | /* |
354 | | * If PCRE2_COPY_MATCHED_SUBJECT is available |
355 | | * and set as an options flag, pcre2_match will |
356 | | * strdup the subject string if pcre2_match is |
357 | | * successful and store a pointer to it in the |
358 | | * regmatch struct. |
359 | | * |
360 | | * The lifetime of the string memory will be |
361 | | * bound to the regmatch struct. This is more |
362 | | * efficient that doing it ourselves, as the |
363 | | * strdup only occurs if the subject matches. |
364 | | */ |
365 | | options |= PCRE2_COPY_MATCHED_SUBJECT; |
366 | | # ifdef PCRE2_CONFIG_JIT |
367 | | } |
368 | | # endif |
369 | | #endif |
370 | | if (dup_subject) { |
371 | | /* |
372 | | * We have to dup and operate on the duplicate |
373 | | * of the subject, because pcre2_jit_match and |
374 | | * pcre2_match store a pointer to the subject |
375 | | * in the regmatch structure. |
376 | | */ |
377 | | subject = our_subject = talloc_bstrndup(regmatch, subject, len); |
378 | | if (!subject) { |
379 | | fr_strerror_const("Out of memory"); |
380 | | return -1; |
381 | | } |
382 | | #ifndef NDEBUG |
383 | | regmatch->subject = subject; /* Stored only for tracking memory issues */ |
384 | | #endif |
385 | | } |
386 | | } |
387 | | |
388 | | /* |
389 | | * If we weren't given match data we |
390 | | * need to alloc it else pcre2_match |
391 | | * fails when passed NULL match data. |
392 | | */ |
393 | | if (!regmatch) { |
394 | | match_data = pcre2_match_data_create_from_pattern(preg->compiled, fr_pcre2_tls->gcontext); |
395 | | if (!match_data) { |
396 | | fr_strerror_const("Failed allocating temporary match data"); |
397 | | return -1; |
398 | | } |
399 | | } else { |
400 | | match_data = regmatch->match_data; |
401 | | } |
402 | | |
403 | | #ifdef PCRE2_CONFIG_JIT |
404 | | if (preg->jitd) { |
405 | | ret = pcre2_jit_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options, |
406 | | match_data, fr_pcre2_tls->mcontext); |
407 | | } else |
408 | | #endif |
409 | | { |
410 | | ret = pcre2_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options, |
411 | | match_data, fr_pcre2_tls->mcontext); |
412 | | } |
413 | | if (!regmatch) pcre2_match_data_free(match_data); |
414 | | if (ret < 0) { |
415 | | PCRE2_UCHAR errbuff[128]; |
416 | | |
417 | | if (dup_subject) talloc_free(our_subject); |
418 | | |
419 | | if (ret == PCRE2_ERROR_NOMATCH) { |
420 | | if (regmatch) regmatch->used = 0; |
421 | | return 0; |
422 | | } |
423 | | |
424 | | pcre2_get_error_message(ret, errbuff, sizeof(errbuff)); |
425 | | fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff); |
426 | | |
427 | | return -1; |
428 | | } |
429 | | |
430 | | if (regmatch) regmatch->used = ret; |
431 | | |
432 | | return 1; |
433 | | } |
434 | | |
435 | | /** Wrapper around pcre2_substitute |
436 | | * |
437 | | * @param[in] ctx to allocate output string in. |
438 | | * @param[out] out Output string with replacements performed. |
439 | | * @param[in] max_out Maximum length of output buffer. If this is 0 then |
440 | | * the output length is unlimited. |
441 | | * @param[in] preg The compiled expression. |
442 | | * @param[in] flags that affect matching. |
443 | | * @param[in] subject to perform replacements on. |
444 | | * @param[in] subject_len the length of the subject. |
445 | | * @param[in] replacement replacement string containing substitution |
446 | | * markers. |
447 | | * @param[in] replacement_len Length of the replacement string. |
448 | | * @param[in] regmatch Array of match pointers. |
449 | | * @return |
450 | | * - >= 0 the length of the output string. |
451 | | * - < 0 on error. |
452 | | */ |
453 | | int regex_substitute(TALLOC_CTX *ctx, char **out, size_t max_out, regex_t *preg, fr_regex_flags_t const *flags, |
454 | | char const *subject, size_t subject_len, |
455 | | char const *replacement, size_t replacement_len, |
456 | | fr_regmatch_t *regmatch) |
457 | | { |
458 | | int ret; |
459 | | uint32_t options = 0; |
460 | | size_t buff_len, actual_len; |
461 | | char *buff; |
462 | | |
463 | | #ifndef PCRE2_COPY_MATCHED_SUBJECT |
464 | | char *our_subject = NULL; |
465 | | #endif |
466 | | |
467 | | /* |
468 | | * Thread local initialisation |
469 | | */ |
470 | | if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1; |
471 | | |
472 | | /* |
473 | | * Internally pcre2_substitute just calls pcre2_match to |
474 | | * generate the match data, so the same hack as the |
475 | | * regex_exec function above is required. |
476 | | */ |
477 | | if (regmatch) { |
478 | | #ifndef PCRE2_COPY_MATCHED_SUBJECT |
479 | | /* |
480 | | * We have to dup and operate on the duplicate |
481 | | * of the subject, because pcre2_jit_match and |
482 | | * pcre2_match store a pointer to the subject |
483 | | * in the regmatch structure. |
484 | | */ |
485 | | subject = our_subject = talloc_bstrndup(regmatch, subject, subject_len); |
486 | | if (!subject) { |
487 | | fr_strerror_const("Out of memory"); |
488 | | return -1; |
489 | | } |
490 | | #else |
491 | | /* |
492 | | * If PCRE2_COPY_MATCHED_SUBJECT is available |
493 | | * and set as an options flag, pcre2_match will |
494 | | * strdup the subject string if pcre2_match is |
495 | | * successful and store a pointer to it in the |
496 | | * regmatch struct. |
497 | | * |
498 | | * The lifetime of the string memory will be |
499 | | * bound to the regmatch struct. This is more |
500 | | * efficient that doing it ourselves, as the |
501 | | * strdup only occurs if the subject matches. |
502 | | */ |
503 | | options |= PCRE2_COPY_MATCHED_SUBJECT; |
504 | | #endif |
505 | | } |
506 | | |
507 | | /* |
508 | | * Guess (badly) what the length of the output buffer should be |
509 | | */ |
510 | | actual_len = buff_len = subject_len + 1; /* +1 for the \0 */ |
511 | | buff = talloc_array(ctx, char, buff_len); |
512 | | if (!buff) { |
513 | | #ifndef PCRE2_COPY_MATCHED_SUBJECT |
514 | | talloc_free(our_subject); |
515 | | #endif |
516 | | fr_strerror_const("Out of memory"); |
517 | | return -1; |
518 | | } |
519 | | |
520 | | options |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; |
521 | | if (flags->global) options |= PCRE2_SUBSTITUTE_GLOBAL; |
522 | | |
523 | | again: |
524 | | /* |
525 | | * actual_len input value should be the size of the |
526 | | * buffer including space for '\0'. |
527 | | * If input buffer is too small, then actual_len will be set |
528 | | * to the buffer space needed including space for '\0'. |
529 | | * If input buffer is the correct size, then actual_len |
530 | | * will be set to the size of the string written to buff |
531 | | * without the terminating '\0'. |
532 | | */ |
533 | | ret = pcre2_substitute(preg->compiled, |
534 | | (PCRE2_SPTR8)subject, (PCRE2_SIZE)subject_len, 0, |
535 | | options, NULL, fr_pcre2_tls->mcontext, |
536 | | (PCRE2_UCHAR const *)replacement, replacement_len, (PCRE2_UCHAR *)buff, &actual_len); |
537 | | |
538 | | if (ret < 0) { |
539 | | PCRE2_UCHAR errbuff[128]; |
540 | | |
541 | | #ifndef PCRE2_COPY_MATCHED_SUBJECT |
542 | | talloc_free(our_subject); |
543 | | #endif |
544 | | talloc_free(buff); |
545 | | |
546 | | if (ret == PCRE2_ERROR_NOMEMORY) { |
547 | | if ((max_out > 0) && (actual_len > max_out)) { |
548 | | fr_strerror_printf("String length with substitutions (%zu) " |
549 | | "exceeds max string length (%zu)", actual_len - 1, max_out - 1); |
550 | | return -1; |
551 | | } |
552 | | |
553 | | /* |
554 | | * Check that actual_len != buff_len as that'd be |
555 | | * an actual error. |
556 | | */ |
557 | | if (actual_len == buff_len) { |
558 | | fr_strerror_const("libpcre2 out of memory"); |
559 | | return -1; |
560 | | } |
561 | | buff_len = actual_len; /* The length we get passed back includes the \0 */ |
562 | | buff = talloc_array(ctx, char, buff_len); |
563 | | goto again; |
564 | | } |
565 | | |
566 | | if (ret == PCRE2_ERROR_NOMATCH) { |
567 | | if (regmatch) regmatch->used = 0; |
568 | | return 0; |
569 | | } |
570 | | |
571 | | pcre2_get_error_message(ret, errbuff, sizeof(errbuff)); |
572 | | fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff); |
573 | | return -1; |
574 | | } |
575 | | |
576 | | /* |
577 | | * Trim the replacement buffer to the correct length |
578 | | * |
579 | | * buff_len includes \0. |
580 | | * ...and as pcre2_substitute just succeeded actual_len does not include \0. |
581 | | */ |
582 | | if (actual_len < (buff_len - 1)) { |
583 | | buff = talloc_bstr_realloc(ctx, buff, actual_len); |
584 | | if (!buff) { |
585 | | fr_strerror_const("reallocing pcre2_substitute result buffer failed"); |
586 | | return -1; |
587 | | } |
588 | | } |
589 | | |
590 | | if (regmatch) regmatch->used = ret; |
591 | | *out = buff; |
592 | | |
593 | | return 1; |
594 | | } |
595 | | |
596 | | |
597 | | /** Returns the number of subcapture groups |
598 | | * |
599 | | * @return |
600 | | * - >0 The number of subcaptures contained within the pattern |
601 | | * - 0 if the number of subcaptures can't be determined. |
602 | | */ |
603 | | uint32_t regex_subcapture_count(regex_t const *preg) |
604 | | { |
605 | | uint32_t count; |
606 | | |
607 | | if (pcre2_pattern_info(preg->compiled, PCRE2_INFO_CAPTURECOUNT, &count) != 0) { |
608 | | fr_strerror_const("Error determining subcapture group count"); |
609 | | return 0; |
610 | | } |
611 | | |
612 | | return count + 1; |
613 | | } |
614 | | |
615 | | /** Free libpcre2's matchdata |
616 | | * |
617 | | * @note Don't call directly, will be called if talloc_free is called on a #regmatch_t. |
618 | | */ |
619 | | static int _pcre2_match_data_free(fr_regmatch_t *regmatch) |
620 | | { |
621 | | pcre2_match_data_free(regmatch->match_data); |
622 | | return 0; |
623 | | } |
624 | | |
625 | | /** Allocate vectors to fill with match data |
626 | | * |
627 | | * @param[in] ctx to allocate match vectors in. |
628 | | * @param[in] count The number of vectors to allocate. |
629 | | * @return |
630 | | * - NULL on error. |
631 | | * - Array of match vectors. |
632 | | */ |
633 | | fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count) |
634 | | { |
635 | | fr_regmatch_t *regmatch; |
636 | | |
637 | | /* |
638 | | * Thread local initialisation |
639 | | */ |
640 | | if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return NULL; |
641 | | |
642 | | regmatch = talloc(ctx, fr_regmatch_t); |
643 | | if (!regmatch) { |
644 | | oom: |
645 | | fr_strerror_const("Out of memory"); |
646 | | return NULL; |
647 | | } |
648 | | |
649 | | regmatch->match_data = pcre2_match_data_create(count, fr_pcre2_tls->gcontext); |
650 | | if (!regmatch->match_data) { |
651 | | talloc_free(regmatch); |
652 | | goto oom; |
653 | | } |
654 | | talloc_set_type(regmatch->match_data, pcre2_match_data); |
655 | | |
656 | | talloc_set_destructor(regmatch, _pcre2_match_data_free); |
657 | | |
658 | | return regmatch; |
659 | | } |
660 | | |
661 | | /* |
662 | | *###################################### |
663 | | *# FUNCTIONS FOR POSIX-REGEX # |
664 | | *###################################### |
665 | | */ |
666 | | # else |
667 | | /* |
668 | | * Wrapper functions for POSIX like, and extended regular |
669 | | * expressions. These use the system regex library. |
670 | | */ |
671 | | |
/** Talloc destructor for a heap allocated regex_t
 *
 * The regex_t is heap allocated so that regex_compile can share one
 * signature between the POSIX and libpcre implementations; this releases
 * what regcomp stored inside it.
 *
 * @param[in] preg	Expression being freed.
 * @return 0 always.
 */
static int _regex_free(regex_t *preg)
{
	/* regfree() has no return value, so there's nothing to propagate */
	regfree(preg);

	return 0;
}
685 | | |
686 | | /** Binary safe wrapper around regcomp |
687 | | * |
688 | | * If we have the BSD extensions we don't need to do any special work |
689 | | * if we don't have the BSD extensions we need to check to see if the |
690 | | * regular expression contains any \0 bytes. |
691 | | * |
692 | | * If it does we fail and print the appropriate error message. |
693 | | * |
694 | | * @note Compiled expression must be freed with talloc_free. |
695 | | * |
696 | | * @param[in] ctx To allocate memory in. |
697 | | * @param[out] out Where to write out a pointer |
698 | | * to the structure containing the compiled expression. |
699 | | * @param[in] pattern to compile. |
700 | | * @param[in] len of pattern. |
701 | | * @param[in] flags controlling matching. May be NULL. |
702 | | * @param[in] subcaptures Whether to compile the regular expression |
703 | | * to store subcapture data. |
704 | | * @param[in] runtime Whether the compilation is being done at runtime. |
705 | | * @return |
706 | | * - >= 1 on success. |
707 | | * - <= 0 on error. Negative value is offset of parse error. |
708 | | * With POSIX regex we only give the correct offset for embedded \0 errors. |
709 | | */ |
710 | | ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len, |
711 | | fr_regex_flags_t const *flags, bool subcaptures, UNUSED bool runtime) |
712 | 0 | { |
713 | 0 | int ret; |
714 | 0 | int cflags = REG_EXTENDED; |
715 | 0 | regex_t *preg; |
716 | |
|
717 | 0 | if (len == 0) { |
718 | 0 | fr_strerror_const("Empty expression"); |
719 | 0 | return 0; |
720 | 0 | } |
721 | | |
722 | | /* |
723 | | * Options |
724 | | */ |
725 | 0 | if (flags) { |
726 | 0 | if (flags->global) { |
727 | 0 | fr_strerror_const("g - Global matching/substitution not supported with posix-regex"); |
728 | 0 | return 0; |
729 | 0 | } |
730 | 0 | if (flags->dot_all) { |
731 | 0 | fr_strerror_const("s - Single line matching is not supported with posix-regex"); |
732 | 0 | return 0; |
733 | 0 | } |
734 | 0 | if (flags->unicode) { |
735 | 0 | fr_strerror_const("u - Unicode matching not supported with posix-regex"); |
736 | 0 | return 0; |
737 | 0 | } |
738 | 0 | if (flags->extended) { |
739 | 0 | fr_strerror_const("x - Whitespace and comments not supported with posix-regex"); |
740 | 0 | return 0; |
741 | 0 | } |
742 | | |
743 | 0 | if (flags->ignore_case) cflags |= REG_ICASE; |
744 | 0 | if (flags->multiline) cflags |= REG_NEWLINE; |
745 | 0 | } |
746 | | |
747 | | |
748 | 0 | if (!subcaptures) cflags |= REG_NOSUB; |
749 | |
|
750 | 0 | #ifndef HAVE_REGNCOMP |
751 | 0 | { |
752 | 0 | char const *p; |
753 | |
|
754 | 0 | p = pattern; |
755 | 0 | p += strlen(pattern); |
756 | |
|
757 | 0 | if ((size_t)(p - pattern) != len) { |
758 | 0 | fr_strerror_printf("Found null in pattern at offset %zu. Pattern unsafe for compilation", |
759 | 0 | (p - pattern)); |
760 | 0 | return -(p - pattern); |
761 | 0 | } |
762 | | |
763 | 0 | preg = talloc_zero(ctx, regex_t); |
764 | 0 | if (!preg) return 0; |
765 | | |
766 | 0 | ret = regcomp(preg, pattern, cflags); |
767 | 0 | } |
768 | | #else |
769 | | preg = talloc_zero(ctx, regex_t); |
770 | | if (!preg) return 0; |
771 | | ret = regncomp(preg, pattern, len, cflags); |
772 | | #endif |
773 | 0 | if (ret != 0) { |
774 | 0 | char errbuf[128]; |
775 | |
|
776 | 0 | regerror(ret, preg, errbuf, sizeof(errbuf)); |
777 | 0 | fr_strerror_printf("%s", errbuf); |
778 | |
|
779 | 0 | talloc_free(preg); |
780 | |
|
781 | 0 | return 0; /* POSIX expressions don't give us the failure offset */ |
782 | 0 | } |
783 | | |
784 | 0 | talloc_set_destructor(preg, _regex_free); |
785 | 0 | *out = preg; |
786 | |
|
787 | 0 | return len; |
788 | 0 | } |
789 | | |
790 | | /** Binary safe wrapper around regexec |
791 | | * |
792 | | * If we have the BSD extensions we don't need to do any special work |
793 | | * If we don't have the BSD extensions we need to check to see if the |
794 | | * value to be compared contains any \0 bytes. |
795 | | * |
796 | | * If it does, we fail and print the appropriate error message. |
797 | | * |
798 | | * @param[in] preg The compiled expression. |
799 | | * @param[in] subject to match. |
800 | | * @param[in] regmatch Match result structure. |
801 | | * @return |
802 | | * - -1 on failure. |
803 | | * - 0 on no match. |
804 | | * - 1 on match. |
805 | | */ |
806 | | int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch) |
807 | 0 | { |
808 | 0 | int ret; |
809 | 0 | size_t matches; |
810 | | |
811 | | /* |
812 | | * Disable capturing |
813 | | */ |
814 | 0 | if (!regmatch) { |
815 | 0 | matches = 0; |
816 | 0 | } else { |
817 | 0 | matches = regmatch->allocd; |
818 | | |
819 | | /* |
820 | | * Reset the match result structure |
821 | | */ |
822 | 0 | memset(regmatch->match_data, 0, sizeof(regmatch->match_data[0]) * matches); |
823 | 0 | regmatch->used = 0; |
824 | 0 | } |
825 | |
|
826 | 0 | #ifndef HAVE_REGNEXEC |
827 | 0 | { |
828 | 0 | char const *p; |
829 | |
|
830 | 0 | p = subject; |
831 | 0 | p += strlen(subject); |
832 | |
|
833 | 0 | if ((size_t)(p - subject) != len) { |
834 | 0 | fr_strerror_printf("Found null in subject at offset %zu. String unsafe for evaluation", |
835 | 0 | (p - subject)); |
836 | 0 | if (regmatch) regmatch->used = 0; |
837 | 0 | return -1; |
838 | 0 | } |
839 | 0 | ret = regexec(preg, subject, matches, regmatch ? regmatch->match_data : NULL, 0); |
840 | 0 | } |
841 | | #else |
842 | | ret = regnexec(preg, subject, len, matches, regmatch ? regmatch->match_data : NULL, 0); |
843 | | #endif |
844 | 0 | if (ret != 0) { |
845 | 0 | if (ret != REG_NOMATCH) { |
846 | 0 | char errbuf[128]; |
847 | |
|
848 | 0 | regerror(ret, preg, errbuf, sizeof(errbuf)); |
849 | |
|
850 | 0 | fr_strerror_printf("regex evaluation failed: %s", errbuf); |
851 | 0 | return -1; |
852 | 0 | } |
853 | 0 | return 0; |
854 | 0 | } |
855 | | |
856 | | /* |
857 | | * Update regmatch->count to be the maximum number of |
858 | | * groups that *could* have been populated as we don't |
859 | | * have the number of matches. |
860 | | */ |
861 | 0 | if (regmatch) { |
862 | 0 | regmatch->used = preg->re_nsub + 1; |
863 | |
|
864 | 0 | if (regmatch->subject) talloc_const_free(regmatch->subject); |
865 | 0 | regmatch->subject = talloc_bstrndup(regmatch, subject, len); |
866 | 0 | if (!regmatch->subject) { |
867 | 0 | fr_strerror_const("Out of memory"); |
868 | 0 | return -1; |
869 | 0 | } |
870 | 0 | } |
871 | 0 | return 1; |
872 | 0 | } |
873 | | |
874 | | /** Returns the number of subcapture groups |
875 | | * |
876 | | * @return |
877 | | * - 0 we can't determine this for POSIX regular expressions. |
878 | | */ |
879 | | uint32_t regex_subcapture_count(UNUSED regex_t const *preg) |
880 | 0 | { |
881 | 0 | return 0; |
882 | 0 | } |
883 | | # endif |
884 | | |
885 | | # if defined(HAVE_REGEX_POSIX) |
886 | | /** Allocate vectors to fill with match data |
887 | | * |
888 | | * @param[in] ctx to allocate match vectors in. |
889 | | * @param[in] count The number of vectors to allocate. |
890 | | * @return |
891 | | * - NULL on error. |
892 | | * - Array of match vectors. |
893 | | */ |
894 | | fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count) |
895 | 0 | { |
896 | 0 | fr_regmatch_t *regmatch; |
897 | | |
898 | | /* |
899 | | * Pre-allocate space for the match structure |
900 | | * and for a 128b subject string. |
901 | | */ |
902 | 0 | regmatch = talloc_zero_pooled_object(ctx, fr_regmatch_t, 2, (sizeof(regmatch_t) * count) + 128); |
903 | 0 | if (unlikely(!regmatch)) { |
904 | 0 | error: |
905 | 0 | fr_strerror_const("Out of memory"); |
906 | 0 | talloc_free(regmatch); |
907 | 0 | return NULL; |
908 | 0 | } |
909 | 0 | regmatch->match_data = talloc_array(regmatch, regmatch_t, count); |
910 | 0 | if (unlikely(!regmatch->match_data)) goto error; |
911 | | |
912 | 0 | regmatch->allocd = count; |
913 | 0 | regmatch->used = 0; |
914 | 0 | regmatch->subject = NULL; |
915 | |
|
916 | 0 | return regmatch; |
917 | 0 | } |
918 | | # endif |
919 | | |
920 | | /* |
921 | | *######################################## |
922 | | *# UNIVERSAL FUNCTIONS # |
923 | | *######################################## |
924 | | */ |
925 | | |
926 | | /** Parse a string containing one or more regex flags |
927 | | * |
928 | | * @param[out] err May be NULL. If not NULL will be set to: |
929 | | * - 0 on success. |
930 | | * - -1 on unknown flag. |
931 | | * - -2 on duplicate. |
932 | | * @param[out] out Flag structure to populate. Must be initialised to zero |
933 | | * if this is the first call to regex_flags_parse. |
934 | | * @param[in] in Flag string to parse. |
935 | | * @param[in] terminals Terminal characters. If parsing ends before the buffer |
936 | | * is exhausted, and is pointing to one of these chars |
937 | | * it's not considered an error. |
938 | | * @param[in] err_on_dup Error if the flag is already set. |
939 | | * @return |
940 | | * - > 0 on success. The number of flag bytes parsed. |
941 | | * - <= 0 on failure. Negative offset of first unrecognised flag. |
942 | | */ |
943 | | fr_slen_t regex_flags_parse(int *err, fr_regex_flags_t *out, fr_sbuff_t *in, |
944 | | fr_sbuff_term_t const *terminals, bool err_on_dup) |
945 | 0 | { |
946 | 0 | fr_sbuff_t our_in = FR_SBUFF(in); |
947 | |
|
948 | 0 | if (err) *err = 0; |
949 | |
|
950 | 0 | while (fr_sbuff_extend(&our_in)) { |
951 | 0 | switch (*our_in.p) { |
952 | 0 | #define DO_REGEX_FLAG(_f, _c) \ |
953 | 0 | case _c: \ |
954 | 0 | if (err_on_dup && out->_f) { \ |
955 | 0 | fr_strerror_printf("Duplicate regex flag '%c'", *our_in.p); \ |
956 | 0 | if (err) *err = -2; \ |
957 | 0 | FR_SBUFF_ERROR_RETURN(&our_in); \ |
958 | 0 | } \ |
959 | 0 | out->_f = 1; \ |
960 | 0 | break |
961 | | |
962 | 0 | DO_REGEX_FLAG(global, 'g'); |
963 | 0 | DO_REGEX_FLAG(ignore_case, 'i'); |
964 | 0 | DO_REGEX_FLAG(multiline, 'm'); |
965 | 0 | DO_REGEX_FLAG(dot_all, 's'); |
966 | 0 | DO_REGEX_FLAG(unicode, 'u'); |
967 | 0 | DO_REGEX_FLAG(extended, 'x'); |
968 | 0 | #undef DO_REGEX_FLAG |
969 | | |
970 | 0 | default: |
971 | 0 | if (fr_sbuff_is_terminal(&our_in, terminals)) FR_SBUFF_SET_RETURN(in, &our_in); |
972 | | |
973 | 0 | fr_strerror_printf("Unsupported regex flag '%c'", *our_in.p); |
974 | 0 | if (err) *err = -1; |
975 | 0 | FR_SBUFF_ERROR_RETURN(&our_in); |
976 | 0 | } |
977 | 0 | fr_sbuff_advance(&our_in, 1); |
978 | 0 | } |
979 | 0 | FR_SBUFF_SET_RETURN(in, &our_in); |
980 | 0 | } |
981 | | |
982 | | /** Print the flags |
983 | | * |
984 | | * @param[out] sbuff where to write flags. |
985 | | * @param[in] flags to print. |
986 | | * @return |
987 | | * - The number of bytes written to the out buffer. |
988 | | * - A number >= outlen if truncation has occurred. |
989 | | */ |
990 | | ssize_t regex_flags_print(fr_sbuff_t *sbuff, fr_regex_flags_t const *flags) |
991 | 0 | { |
992 | 0 | fr_sbuff_t our_sbuff = FR_SBUFF(sbuff); |
993 | |
|
994 | 0 | #define DO_REGEX_FLAG(_f, _c) \ |
995 | 0 | if (flags->_f) FR_SBUFF_IN_CHAR_RETURN(&our_sbuff, _c) |
996 | |
|
997 | 0 | DO_REGEX_FLAG(global, 'g'); |
998 | 0 | DO_REGEX_FLAG(ignore_case, 'i'); |
999 | 0 | DO_REGEX_FLAG(multiline, 'm'); |
1000 | 0 | DO_REGEX_FLAG(dot_all, 's'); |
1001 | 0 | DO_REGEX_FLAG(unicode, 'u'); |
1002 | 0 | DO_REGEX_FLAG(extended, 'x'); |
1003 | 0 | #undef DO_REGEX_FLAG |
1004 | | |
1005 | 0 | FR_SBUFF_SET_RETURN(sbuff, &our_sbuff); |
1006 | 0 | } |
1007 | | #endif |
1008 | | |
1009 | | /** Compare two boxes using an operator |
1010 | | * |
1011 | | * @todo - allow /foo/i on the RHS |
1012 | | * |
1013 | | * However, this involves allocating intermediate sbuffs for the |
1014 | | * unescaped RHS, and all kinds of extra work. It's not overly hard, |
1015 | | * but it's something we wish to avoid for now. |
1016 | | * |
1017 | | * @param[in] op to use in comparison. MUST be T_OP_REG_EQ or T_OP_REG_NE |
1018 | | * @param[in] a Value to compare, MUST be FR_TYPE_STRING |
1019 | | * @param[in] b uncompiled regex as FR_TYPE_STRING |
1020 | | * @return |
1021 | | * - 1 if true |
1022 | | * - 0 if false |
1023 | | * - -1 on failure. |
1024 | | */ |
1025 | | int fr_regex_cmp_op(fr_token_t op, fr_value_box_t const *a, fr_value_box_t const *b) |
1026 | 0 | { |
1027 | 0 | int rcode; |
1028 | 0 | TALLOC_CTX *ctx = NULL; |
1029 | 0 | size_t lhs_len; |
1030 | 0 | char const *lhs; |
1031 | 0 | regex_t *regex = NULL; |
1032 | |
|
1033 | 0 | if (!((op == T_OP_REG_EQ) || (op == T_OP_REG_NE))) { |
1034 | 0 | fr_strerror_const("Invalid operator for regex comparison"); |
1035 | 0 | return -1; |
1036 | 0 | } |
1037 | | |
1038 | 0 | if (b->type != FR_TYPE_STRING) { |
1039 | 0 | fr_strerror_const("RHS must be regular expression"); |
1040 | 0 | return -1; |
1041 | 0 | } |
1042 | | |
1043 | 0 | ctx = talloc_init_const("regex_cmp_op"); |
1044 | 0 | if (!ctx) return -1; |
1045 | | |
1046 | 0 | if ((a->type != FR_TYPE_STRING) && (a->type != FR_TYPE_OCTETS)) { |
1047 | 0 | fr_slen_t slen; |
1048 | 0 | char *p; |
1049 | |
|
1050 | 0 | slen = fr_value_box_aprint(ctx, &p, a, NULL); /* no escaping */ |
1051 | 0 | if (slen < 0) return slen; |
1052 | | |
1053 | 0 | lhs = p; |
1054 | 0 | lhs_len = slen; |
1055 | |
|
1056 | 0 | } else { |
1057 | 0 | lhs = a->vb_strvalue; |
1058 | 0 | lhs_len = a->vb_length; |
1059 | 0 | } |
1060 | | |
1061 | 0 | if (regex_compile(ctx, ®ex, b->vb_strvalue, b->vb_length, NULL, false, true) < 0) { |
1062 | 0 | talloc_free(ctx); |
1063 | 0 | return -1; |
1064 | 0 | } |
1065 | | |
1066 | | #ifdef STATIC_ANALYZER |
1067 | | if (!regex) { |
1068 | | talloc_free(ctx); |
1069 | | return -1; |
1070 | | } |
1071 | | #endif |
1072 | | |
1073 | 0 | rcode = regex_exec(regex, lhs, lhs_len, NULL); |
1074 | 0 | talloc_free(ctx); |
1075 | 0 | if (rcode < 0) return rcode; |
1076 | | |
1077 | | /* |
1078 | | * Invert the sense of the rcode for !~ |
1079 | | */ |
1080 | 0 | if (op == T_OP_REG_NE) rcode = (rcode == 0); |
1081 | |
|
1082 | 0 | return rcode; |
1083 | 0 | } |