Coverage Report

Created: 2026-02-26 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/freeradius-server/src/lib/util/regex.c
Line
Count
Source
1
/*
2
 *   This program is free software; you can redistribute it and/or modify
3
 *   it under the terms of the GNU General Public License as published by
4
 *   the Free Software Foundation; either version 2 of the License, or
5
 *   (at your option) any later version.
6
 *
7
 *   This program is distributed in the hope that it will be useful,
8
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 *   GNU General Public License for more details.
11
 *
12
 *   You should have received a copy of the GNU General Public License
13
 *   along with this program; if not, write to the Free Software
14
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15
 */
16
17
/** Wrappers around various regular expression libraries
18
 *
19
 * @file src/lib/util/regex.c
20
 *
21
 * @copyright 2014 The FreeRADIUS server project
22
 * @copyright 2014 Arran Cudbard-Bell (a.cudbardb@freeradius.org)
23
 */
24
RCSID("$Id: ecec9010400c9be79a070b6c6380a57e34b0e64e $")
25
26
#ifdef HAVE_REGEX
27
28
#include <freeradius-devel/util/regex.h>
29
#include <freeradius-devel/util/atexit.h>
30
31
#if defined(HAVE_REGEX_PCRE2) && defined(PCRE2_CONFIG_JIT)
32
#ifndef FR_PCRE_JIT_STACK_MIN
33
#  define FR_PCRE_JIT_STACK_MIN (128 * 1024)
34
#endif
35
#ifndef FR_PCRE_JIT_STACK_MAX
36
#  define FR_PCRE_JIT_STACK_MAX (512 * 1024)
37
#endif
38
#endif
39
40
const fr_sbuff_escape_rules_t regex_escape_rules = {
41
  .name = "regex",
42
  .chr = '\\',
43
  .subs = {
44
    ['$'] = '$',
45
    ['('] = '(',
46
    ['*'] = '*',
47
    ['+'] = '+',
48
    ['.'] = '.',
49
    ['/'] = '/',
50
    ['?'] = '?',
51
    ['['] = '[',
52
    ['\\'] = '\\',
53
    ['^'] = '^',
54
    ['`'] = '`',
55
    ['|'] = '|',
56
    ['\a'] = 'a',
57
    ['\b'] = 'b',
58
    ['\n'] = 'n',
59
    ['\r'] = 'r',
60
    ['\t'] = 't',
61
    ['\v'] = 'v'
62
  },
63
  .esc = {
64
    SBUFF_CHAR_UNPRINTABLES_LOW,
65
    SBUFF_CHAR_UNPRINTABLES_EXTENDED
66
  },
67
  .do_utf8 = true,
68
  .do_oct = true
69
};
70
71
72
/*
73
 *######################################
74
 *#      FUNCTIONS FOR LIBPCRE2        #
75
 *######################################
76
 */
77
#ifdef HAVE_REGEX_PCRE2
78
/*
79
 *  Wrapper functions for libpcre2. Much more powerful, and guaranteed
80
 *  to be binary safe for both patterns and subjects but require
81
 *  libpcre2.
82
 */
83
84
/** Thread local storage for PCRE2
85
 *
86
 * Not all this storage is thread local, but it simplifies cleanup if
87
 * we bind its lifetime to the thread, and lets us get away with not
88
 * having specific init/free functions.
89
 */
90
typedef struct {
91
  TALLOC_CTX    *alloc_ctx; //!< Context used for any allocations.
92
  pcre2_general_context *gcontext;  //!< General context.
93
  pcre2_compile_context *ccontext;  //!< Compile context.
94
  pcre2_match_context *mcontext;  //!< Match context.
95
#ifdef PCRE2_CONFIG_JIT
96
  pcre2_jit_stack   *jit_stack; //!< Jit stack for executing jit'd patterns.
97
  bool      do_jit;   //!< Whether we have runtime JIT support.
98
#endif
99
} fr_pcre2_tls_t;
100
101
/** Thread local storage for pcre2
102
 *
103
 */
104
static _Thread_local fr_pcre2_tls_t *fr_pcre2_tls;
105
106
/** Talloc wrapper for pcre2 memory allocation
107
 *
108
 * @param[in] to_alloc    How many bytes to alloc.
109
 * @param[in] uctx    UNUSED.
110
 */
111
static void *_pcre2_talloc(PCRE2_SIZE to_alloc, UNUSED void *uctx)
112
{
113
  return talloc_array(fr_pcre2_tls->alloc_ctx, uint8_t, to_alloc);
114
}
115
116
/** Talloc wrapper for pcre2 memory freeing
117
 *
118
 * @param[in] to_free   Memory to free.
119
 * @param[in] uctx    UNUSED.
120
 */
121
static void _pcre2_talloc_free(void *to_free, UNUSED void *uctx)
122
{
123
  talloc_free(to_free);
124
}
125
126
/** Free thread local data
127
 *
128
 * @param[in] tls Thread local data to free.
129
 */
130
static int _pcre2_tls_free(fr_pcre2_tls_t *tls)
131
{
132
  if (tls->gcontext) pcre2_general_context_free(tls->gcontext);
133
  if (tls->ccontext) pcre2_compile_context_free(tls->ccontext);
134
  if (tls->mcontext) pcre2_match_context_free(tls->mcontext);
135
#ifdef PCRE2_CONFIG_JIT
136
  if (tls->jit_stack) pcre2_jit_stack_free(tls->jit_stack);
137
#endif
138
139
  return 0;
140
}
141
142
/** Thread exit callback, frees the thread local PCRE2 state
 *
 * @param[in] arg  The #fr_pcre2_tls_t registered by fr_pcre2_tls_init().
 * @return the result of talloc_free.
 */
static int _pcre2_tls_free_on_exit(void *arg)
{
  int ret;

  ret = talloc_free(arg);

  return ret;
}
146
147
/** Thread local init for pcre2
148
 *
149
 */
150
static int fr_pcre2_tls_init(void)
151
{
152
  fr_pcre2_tls_t *tls;
153
154
  if (unlikely(fr_pcre2_tls != NULL)) return 0;
155
156
  fr_pcre2_tls = tls = talloc_zero(NULL, fr_pcre2_tls_t);
157
  if (!tls) return -1;
158
  talloc_set_destructor(tls, _pcre2_tls_free);
159
160
  tls->gcontext = pcre2_general_context_create(_pcre2_talloc, _pcre2_talloc_free, NULL);
161
  if (!tls->gcontext) {
162
    fr_strerror_const("Failed allocating general context");
163
    return -1;
164
  }
165
166
  tls->ccontext = pcre2_compile_context_create(tls->gcontext);
167
  if (!tls->ccontext) {
168
    fr_strerror_const("Failed allocating compile context");
169
  error:
170
    fr_pcre2_tls = NULL;
171
    _pcre2_tls_free(tls);
172
    return -1;
173
  }
174
175
  tls->mcontext = pcre2_match_context_create(tls->gcontext);
176
  if (!tls->mcontext) {
177
    fr_strerror_const("Failed allocating match context");
178
    goto error;
179
  }
180
181
#ifdef PCRE2_CONFIG_JIT
182
  pcre2_config(PCRE2_CONFIG_JIT, &tls->do_jit);
183
  if (tls->do_jit) {
184
    tls->jit_stack = pcre2_jit_stack_create(FR_PCRE_JIT_STACK_MIN, FR_PCRE_JIT_STACK_MAX, tls->gcontext);
185
    if (!tls->jit_stack) {
186
      fr_strerror_const("Failed allocating JIT stack");
187
      goto error;
188
    }
189
    pcre2_jit_stack_assign(tls->mcontext, NULL, tls->jit_stack);
190
  }
191
#endif
192
193
  /*
194
   *  Free on thread exit
195
   */
196
  fr_atexit_thread_local(fr_pcre2_tls, _pcre2_tls_free_on_exit, tls);
197
  fr_pcre2_tls = tls; /* Assign to thread local storage */
198
199
  return 0;
200
}
201
202
/** Free regex_t structure
203
 *
204
 * Calls libpcre specific free functions for the expression and study.
205
 *
206
 * @param preg to free.
207
 */
208
static int _regex_free(regex_t *preg)
209
{
210
  if (preg->compiled) pcre2_code_free(preg->compiled);
211
212
  return 0;
213
}
214
215
/** Wrapper around pcre2_compile
216
 *
217
 * Allows the rest of the code to do compilations using one function signature.
218
 *
219
 * @note Compiled expression must be freed with talloc_free.
220
 *
221
 * @param[out] out    Where to write out a pointer to the structure containing
222
 *        the compiled expression.
223
 * @param[in] pattern   to compile.
224
 * @param[in] len   of pattern.
225
 * @param[in] flags   controlling matching. May be NULL.
226
 * @param[in] subcaptures Whether to compile the regular expression to store subcapture
227
 *        data.
228
 * @param[in] runtime   If false run the pattern through the PCRE JIT (if available)
229
 *        to convert it to machine code. This trades startup time (longer)
230
 *        for runtime performance (better).
231
 * @return
232
 *  - >= 1 on success.
233
 *  - <= 0 on error. Negative value is offset of parse error.
234
 */
235
ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
236
          fr_regex_flags_t const *flags, bool subcaptures, bool runtime)
237
{
238
  int   ret;
239
  PCRE2_SIZE  offset;
240
  uint32_t  cflags = 0;
241
  regex_t   *preg;
242
243
  /*
244
   *  Check inputs
245
   */
246
  *out = NULL;
247
248
  /*
249
   *  Thread local initialisation
250
   */
251
  if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
252
253
  if (len == 0) {
254
    fr_strerror_const("Empty expression");
255
    return 0;
256
  }
257
258
  /*
259
   *  Options
260
   */
261
  if (flags) {
262
     /* flags->global implemented by substitution function */
263
    if (flags->ignore_case) cflags |= PCRE2_CASELESS;
264
    if (flags->multiline) cflags |= PCRE2_MULTILINE;
265
    if (flags->dot_all) cflags |= PCRE2_DOTALL;
266
    if (flags->unicode) cflags |= PCRE2_UTF;
267
    if (flags->extended) cflags |= PCRE2_EXTENDED;
268
  }
269
270
  if (!subcaptures) cflags |= PCRE2_NO_AUTO_CAPTURE;
271
272
  preg = talloc_zero(ctx, regex_t);
273
  talloc_set_destructor(preg, _regex_free);
274
275
  preg->compiled = pcre2_compile((PCRE2_SPTR8)pattern, len,
276
               cflags, &ret, &offset, fr_pcre2_tls->ccontext);
277
  if (!preg->compiled) {
278
    PCRE2_UCHAR errbuff[128];
279
280
    pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
281
    fr_strerror_printf("%s", (char *)errbuff);
282
    talloc_free(preg);
283
284
    return -(ssize_t)offset;
285
  }
286
287
  if (!runtime) {
288
    preg->precompiled = true;
289
290
#ifdef PCRE2_CONFIG_JIT
291
    /*
292
     *  This is expensive, so only do it for
293
     *  expressions that are going to be
294
     *  evaluated repeatedly.
295
     */
296
    if (fr_pcre2_tls->do_jit) {
297
      ret = pcre2_jit_compile(preg->compiled, PCRE2_JIT_COMPLETE);
298
      if (ret < 0) {
299
        PCRE2_UCHAR errbuff[128];
300
301
        pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
302
        fr_strerror_printf("Pattern JIT failed: %s", (char *)errbuff);
303
        talloc_free(preg);
304
305
        return 0;
306
      }
307
      preg->jitd = true;
308
    }
309
#endif
310
  }
311
312
  *out = preg;
313
314
  return len;
315
}
316
317
/** Wrapper around pcre2_exec
318
 *
319
 * @param[in] preg  The compiled expression.
320
 * @param[in] subject to match.
321
 * @param[in] len Length of subject.
322
 * @param[in] regmatch  Array of match pointers.
323
 * @return
324
 *  - -1 on failure.
325
 *  - 0 on no match.
326
 *  - 1 on match.
327
 */
328
int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
329
{
330
  int     ret;
331
  uint32_t    options = 0;
332
333
  char      *our_subject = NULL;
334
  bool      dup_subject = true;
335
  pcre2_match_data  *match_data;
336
337
  /*
338
   *  Thread local initialisation
339
   */
340
  if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
341
342
  if (regmatch) {
343
#ifdef PCRE2_COPY_MATCHED_SUBJECT
344
    /*
345
     *  This is apparently only supported for pcre2_match
346
     *  NOT pcre2_jit_match.
347
     */
348
#  ifdef PCRE2_CONFIG_JIT
349
    if (!preg->jitd) {
350
#  endif
351
      dup_subject = false;
352
353
      /*
354
       *  If PCRE2_COPY_MATCHED_SUBJECT is available
355
       *  and set as an options flag, pcre2_match will
356
       *  strdup the subject string if pcre2_match is
357
       *  successful and store a pointer to it in the
358
       *  regmatch struct.
359
       *
360
       *  The lifetime of the string memory will be
361
       *  bound to the regmatch struct.  This is more
362
       *  efficient that doing it ourselves, as the
363
       *  strdup only occurs if the subject matches.
364
       */
365
      options |= PCRE2_COPY_MATCHED_SUBJECT;
366
#  ifdef PCRE2_CONFIG_JIT
367
    }
368
#  endif
369
#endif
370
    if (dup_subject) {
371
      /*
372
       *  We have to dup and operate on the duplicate
373
       *  of the subject, because pcre2_jit_match and
374
       *  pcre2_match store a pointer to the subject
375
       *  in the regmatch structure.
376
       */
377
      subject = our_subject = talloc_bstrndup(regmatch, subject, len);
378
      if (!subject) {
379
        fr_strerror_const("Out of memory");
380
        return -1;
381
      }
382
#ifndef NDEBUG
383
      regmatch->subject = subject; /* Stored only for tracking memory issues */
384
#endif
385
    }
386
  }
387
388
  /*
389
   *  If we weren't given match data we
390
   *  need to alloc it else pcre2_match
391
   *  fails when passed NULL match data.
392
   */
393
  if (!regmatch) {
394
    match_data = pcre2_match_data_create_from_pattern(preg->compiled, fr_pcre2_tls->gcontext);
395
    if (!match_data) {
396
      fr_strerror_const("Failed allocating temporary match data");
397
      return -1;
398
    }
399
  } else {
400
    match_data = regmatch->match_data;
401
  }
402
403
#ifdef PCRE2_CONFIG_JIT
404
  if (preg->jitd) {
405
    ret = pcre2_jit_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
406
              match_data, fr_pcre2_tls->mcontext);
407
  } else
408
#endif
409
  {
410
    ret = pcre2_match(preg->compiled, (PCRE2_SPTR8)subject, len, 0, options,
411
          match_data, fr_pcre2_tls->mcontext);
412
  }
413
  if (!regmatch) pcre2_match_data_free(match_data);
414
  if (ret < 0) {
415
    PCRE2_UCHAR errbuff[128];
416
417
    if (dup_subject) talloc_free(our_subject);
418
419
    if (ret == PCRE2_ERROR_NOMATCH) {
420
      if (regmatch) regmatch->used = 0;
421
      return 0;
422
    }
423
424
    pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
425
    fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
426
427
    return -1;
428
  }
429
430
  if (regmatch) regmatch->used = ret;
431
432
  return 1;
433
}
434
435
/** Wrapper around pcre2_substitute
436
 *
437
 * @param[in] ctx   to allocate output string in.
438
 * @param[out] out    Output string with replacements performed.
439
 * @param[in] max_out   Maximum length of output buffer.  If this is 0 then
440
 *        the output length is unlimited.
441
 * @param[in] preg    The compiled expression.
442
 * @param[in] flags   that affect matching.
443
 * @param[in] subject   to perform replacements on.
444
 * @param[in] subject_len the length of the subject.
445
 * @param[in] replacement replacement string containing substitution
446
 *        markers.
447
 * @param[in] replacement_len Length of the replacement string.
448
 * @param[in] regmatch    Array of match pointers.
449
 * @return
450
 *  - >= 0 the length of the output string.
451
 *  - < 0 on error.
452
 */
453
int regex_substitute(TALLOC_CTX *ctx, char **out, size_t max_out, regex_t *preg, fr_regex_flags_t const *flags,
454
         char const *subject, size_t subject_len,
455
         char const *replacement, size_t replacement_len,
456
         fr_regmatch_t *regmatch)
457
{
458
  int     ret;
459
  uint32_t    options = 0;
460
  size_t      buff_len, actual_len;
461
  char      *buff;
462
463
#ifndef PCRE2_COPY_MATCHED_SUBJECT
464
  char      *our_subject = NULL;
465
#endif
466
467
  /*
468
   *  Thread local initialisation
469
   */
470
  if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return -1;
471
472
  /*
473
   *  Internally pcre2_substitute just calls pcre2_match to
474
   *  generate the match data, so the same hack as the
475
   *  regex_exec function above is required.
476
   */
477
  if (regmatch) {
478
#ifndef PCRE2_COPY_MATCHED_SUBJECT
479
    /*
480
     *  We have to dup and operate on the duplicate
481
     *  of the subject, because pcre2_jit_match and
482
     *  pcre2_match store a pointer to the subject
483
     *  in the regmatch structure.
484
     */
485
    subject = our_subject = talloc_bstrndup(regmatch, subject, subject_len);
486
    if (!subject) {
487
      fr_strerror_const("Out of memory");
488
      return -1;
489
    }
490
#else
491
    /*
492
     *  If PCRE2_COPY_MATCHED_SUBJECT is available
493
     *  and set as an options flag, pcre2_match will
494
     *  strdup the subject string if pcre2_match is
495
     *  successful and store a pointer to it in the
496
     *  regmatch struct.
497
     *
498
     *  The lifetime of the string memory will be
499
     *  bound to the regmatch struct.  This is more
500
     *  efficient that doing it ourselves, as the
501
     *  strdup only occurs if the subject matches.
502
     */
503
    options |= PCRE2_COPY_MATCHED_SUBJECT;
504
#endif
505
  }
506
507
  /*
508
   *  Guess (badly) what the length of the output buffer should be
509
   */
510
  actual_len = buff_len = subject_len + 1;  /* +1 for the \0 */
511
  buff = talloc_array(ctx, char, buff_len);
512
  if (!buff) {
513
#ifndef PCRE2_COPY_MATCHED_SUBJECT
514
    talloc_free(our_subject);
515
#endif
516
    fr_strerror_const("Out of memory");
517
    return -1;
518
  }
519
520
  options |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
521
  if (flags->global) options |= PCRE2_SUBSTITUTE_GLOBAL;
522
523
again:
524
  /*
525
   *  actual_len input value should be the size of the
526
   *  buffer including space for '\0'.
527
   *  If input buffer is too small, then actual_len will be set
528
   *      to the buffer space needed including space for '\0'.
529
   *  If input buffer is the correct size, then actual_len
530
   *  will be set to the size of the string written to buff
531
   *  without the terminating '\0'.
532
   */
533
  ret = pcre2_substitute(preg->compiled,
534
             (PCRE2_SPTR8)subject, (PCRE2_SIZE)subject_len, 0,
535
             options, NULL, fr_pcre2_tls->mcontext,
536
             (PCRE2_UCHAR const *)replacement, replacement_len, (PCRE2_UCHAR *)buff, &actual_len);
537
538
  if (ret < 0) {
539
    PCRE2_UCHAR errbuff[128];
540
541
#ifndef PCRE2_COPY_MATCHED_SUBJECT
542
    talloc_free(our_subject);
543
#endif
544
    talloc_free(buff);
545
546
    if (ret == PCRE2_ERROR_NOMEMORY) {
547
      if ((max_out > 0) && (actual_len > max_out)) {
548
        fr_strerror_printf("String length with substitutions (%zu) "
549
                "exceeds max string length (%zu)", actual_len - 1, max_out - 1);
550
        return -1;
551
      }
552
553
      /*
554
       *  Check that actual_len != buff_len as that'd be
555
       *  an actual error.
556
       */
557
      if (actual_len == buff_len) {
558
        fr_strerror_const("libpcre2 out of memory");
559
        return -1;
560
      }
561
      buff_len = actual_len;  /* The length we get passed back includes the \0 */
562
      buff = talloc_array(ctx, char, buff_len);
563
      goto again;
564
    }
565
566
    if (ret == PCRE2_ERROR_NOMATCH) {
567
      if (regmatch) regmatch->used = 0;
568
      return 0;
569
    }
570
571
    pcre2_get_error_message(ret, errbuff, sizeof(errbuff));
572
    fr_strerror_printf("regex evaluation failed with code (%i): %s", ret, errbuff);
573
    return -1;
574
  }
575
576
  /*
577
   *  Trim the replacement buffer to the correct length
578
   *
579
   *  buff_len includes \0.
580
   *  ...and as pcre2_substitute just succeeded actual_len does not include \0.
581
   */
582
  if (actual_len < (buff_len - 1)) {
583
    buff = talloc_bstr_realloc(ctx, buff, actual_len);
584
    if (!buff) {
585
      fr_strerror_const("reallocing pcre2_substitute result buffer failed");
586
      return -1;
587
    }
588
  }
589
590
  if (regmatch) regmatch->used = ret;
591
  *out = buff;
592
593
  return 1;
594
}
595
596
597
/** Returns the number of subcapture groups
598
 *
599
 * @return
600
 *  - >0 The number of subcaptures contained within the pattern
601
 *  - 0 if the number of subcaptures can't be determined.
602
 */
603
uint32_t regex_subcapture_count(regex_t const *preg)
604
{
605
  uint32_t count;
606
607
  if (pcre2_pattern_info(preg->compiled, PCRE2_INFO_CAPTURECOUNT, &count) != 0) {
608
    fr_strerror_const("Error determining subcapture group count");
609
    return 0;
610
  }
611
612
  return count + 1;
613
}
614
615
/** Free libpcre2's matchdata
616
 *
617
 * @note Don't call directly, will be called if talloc_free is called on a #regmatch_t.
618
 */
619
static int _pcre2_match_data_free(fr_regmatch_t *regmatch)
620
{
621
  pcre2_match_data_free(regmatch->match_data);
622
  return 0;
623
}
624
625
/** Allocate vectors to fill with match data
626
 *
627
 * @param[in] ctx to allocate match vectors in.
628
 * @param[in] count The number of vectors to allocate.
629
 * @return
630
 *  - NULL on error.
631
 *  - Array of match vectors.
632
 */
633
fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
634
{
635
  fr_regmatch_t *regmatch;
636
637
  /*
638
   *  Thread local initialisation
639
   */
640
  if (unlikely(!fr_pcre2_tls) && (fr_pcre2_tls_init() < 0)) return NULL;
641
642
  regmatch = talloc(ctx, fr_regmatch_t);
643
  if (!regmatch) {
644
  oom:
645
    fr_strerror_const("Out of memory");
646
    return NULL;
647
  }
648
649
  regmatch->match_data = pcre2_match_data_create(count, fr_pcre2_tls->gcontext);
650
  if (!regmatch->match_data) {
651
    talloc_free(regmatch);
652
    goto oom;
653
  }
654
  talloc_set_type(regmatch->match_data, pcre2_match_data);
655
656
  talloc_set_destructor(regmatch, _pcre2_match_data_free);
657
658
  return regmatch;
659
}
660
661
/*
662
 *######################################
663
 *#    FUNCTIONS FOR POSIX-REGEX      #
664
 *######################################
665
 */
666
#  else
667
/*
668
 *  Wrapper functions for POSIX like, and extended regular
669
 *  expressions.  These use the system regex library.
670
 */
671
672
/** Talloc destructor for a heap allocated regex_t
 *
 * The regex_t lives on the heap so that regex_compile can keep the same
 * signature whether POSIX or libpcre is in use.
 *
 * @param preg  being freed.
 * @return always 0.
 */
static int _regex_free(regex_t *preg)
{
  regfree(preg);  /* Releases what regcomp allocated inside the regex_t */

  return 0;
}
685
686
/** Binary safe wrapper around regcomp
687
 *
688
 * If we have the BSD extensions we don't need to do any special work
689
 * if we don't have the BSD extensions we need to check to see if the
690
 * regular expression contains any \0 bytes.
691
 *
692
 * If it does we fail and print the appropriate error message.
693
 *
694
 * @note Compiled expression must be freed with talloc_free.
695
 *
696
 * @param[in] ctx   To allocate memory in.
697
 * @param[out] out    Where to write out a pointer
698
 *        to the structure containing the compiled expression.
699
 * @param[in] pattern   to compile.
700
 * @param[in] len   of pattern.
701
 * @param[in] flags   controlling matching.  May be NULL.
702
 * @param[in] subcaptures Whether to compile the regular expression
703
 *        to store subcapture data.
704
 * @param[in] runtime   Whether the compilation is being done at runtime.
705
 * @return
706
 *  - >= 1 on success.
707
 *  - <= 0 on error. Negative value is offset of parse error.
708
 *  With POSIX regex we only give the correct offset for embedded \0 errors.
709
 */
710
ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
711
          fr_regex_flags_t const *flags, bool subcaptures, UNUSED bool runtime)
712
0
{
713
0
  int ret;
714
0
  int cflags = REG_EXTENDED;
715
0
  regex_t *preg;
716
717
0
  if (len == 0) {
718
0
    fr_strerror_const("Empty expression");
719
0
    return 0;
720
0
  }
721
722
  /*
723
   *  Options
724
   */
725
0
  if (flags) {
726
0
    if (flags->global) {
727
0
      fr_strerror_const("g - Global matching/substitution not supported with posix-regex");
728
0
      return 0;
729
0
    }
730
0
    if (flags->dot_all) {
731
0
      fr_strerror_const("s - Single line matching is not supported with posix-regex");
732
0
      return 0;
733
0
    }
734
0
    if (flags->unicode) {
735
0
      fr_strerror_const("u - Unicode matching not supported with posix-regex");
736
0
      return 0;
737
0
    }
738
0
    if (flags->extended) {
739
0
      fr_strerror_const("x - Whitespace and comments not supported with posix-regex");
740
0
      return 0;
741
0
    }
742
743
0
    if (flags->ignore_case) cflags |= REG_ICASE;
744
0
    if (flags->multiline) cflags |= REG_NEWLINE;
745
0
  }
746
747
748
0
  if (!subcaptures) cflags |= REG_NOSUB;
749
750
0
#ifndef HAVE_REGNCOMP
751
0
  {
752
0
    char const *p;
753
754
0
    p = pattern;
755
0
    p += strlen(pattern);
756
757
0
    if ((size_t)(p - pattern) != len) {
758
0
      fr_strerror_printf("Found null in pattern at offset %zu.  Pattern unsafe for compilation",
759
0
             (p - pattern));
760
0
      return -(p - pattern);
761
0
    }
762
763
0
    preg = talloc_zero(ctx, regex_t);
764
0
    if (!preg) return 0;
765
766
0
    ret = regcomp(preg, pattern, cflags);
767
0
  }
768
#else
769
  preg = talloc_zero(ctx, regex_t);
770
  if (!preg) return 0;
771
  ret = regncomp(preg, pattern, len, cflags);
772
#endif
773
0
  if (ret != 0) {
774
0
    char errbuf[128];
775
776
0
    regerror(ret, preg, errbuf, sizeof(errbuf));
777
0
    fr_strerror_printf("%s", errbuf);
778
779
0
    talloc_free(preg);
780
781
0
    return 0; /* POSIX expressions don't give us the failure offset */
782
0
  }
783
784
0
  talloc_set_destructor(preg, _regex_free);
785
0
  *out = preg;
786
787
0
  return len;
788
0
}
789
790
/** Binary safe wrapper around regexec
791
 *
792
 * If we have the BSD extensions we don't need to do any special work
793
 * If we don't have the BSD extensions we need to check to see if the
794
 * value to be compared contains any \0 bytes.
795
 *
796
 * If it does, we fail and print the appropriate error message.
797
 *
798
 * @param[in] preg  The compiled expression.
799
 * @param[in] subject to match.
800
 * @param[in] regmatch  Match result structure.
801
 * @return
802
 *  - -1 on failure.
803
 *  - 0 on no match.
804
 *  - 1 on match.
805
 */
806
int regex_exec(regex_t *preg, char const *subject, size_t len, fr_regmatch_t *regmatch)
807
0
{
808
0
  int ret;
809
0
  size_t  matches;
810
811
  /*
812
   *  Disable capturing
813
   */
814
0
  if (!regmatch) {
815
0
    matches = 0;
816
0
  } else {
817
0
    matches = regmatch->allocd;
818
819
    /*
820
     *  Reset the match result structure
821
     */
822
0
    memset(regmatch->match_data, 0, sizeof(regmatch->match_data[0]) * matches);
823
0
    regmatch->used = 0;
824
0
  }
825
826
0
#ifndef HAVE_REGNEXEC
827
0
  {
828
0
    char const *p;
829
830
0
    p = subject;
831
0
    p += strlen(subject);
832
833
0
    if ((size_t)(p - subject) != len) {
834
0
      fr_strerror_printf("Found null in subject at offset %zu.  String unsafe for evaluation",
835
0
             (p - subject));
836
0
      if (regmatch) regmatch->used = 0;
837
0
      return -1;
838
0
    }
839
0
    ret = regexec(preg, subject, matches, regmatch ? regmatch->match_data : NULL, 0);
840
0
  }
841
#else
842
  ret = regnexec(preg, subject, len, matches, regmatch ? regmatch->match_data : NULL, 0);
843
#endif
844
0
  if (ret != 0) {
845
0
    if (ret != REG_NOMATCH) {
846
0
      char errbuf[128];
847
848
0
      regerror(ret, preg, errbuf, sizeof(errbuf));
849
850
0
      fr_strerror_printf("regex evaluation failed: %s", errbuf);
851
0
      return -1;
852
0
    }
853
0
    return 0;
854
0
  }
855
856
  /*
857
   *  Update regmatch->count to be the maximum number of
858
   *  groups that *could* have been populated as we don't
859
   *  have the number of matches.
860
   */
861
0
  if (regmatch) {
862
0
    regmatch->used = preg->re_nsub + 1;
863
864
0
    if (regmatch->subject) talloc_const_free(regmatch->subject);
865
0
    regmatch->subject = talloc_bstrndup(regmatch, subject, len);
866
0
    if (!regmatch->subject) {
867
0
      fr_strerror_const("Out of memory");
868
0
      return -1;
869
0
    }
870
0
  }
871
0
  return 1;
872
0
}
873
874
/** Returns the number of subcapture groups
875
 *
876
 * @return
877
 *  - 0 we can't determine this for POSIX regular expressions.
878
 */
879
uint32_t regex_subcapture_count(UNUSED regex_t const *preg)
880
0
{
881
0
  return 0;
882
0
}
883
#  endif
884
885
#  if defined(HAVE_REGEX_POSIX)
886
/** Allocate vectors to fill with match data
887
 *
888
 * @param[in] ctx to allocate match vectors in.
889
 * @param[in] count The number of vectors to allocate.
890
 * @return
891
 *  - NULL on error.
892
 *  - Array of match vectors.
893
 */
894
fr_regmatch_t *regex_match_data_alloc(TALLOC_CTX *ctx, uint32_t count)
895
0
{
896
0
  fr_regmatch_t *regmatch;
897
898
  /*
899
   *  Pre-allocate space for the match structure
900
   *  and for a 128b subject string.
901
   */
902
0
  regmatch = talloc_zero_pooled_object(ctx, fr_regmatch_t, 2, (sizeof(regmatch_t) * count) + 128);
903
0
  if (unlikely(!regmatch)) {
904
0
  error:
905
0
    fr_strerror_const("Out of memory");
906
0
    talloc_free(regmatch);
907
0
    return NULL;
908
0
  }
909
0
  regmatch->match_data = talloc_array(regmatch, regmatch_t, count);
910
0
  if (unlikely(!regmatch->match_data)) goto error;
911
912
0
  regmatch->allocd = count;
913
0
  regmatch->used = 0;
914
0
  regmatch->subject = NULL;
915
916
0
  return regmatch;
917
0
}
918
#  endif
919
920
/*
921
 *########################################
922
 *#         UNIVERSAL FUNCTIONS          #
923
 *########################################
924
 */
925
926
/** Parse a string containing one or more regex flags
927
 *
928
 * @param[out] err    May be NULL. If not NULL will be set to:
929
 *        - 0 on success.
930
 *        - -1 on unknown flag.
931
 *        - -2 on duplicate.
932
 * @param[out] out    Flag structure to populate.  Must be initialised to zero
933
 *        if this is the first call to regex_flags_parse.
934
 * @param[in] in    Flag string to parse.
935
 * @param[in] terminals   Terminal characters. If parsing ends before the buffer
936
 *        is exhausted, and is pointing to one of these chars
937
 *        it's not considered an error.
938
 * @param[in] err_on_dup  Error if the flag is already set.
939
 * @return
940
 *      - > 0 on success.  The number of flag bytes parsed.
941
 *  - <= 0 on failure.  Negative offset of first unrecognised flag.
942
 */
943
fr_slen_t regex_flags_parse(int *err, fr_regex_flags_t *out, fr_sbuff_t *in,
944
          fr_sbuff_term_t const *terminals, bool err_on_dup)
945
0
{
946
0
  fr_sbuff_t  our_in = FR_SBUFF(in);
947
948
0
  if (err) *err = 0;
949
950
0
  while (fr_sbuff_extend(&our_in)) {
951
0
    switch (*our_in.p) {
952
0
#define DO_REGEX_FLAG(_f, _c) \
953
0
    case _c: \
954
0
      if (err_on_dup && out->_f) { \
955
0
        fr_strerror_printf("Duplicate regex flag '%c'", *our_in.p); \
956
0
        if (err) *err = -2; \
957
0
        FR_SBUFF_ERROR_RETURN(&our_in); \
958
0
      } \
959
0
      out->_f = 1; \
960
0
      break
961
962
0
    DO_REGEX_FLAG(global, 'g');
963
0
    DO_REGEX_FLAG(ignore_case, 'i');
964
0
    DO_REGEX_FLAG(multiline, 'm');
965
0
    DO_REGEX_FLAG(dot_all, 's');
966
0
    DO_REGEX_FLAG(unicode, 'u');
967
0
    DO_REGEX_FLAG(extended, 'x');
968
0
#undef DO_REGEX_FLAG
969
970
0
    default:
971
0
      if (fr_sbuff_is_terminal(&our_in, terminals)) FR_SBUFF_SET_RETURN(in, &our_in);
972
973
0
      fr_strerror_printf("Unsupported regex flag '%c'", *our_in.p);
974
0
      if (err) *err = -1;
975
0
      FR_SBUFF_ERROR_RETURN(&our_in);
976
0
    }
977
0
    fr_sbuff_advance(&our_in, 1);
978
0
  }
979
0
  FR_SBUFF_SET_RETURN(in, &our_in);
980
0
}
981
982
/** Print the flags
983
 *
984
 * @param[out] sbuff  where to write flags.
985
 * @param[in] flags to print.
986
 * @return
987
 *  - The number of bytes written to the out buffer.
988
 *  - A number >= outlen if truncation has occurred.
989
 */
990
ssize_t regex_flags_print(fr_sbuff_t *sbuff, fr_regex_flags_t const *flags)
991
0
{
992
0
  fr_sbuff_t our_sbuff = FR_SBUFF(sbuff);
993
994
0
#define DO_REGEX_FLAG(_f, _c) \
995
0
  if (flags->_f) FR_SBUFF_IN_CHAR_RETURN(&our_sbuff, _c)
996
997
0
  DO_REGEX_FLAG(global, 'g');
998
0
  DO_REGEX_FLAG(ignore_case, 'i');
999
0
  DO_REGEX_FLAG(multiline, 'm');
1000
0
  DO_REGEX_FLAG(dot_all, 's');
1001
0
  DO_REGEX_FLAG(unicode, 'u');
1002
0
  DO_REGEX_FLAG(extended, 'x');
1003
0
#undef DO_REGEX_FLAG
1004
1005
0
  FR_SBUFF_SET_RETURN(sbuff, &our_sbuff);
1006
0
}
1007
#endif
1008
1009
/** Compare two boxes using an operator
1010
 *
1011
 *  @todo - allow /foo/i on the RHS
1012
 *
1013
 *  However, this involves allocating intermediate sbuffs for the
1014
 *  unescaped RHS, and all kinds of extra work.  It's not overly hard,
1015
 *  but it's something we wish to avoid for now.
1016
 *
1017
 * @param[in] op to use in comparison. MUST be T_OP_REG_EQ or T_OP_REG_NE
1018
 * @param[in] a Value to compare,  MUST be FR_TYPE_STRING
1019
 * @param[in] b uncompiled regex as FR_TYPE_STRING
1020
 * @return
1021
 *  - 1 if true
1022
 *  - 0 if false
1023
 *  - -1 on failure.
1024
 */
1025
int fr_regex_cmp_op(fr_token_t op, fr_value_box_t const *a, fr_value_box_t const *b)
1026
0
{
1027
0
  int rcode;
1028
0
  TALLOC_CTX *ctx = NULL;
1029
0
  size_t lhs_len;
1030
0
  char const *lhs;
1031
0
  regex_t *regex = NULL;
1032
1033
0
  if (!((op == T_OP_REG_EQ) || (op == T_OP_REG_NE))) {
1034
0
    fr_strerror_const("Invalid operator for regex comparison");
1035
0
    return -1;
1036
0
  }
1037
1038
0
  if (b->type != FR_TYPE_STRING) {
1039
0
    fr_strerror_const("RHS must be regular expression");
1040
0
    return -1;
1041
0
  }
1042
1043
0
  ctx = talloc_init_const("regex_cmp_op");
1044
0
  if (!ctx) return -1;
1045
1046
0
  if ((a->type != FR_TYPE_STRING) && (a->type != FR_TYPE_OCTETS)) {
1047
0
    fr_slen_t slen;
1048
0
    char *p;
1049
1050
0
    slen = fr_value_box_aprint(ctx, &p, a, NULL); /* no escaping */
1051
0
    if (slen < 0) return slen;
1052
1053
0
    lhs = p;
1054
0
    lhs_len = slen;
1055
1056
0
  } else {
1057
0
    lhs = a->vb_strvalue;
1058
0
    lhs_len = a->vb_length;
1059
0
  }
1060
1061
0
  if (regex_compile(ctx, &regex, b->vb_strvalue, b->vb_length, NULL, false, true) < 0) {
1062
0
    talloc_free(ctx);
1063
0
    return -1;
1064
0
  }
1065
1066
#ifdef STATIC_ANALYZER
1067
  if (!regex) {
1068
    talloc_free(ctx);
1069
    return -1;
1070
  }
1071
#endif
1072
1073
0
  rcode = regex_exec(regex, lhs, lhs_len, NULL);
1074
0
  talloc_free(ctx);
1075
0
  if (rcode < 0) return rcode;
1076
1077
  /*
1078
   *  Invert the sense of the rcode for !~
1079
   */
1080
0
  if (op == T_OP_REG_NE) rcode = (rcode == 0);
1081
1082
0
  return rcode;
1083
0
}