Coverage Report

Created: 2025-07-11 06:24

/src/selinux/libselinux/src/regex.c
Line
Count
Source (jump to first uncovered line)
1
#include <assert.h>
2
#include <endian.h>
3
#include <pthread.h>
4
#include <stdint.h>
5
#include <stdio.h>
6
#include <string.h>
7
8
#include "regex.h"
9
#include "label_file.h"
10
#include "selinux_internal.h"
11
12
/*
 * Size type of the active regex back end. Its width is one of the fields
 * formatted into the architecture string.
 */
#ifndef USE_PCRE2
#define REGEX_ARCH_SIZE_T size_t
#else
#define REGEX_ARCH_SIZE_T PCRE2_SIZE
#endif
17
18
#ifndef __BYTE_ORDER__

/*
 * If the compiler doesn't define __BYTE_ORDER__, try to use the C
 * library <endian.h> header definitions.
 */
#ifndef __BYTE_ORDER
#error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
#endif

/*
 * The endianness checks in this file compare __BYTE_ORDER__ against
 * __ORDER_LITTLE_ENDIAN__ and __ORDER_BIG_ENDIAN__ (with trailing
 * underscores), so the fallback has to provide exactly those names.
 */
#ifndef __ORDER_LITTLE_ENDIAN__
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#endif
#ifndef __ORDER_BIG_ENDIAN__
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#endif

/* Spellings without the trailing underscores, kept for existing users. */
#define __ORDER_LITTLE_ENDIAN __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER

#endif
31
32
#ifdef USE_PCRE2
33
static pthread_once_t once = PTHREAD_ONCE_INIT;
34
static char arch_string_buffer[32];
35
36
static void regex_arch_string_init(void)
37
1
{
38
1
  char const *endianness;
39
1
  int rc;
40
41
1
  if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
42
1
    endianness = "el";
43
0
  else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
44
0
    endianness = "eb";
45
0
  else {
46
0
    arch_string_buffer[0] = '\0';
47
0
    return;
48
0
  }
49
50
1
  rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
51
1
      "%zu-%zu-%s", sizeof(void *),
52
1
      sizeof(REGEX_ARCH_SIZE_T),
53
1
      endianness);
54
1
  if (rc < 0 || (size_t)rc >= sizeof(arch_string_buffer)) {
55
0
    arch_string_buffer[0] = '\0';
56
0
    return;
57
0
  }
58
1
}
59
60
const char *regex_arch_string(void)
61
5.03k
{
62
5.03k
  __selinux_once(once, regex_arch_string_init);
63
64
5.03k
  return arch_string_buffer[0] != '\0' ? arch_string_buffer : NULL;
65
5.03k
}
66
67
struct regex_data {
68
  pcre2_code *regex; /* compiled regular expression */
69
#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
70
  /*
71
   * match data block required for the compiled
72
   * pattern in pcre2
73
   */
74
  pcre2_match_data *match_data;
75
#endif
76
  pthread_mutex_t match_mutex;
77
};
78
79
int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
80
           struct regex_error_data *errordata)
81
1.37M
{
82
1.37M
  memset(errordata, 0, sizeof(struct regex_error_data));
83
84
1.37M
  *regex = regex_data_create();
85
1.37M
  if (!(*regex))
86
0
    return -1;
87
88
1.37M
  (*regex)->regex = pcre2_compile(
89
1.37M
      (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
90
1.37M
      &errordata->error_code, &errordata->error_offset, NULL);
91
1.37M
  if (!(*regex)->regex) {
92
144
    goto err;
93
144
  }
94
95
1.37M
#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
96
1.37M
  (*regex)->match_data =
97
1.37M
      pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
98
1.37M
  if (!(*regex)->match_data) {
99
0
    goto err;
100
0
  }
101
1.37M
#endif
102
1.37M
  return 0;
103
104
144
err:
105
144
  regex_data_free(*regex);
106
144
  *regex = NULL;
107
144
  return -1;
108
1.37M
}
109
110
char const *regex_version(void)
111
5.03k
{
112
5.03k
  static char version_buf[256];
113
5.03k
  size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
114
5.03k
  if (len <= 0 || len > sizeof(version_buf))
115
0
    return NULL;
116
117
5.03k
  pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
118
5.03k
  return version_buf;
119
5.03k
}
120
121
int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
122
        int do_load_precompregex, bool *regex_compiled)
123
6.31k
{
124
6.31k
  int rc;
125
6.31k
  uint32_t data_u32, entry_len;
126
127
6.31k
  *regex_compiled = false;
128
6.31k
  rc = next_entry(&data_u32, mmap_area, sizeof(uint32_t));
129
6.31k
  if (rc < 0)
130
16
    return -1;
131
132
6.29k
  entry_len = be32toh(data_u32);
133
134
6.29k
  if (entry_len && do_load_precompregex) {
135
    /*
136
     * this should yield exactly one because we store one pattern at
137
     * a time
138
     */
139
0
    rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
140
0
    if (rc != 1)
141
0
      return -1;
142
143
0
    *regex = regex_data_create();
144
0
    if (!*regex)
145
0
      return -1;
146
147
0
    rc = pcre2_serialize_decode(&(*regex)->regex, 1,
148
0
              (PCRE2_SPTR)mmap_area->next_addr,
149
0
              NULL);
150
0
    if (rc != 1)
151
0
      goto err;
152
153
0
#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
154
0
    (*regex)->match_data =
155
0
        pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
156
0
    if (!(*regex)->match_data)
157
0
      goto err;
158
0
#endif
159
160
0
    *regex_compiled = true;
161
0
  }
162
163
  /* and skip the decoded bit */
164
6.29k
  rc = next_entry(NULL, mmap_area, entry_len);
165
6.29k
  if (rc < 0)
166
110
    goto err;
167
168
6.18k
  return 0;
169
110
err:
170
110
  regex_data_free(*regex);
171
110
  *regex = NULL;
172
110
  return -1;
173
6.29k
}
174
175
int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
176
0
{
177
0
  int rc = 0;
178
0
  size_t len;
179
0
  PCRE2_SIZE serialized_size;
180
0
  uint32_t to_write = 0, data_u32;
181
0
  PCRE2_UCHAR *bytes = NULL;
182
183
0
  if (do_write_precompregex) {
184
    /* encode the pattern for serialization */
185
0
    rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
186
0
              1, &bytes, &serialized_size, NULL);
187
0
    if (rc != 1 || serialized_size >= UINT32_MAX) {
188
0
      rc = -3;
189
0
      goto out;
190
0
    }
191
0
    to_write = serialized_size;
192
0
  }
193
194
  /* write serialized pattern's size */
195
0
  data_u32 = htobe32(to_write);
196
0
  len = fwrite(&data_u32, sizeof(uint32_t), 1, fp);
197
0
  if (len != 1) {
198
0
    rc = -1;
199
0
    goto out;
200
0
  }
201
202
0
  if (do_write_precompregex) {
203
    /* write serialized pattern */
204
0
    len = fwrite(bytes, 1, to_write, fp);
205
0
    if (len != to_write)
206
0
      rc = -1;
207
0
  }
208
209
0
out:
210
0
  if (bytes)
211
0
    pcre2_serialize_free(bytes);
212
213
0
  return rc;
214
0
}
215
216
void regex_data_free(struct regex_data *regex)
217
1.73M
{
218
1.73M
  if (regex) {
219
1.37M
    if (regex->regex)
220
1.37M
      pcre2_code_free(regex->regex);
221
222
1.37M
#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
223
1.37M
    if (regex->match_data)
224
1.37M
      pcre2_match_data_free(regex->match_data);
225
1.37M
#endif
226
227
1.37M
    __pthread_mutex_destroy(&regex->match_mutex);
228
1.37M
    free(regex);
229
1.37M
  }
230
1.73M
}
231
232
int regex_match(struct regex_data *regex, char const *subject, int partial)
233
660k
{
234
660k
  int rc;
235
660k
  pcre2_match_data *match_data;
236
660k
  __pthread_mutex_lock(&regex->match_mutex);
237
238
#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
239
  match_data = pcre2_match_data_create_from_pattern(
240
      regex->regex, NULL);
241
  if (match_data == NULL) {
242
    __pthread_mutex_unlock(&regex->match_mutex);
243
    return REGEX_ERROR;
244
  }
245
#else
246
660k
  match_data = regex->match_data;
247
660k
#endif
248
249
660k
  rc = pcre2_match(
250
660k
      regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
251
660k
      partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL);
252
253
#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
254
  // pcre2_match allocates heap and it won't be freed until
255
  // pcre2_match_data_free, resulting in heap overhead.
256
  pcre2_match_data_free(match_data);
257
#endif
258
259
660k
  __pthread_mutex_unlock(&regex->match_mutex);
260
660k
  if (rc > 0)
261
455k
    return REGEX_MATCH;
262
204k
  switch (rc) {
263
485
  case PCRE2_ERROR_PARTIAL:
264
485
    return REGEX_MATCH_PARTIAL;
265
204k
  case PCRE2_ERROR_NOMATCH:
266
204k
    return REGEX_NO_MATCH;
267
13
  default:
268
13
    return REGEX_ERROR;
269
204k
  }
270
204k
}
271
272
/*
273
 * TODO Replace this compare function with something that actually compares the
274
 * regular expressions.
275
 * This compare function basically just compares the binary representations of
276
 * the automatons, and because this representation contains pointers and
277
 * metadata, it can only return a match if regex1 == regex2.
278
 * Preferably, this function would be replaced with an algorithm that computes
279
 * the equivalence of the automatons systematically.
280
 */
281
int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
282
0
{
283
0
  int rc;
284
0
  size_t len1, len2;
285
0
  rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
286
0
  assert(rc == 0);
287
0
  rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
288
0
  assert(rc == 0);
289
0
  if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
290
0
    return SELABEL_INCOMPARABLE;
291
292
0
  return SELABEL_EQUAL;
293
0
}
294
295
struct regex_data *regex_data_create(void)
296
1.37M
{
297
1.37M
  struct regex_data *regex_data =
298
1.37M
    (struct regex_data *)calloc(1, sizeof(struct regex_data));
299
1.37M
  if (!regex_data)
300
0
    return NULL;
301
302
1.37M
  __pthread_mutex_init(&regex_data->match_mutex, NULL);
303
1.37M
  return regex_data;
304
1.37M
}
305
306
#else // !USE_PCRE2
307
/* This back end has no architecture string; it always returns "N/A". */
char const *regex_arch_string(void)
{
  static char const not_applicable[] = "N/A";

  return not_applicable;
}
311
312
/*
 * libpcre gained pcre_free_study() in version 8.20; with older versions
 * map the name to pcre_free().
 */
#if PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)
#define pcre_free_study pcre_free
#endif
316
317
struct regex_data {
318
  int owned;   /*
319
          * non zero if regex and pcre_extra is owned by this
320
          * structure and thus must be freed on destruction.
321
          */
322
  pcre *regex; /* compiled regular expression */
323
  union {
324
    pcre_extra *sd; /* pointer to extra compiled stuff */
325
    pcre_extra lsd; /* used to hold the mmap'd version */
326
  };
327
};
328
329
int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
330
           struct regex_error_data *errordata)
331
{
332
  memset(errordata, 0, sizeof(struct regex_error_data));
333
334
  *regex = regex_data_create();
335
  if (!(*regex))
336
    return -1;
337
338
  (*regex)->regex =
339
      pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
340
       &errordata->error_offset, NULL);
341
  if (!(*regex)->regex)
342
    goto err;
343
344
  (*regex)->owned = 1;
345
346
  (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
347
  if (!(*regex)->sd && errordata->error_buffer)
348
    goto err;
349
350
  return 0;
351
352
err:
353
  regex_data_free(*regex);
354
  *regex = NULL;
355
  return -1;
356
}
357
358
/* Return the version string reported by pcre_version(). */
char const *regex_version(void)
{
  char const *version = pcre_version();

  return version;
}
362
363
int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
364
        int do_load_precompregex __attribute__((unused)), bool *regex_compiled)
365
{
366
  int rc;
367
  uint32_t data_u32, entry_len;
368
  size_t info_len;
369
370
  rc = next_entry(&data_u32, mmap_area, sizeof(uint32_t));
371
  if (rc < 0)
372
    return -1;
373
374
  entry_len = be32toh(data_u32);
375
  if (!entry_len)
376
    return -1;
377
378
  *regex = regex_data_create();
379
  if (!(*regex))
380
    return -1;
381
382
  (*regex)->owned = 0;
383
  (*regex)->regex = (pcre *)mmap_area->next_addr;
384
  rc = next_entry(NULL, mmap_area, entry_len);
385
  if (rc < 0)
386
    goto err;
387
388
  /*
389
   * Check that regex lengths match. pcre_fullinfo()
390
   * also validates its magic number.
391
   */
392
  rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
393
  if (rc < 0 || info_len != entry_len)
394
    goto err;
395
396
  rc = next_entry(&data_u32, mmap_area, sizeof(uint32_t));
397
  if (rc < 0)
398
    goto err;
399
400
  entry_len = be32toh(data_u32);
401
402
  if (entry_len) {
403
    (*regex)->lsd.study_data = (void *)mmap_area->next_addr;
404
    (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
405
    rc = next_entry(NULL, mmap_area, entry_len);
406
    if (rc < 0)
407
      goto err;
408
409
    /* Check that study data lengths match. */
410
    rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
411
           PCRE_INFO_STUDYSIZE, &info_len);
412
    if (rc < 0 || info_len != entry_len)
413
      goto err;
414
  }
415
416
  *regex_compiled = true;
417
  return 0;
418
419
err:
420
  regex_data_free(*regex);
421
  *regex = NULL;
422
  return -1;
423
}
424
425
static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
426
{
427
  if (!regex) return NULL;
428
  if (regex->owned) {
429
    return regex->sd;
430
  } else if (regex->lsd.study_data) {
431
    return &regex->lsd;
432
  } else {
433
    return NULL;
434
  }
435
}
436
437
int regex_writef(struct regex_data *regex, FILE *fp,
438
     int do_write_precompregex __attribute__((unused)))
439
{
440
  int rc;
441
  size_t len;
442
  uint32_t data_u32;
443
  size_t size;
444
  pcre_extra *sd = get_pcre_extra(regex);
445
446
  /* determine the size of the pcre data in bytes */
447
  rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
448
  if (rc < 0 || size >= UINT32_MAX)
449
    return -3;
450
451
  /* write the number of bytes in the pcre data */
452
  data_u32 = htobe32(size);
453
  len = fwrite(&data_u32, sizeof(uint32_t), 1, fp);
454
  if (len != 1)
455
    return -1;
456
457
  /* write the actual pcre data as a char array */
458
  len = fwrite(regex->regex, 1, size, fp);
459
  if (len != size)
460
    return -1;
461
462
  if (sd) {
463
    /* determine the size of the pcre study info */
464
    rc =
465
        pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
466
    if (rc < 0 || size >= UINT32_MAX)
467
      return -3;
468
  } else
469
    size = 0;
470
471
  /* write the number of bytes in the pcre study data */
472
  data_u32 = htobe32(size);
473
  len = fwrite(&data_u32, sizeof(uint32_t), 1, fp);
474
  if (len != 1)
475
    return -1;
476
477
  if (sd) {
478
    /* write the actual pcre study data as a char array */
479
    len = fwrite(sd->study_data, 1, size, fp);
480
    if (len != size)
481
      return -1;
482
  }
483
484
  return 0;
485
}
486
487
void regex_data_free(struct regex_data *regex)
488
{
489
  if (regex) {
490
    if (regex->owned) {
491
      if (regex->regex)
492
        pcre_free(regex->regex);
493
      if (regex->sd)
494
        pcre_free_study(regex->sd);
495
    }
496
    free(regex);
497
  }
498
}
499
500
int regex_match(struct regex_data *regex, char const *subject, int partial)
501
{
502
  int rc;
503
504
  rc = pcre_exec(regex->regex, get_pcre_extra(regex),
505
           subject, strlen(subject), 0,
506
           partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
507
  switch (rc) {
508
  case 0:
509
    return REGEX_MATCH;
510
  case PCRE_ERROR_PARTIAL:
511
    return REGEX_MATCH_PARTIAL;
512
  case PCRE_ERROR_NOMATCH:
513
    return REGEX_NO_MATCH;
514
  default:
515
    return REGEX_ERROR;
516
  }
517
}
518
519
/*
520
 * TODO Replace this compare function with something that actually compares the
521
 * regular expressions.
522
 * This compare function basically just compares the binary representations of
523
 * the automatons, and because this representation contains pointers and
524
 * metadata, it can only return a match if regex1 == regex2.
525
 * Preferably, this function would be replaced with an algorithm that computes
526
 * the equivalence of the automatons systematically.
527
 */
528
int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
529
{
530
  int rc;
531
  size_t len1, len2;
532
  rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
533
  assert(rc == 0);
534
  rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
535
  assert(rc == 0);
536
  if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
537
    return SELABEL_INCOMPARABLE;
538
539
  return SELABEL_EQUAL;
540
}
541
542
struct regex_data *regex_data_create(void)
543
{
544
  return (struct regex_data *)calloc(1, sizeof(struct regex_data));
545
}
546
547
#endif
548
549
void regex_format_error(struct regex_error_data const *error_data, char *buffer,
550
      size_t buf_size)
551
144
{
552
144
  unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
553
144
  char *ptr = &buffer[buf_size - the_end_length];
554
144
  int rc = 0;
555
144
  size_t pos = 0;
556
144
  if (!buffer || !buf_size)
557
0
    return;
558
144
  rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
559
144
  if (rc < 0)
560
    /*
561
     * If snprintf fails it constitutes a logical error that needs
562
     * fixing.
563
     */
564
0
    abort();
565
566
144
  pos += rc;
567
144
  if (pos >= buf_size)
568
0
    goto truncated;
569
570
  /* Return early if there is no error to format */
571
144
#ifdef USE_PCRE2
572
144
  if (!error_data->error_code) {
573
0
    rc = snprintf(buffer + pos, buf_size - pos, "no error code");
574
0
    if (rc < 0)
575
0
      abort();
576
0
    pos += rc;
577
0
    if (pos >= buf_size)
578
0
      goto truncated;
579
0
    return;
580
0
  }
581
#else
582
  if (!error_data->error_buffer) {
583
    rc = snprintf(buffer + pos, buf_size - pos, "empty error");
584
    if (rc < 0)
585
      abort();
586
    pos += rc;
587
    if (pos >= buf_size)
588
      goto truncated;
589
    return;
590
  }
591
#endif
592
593
144
  if (error_data->error_offset > 0) {
594
144
#ifdef USE_PCRE2
595
144
    rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
596
144
            error_data->error_offset);
597
#else
598
    rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
599
            error_data->error_offset);
600
#endif
601
144
    if (rc < 0)
602
0
      abort();
603
144
    pos += rc;
604
144
    if (pos >= buf_size)
605
0
      goto truncated;
606
144
  }
607
608
144
#ifdef USE_PCRE2
609
144
  rc = pcre2_get_error_message(error_data->error_code,
610
144
             (PCRE2_UCHAR *)(buffer + pos),
611
144
             buf_size - pos);
612
144
  if (rc == PCRE2_ERROR_NOMEMORY)
613
0
    goto truncated;
614
#else
615
  rc = snprintf(buffer + pos, buf_size - pos, "%s",
616
          error_data->error_buffer);
617
  if (rc < 0)
618
    abort();
619
620
  if ((size_t)rc < strlen(error_data->error_buffer))
621
    goto truncated;
622
#endif
623
624
144
  return;
625
626
144
truncated:
627
  /* replace end of string with "..." to indicate that it was truncated */
628
0
  switch (the_end_length) {
629
  /* no break statements, fall-through is intended */
630
0
  case 4:
631
0
    *ptr++ = '.';
632
    /* FALLTHRU */
633
0
  case 3:
634
0
    *ptr++ = '.';
635
    /* FALLTHRU */
636
0
  case 2:
637
0
    *ptr++ = '.';
638
    /* FALLTHRU */
639
0
  case 1:
640
0
    *ptr++ = '\0';
641
    /* FALLTHRU */
642
0
  default:
643
0
    break;
644
0
  }
645
0
  return;
646
0
}