Coverage Report

Created: 2026-02-26 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/regex.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <stdbool.h>
15
16
#include <isc/file.h>
17
#include <isc/regex.h>
18
#include <isc/string.h>
19
20
/*
 * FAIL(x) abandons validation by jumping to the 'error' label of
 * isc_regex_validate().  When VALREGEX_REPORT_REASON is non-zero the
 * explanatory string 'x' is first stored in the local 'reason' so that it
 * can be printed; otherwise the string is discarded.
 */
#if VALREGEX_REPORT_REASON
#define FAIL(x)               \
  do {                  \
    reason = (x); \
    goto error;   \
  } while (0)
#else /* if VALREGEX_REPORT_REASON */
#define FAIL(x) goto error
#endif /* if VALREGEX_REPORT_REASON */

/*
 * Validate the regular expression 'c' as interpreted in the 'C' locale.
 *
 * The expression is scanned once by a small state machine:
 *
 *   none           ordinary expression context (atoms, groups,
 *                  alternation, repetition operators, back references)
 *   parse_bound    inside a '{low,high}' repetition bound
 *   parse_bracket  inside a '[...]' bracket expression
 *   parse_ce       inside a '[.x.]' collating element
 *   parse_ec       inside a '[=x=]' equivalence class
 *   parse_cc       inside a '[:name:]' character class
 *
 * Returns:
 *   the number of '(' groups opened in the expression (>= 0) if the
 *   expression is accepted, or -1 if 'c' is NULL, empty, or rejected.
 */
int
isc_regex_validate(const char *c) {
  enum {
    none,
    parse_bracket,
    parse_bound,
    parse_ce,
    parse_ec,
    parse_cc
  } state = none;
  /* Well known character classes. */
  static const char *const cc[] = {
    ":alnum:", ":digit:", ":punct:", ":alpha:",
    ":graph:", ":space:", ":blank:", ":lower:",
    ":upper:", ":cntrl:", ":print:", ":xdigit:"
  };
  bool seen_comma = false;   /* ',' seen in the current bound */
  bool seen_high = false;    /* an upper bound digit was seen */
  bool seen_char = false;    /* bracket expression has content */
  bool seen_ec = false;      /* equivalence class has content */
  bool seen_ce = false;      /* collating element has content */
  bool have_atom = false;    /* something a repetition can apply to */
  int group = 0;             /* currently open '(' groups */
  int range = 0;             /* 2: just after '-', 1: just after range end */
  int sub = 0;               /* total '(' groups seen so far */
  bool empty_ok = false;     /* "()" is tolerated, "(a|)" is not */
  bool neg = false;          /* bracket expression started with '^' */
  bool was_multiple = false; /* previous token was a repetition/anchor */
  unsigned int low = 0;
  unsigned int high = 0;
  const char *ccname = NULL; /* start (leading ':') of a class name */
  int range_start = 0;       /* value of the last range start point */
#if VALREGEX_REPORT_REASON
  const char *reason = "";
#endif /* if VALREGEX_REPORT_REASON */

  if (c == NULL || *c == 0) {
    FAIL("empty string");
  }

  /* 'c' is known to be non-NULL here: FAIL() above never returns. */
  while (*c != 0) {
    switch (state) {
    case none:
      switch (*c) {
      case '\\': /* make literal */
        ++c;
        switch (*c) {
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          /* Only groups already opened may be referenced. */
          if ((*c - '0') > sub) {
            FAIL("bad back reference");
          }
          have_atom = true;
          was_multiple = false;
          break;
        case 0:
          FAIL("escaped end-of-string");
        default:
          goto literal;
        }
        ++c;
        break;
      case '[': /* bracket start */
        ++c;
        neg = false;
        was_multiple = false;
        seen_char = false;
        state = parse_bracket;
        break;
      case '{': /* bound start */
        /* '{' only starts a bound when a digit follows. */
        switch (c[1]) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          if (!have_atom) {
            FAIL("no atom");
          }
          if (was_multiple) {
            FAIL("was multiple");
          }
          seen_comma = false;
          seen_high = false;
          low = high = 0;
          state = parse_bound;
          break;
        default:
          goto literal;
        }
        ++c;
        have_atom = true;
        was_multiple = true;
        break;
      case '}':
        goto literal;
      case '(': /* group start */
        have_atom = false;
        was_multiple = false;
        empty_ok = true;
        ++group;
        ++sub;
        ++c;
        break;
      case ')': /* group end */
        if (group && !have_atom && !empty_ok) {
          FAIL("empty alternative");
        }
        have_atom = true;
        was_multiple = false;
        /* An unmatched ')' is accepted without closing anything. */
        if (group != 0) {
          --group;
        }
        ++c;
        break;
      case '|': /* alternative separator */
        if (!have_atom) {
          FAIL("no atom");
        }
        have_atom = false;
        empty_ok = false;
        was_multiple = false;
        ++c;
        break;
      case '^':
      case '$':
        /* Anchors count as atoms but cannot be repeated. */
        have_atom = true;
        was_multiple = true;
        ++c;
        break;
      case '+':
      case '*':
      case '?':
        if (was_multiple) {
          FAIL("was multiple");
        }
        if (!have_atom) {
          FAIL("no atom");
        }
        have_atom = true;
        was_multiple = true;
        ++c;
        break;
      case '.':
      default:
      literal:
        have_atom = true;
        was_multiple = false;
        ++c;
        break;
      }
      break;
    case parse_bound:
      switch (*c) {
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        /* Each bound is limited to 255. */
        if (!seen_comma) {
          low = low * 10 + *c - '0';
          if (low > 255) {
            FAIL("lower bound too big");
          }
        } else {
          seen_high = true;
          high = high * 10 + *c - '0';
          if (high > 255) {
            FAIL("upper bound too big");
          }
        }
        ++c;
        break;
      case ',':
        if (seen_comma) {
          FAIL("multiple commas");
        }
        seen_comma = true;
        ++c;
        break;
      default:
      case '{':
        FAIL("non digit/comma");
      case '}':
        if (seen_high && low > high) {
          FAIL("bad parse bound");
        }
        seen_comma = false;
        state = none;
        ++c;
        break;
      }
      break;
    case parse_bracket:
      switch (*c) {
      case '^':
        /* '^' negates only as the very first character. */
        if (seen_char || neg) {
          goto inside;
        }
        neg = true;
        ++c;
        break;
      case '-':
        /* '-' directly after '-' is the range end point. */
        if (range == 2) {
          goto inside;
        }
        /* A leading '-' is an ordinary character. */
        if (!seen_char) {
          goto inside;
        }
        /* '-' directly after a completed range ("a-b-"). */
        if (range == 1) {
          FAIL("bad range");
        }
        range = 2;
        ++c;
        break;
      case '[':
        ++c;
        switch (*c) {
        case '.': /* collating element */
          if (range != 0) {
            --range;
          }
          ++c;
          state = parse_ce;
          seen_ce = false;
          break;
        case '=': /* equivalence class */
          if (range == 2) {
            FAIL("equivalence class in "
                 "range");
          }
          ++c;
          state = parse_ec;
          seen_ec = false;
          break;
        case ':': /* character class */
          if (range == 2) {
            FAIL("character class in "
                 "range");
          }
          ccname = c;
          ++c;
          state = parse_cc;
          break;
        default:
          /*
           * A plain '[' inside the bracket; the following
           * character is handled on the next iteration.
           */
          break;
        }
        seen_char = true;
        break;
      case ']':
        if (!c[1] && !seen_char) {
          FAIL("unfinished brace");
        }
        /* A leading ']' is an ordinary character. */
        if (!seen_char) {
          goto inside;
        }
        ++c;
        range = 0;
        have_atom = true;
        state = none;
        break;
      default:
      inside:
        seen_char = true;
        if (range == 2 && (*c & 0xff) < range_start) {
          FAIL("out of order range");
        }
        if (range != 0) {
          --range;
        }
        range_start = *c & 0xff;
        ++c;
        break;
      }
      break;
    case parse_ce:
      switch (*c) {
      case '.':
        ++c;
        switch (*c) {
        case ']':
          if (!seen_ce) {
            FAIL("empty ce");
          }
          ++c;
          state = parse_bracket;
          break;
        default:
          /*
           * The '.' was part of the element itself.  An
           * element longer than one character gets a start
           * value of 256, above any single byte value.
           */
          if (seen_ce) {
            range_start = 256;
          } else {
            range_start = '.';
          }
          seen_ce = true;
          break;
        }
        break;
      default:
        if (seen_ce) {
          range_start = 256;
        } else {
          /*
           * Mask to 0..255 exactly as the 'inside' case
           * does; without the mask a byte >= 0x80 becomes
           * negative where plain char is signed and the
           * "out of order range" check is defeated.
           */
          range_start = *c & 0xff;
        }
        seen_ce = true;
        ++c;
        break;
      }
      break;
    case parse_ec:
      switch (*c) {
      case '=':
        ++c;
        switch (*c) {
        case ']':
          if (!seen_ec) {
            FAIL("no ec");
          }
          ++c;
          state = parse_bracket;
          break;
        default:
          /* The '=' was part of the class content. */
          seen_ec = true;
          break;
        }
        break;
      default:
        seen_ec = true;
        ++c;
        break;
      }
      break;
    case parse_cc:
      switch (*c) {
      case ':':
        ++c;
        switch (*c) {
        case ']': {
          /*
           * 'ccname' points at the opening ':' and 'c' at the
           * closing ']', so the span between them is the
           * ":name:" form stored in cc[].
           */
          unsigned int i;
          bool found = false;
          for (i = 0;
               i < sizeof(cc) / sizeof(*cc); i++)
          {
            unsigned int len;
            len = strlen(cc[i]);
            if (len !=
                (unsigned int)(c - ccname))
            {
              continue;
            }
            if (strncmp(cc[i], ccname, len))
            {
              continue;
            }
            found = true;
            break;
          }
          if (!found) {
            FAIL("unknown cc");
          }
          ++c;
          state = parse_bracket;
          break;
        }
        default:
          break;
        }
        break;
      default:
        ++c;
        break;
      }
      break;
    }
  }
  if (group != 0) {
    FAIL("group open");
  }
  if (state != none) {
    FAIL("incomplete");
  }
  if (!have_atom) {
    FAIL("no atom");
  }
  return sub;

error:
#if VALREGEX_REPORT_REASON
  fprintf(stderr, "%s\n", reason);
#endif /* if VALREGEX_REPORT_REASON */
  return -1;
}