Coverage Report

Created: 2026-06-09 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/regex.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <stdbool.h>
15
16
#include <isc/file.h>
17
#include <isc/regex.h>
18
#include <isc/string.h>
19
20
#if VALREGEX_REPORT_REASON
21
#define FAIL(x)               \
22
  do {                  \
23
    reason = (x); \
24
    goto error;   \
25
  } while (0)
26
#else /* if VALREGEX_REPORT_REASON */
27
1.40k
#define FAIL(x) goto error
28
#endif /* if VALREGEX_REPORT_REASON */
29
30
/*
31
 * Validate the regular expression 'C' locale.
32
 */
33
int
34
8.41k
isc_regex_validate(const char *c) {
35
8.41k
  enum {
36
8.41k
    none,
37
8.41k
    parse_bracket,
38
8.41k
    parse_bound,
39
8.41k
    parse_ce,
40
8.41k
    parse_ec,
41
8.41k
    parse_cc
42
8.41k
  } state = none;
43
  /* Well known character classes. */
44
8.41k
  const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
45
8.41k
           ":graph:", ":space:", ":blank:", ":lower:",
46
8.41k
           ":upper:", ":cntrl:", ":print:", ":xdigit:" };
47
8.41k
  bool seen_comma = false;
48
8.41k
  bool seen_high = false;
49
8.41k
  bool seen_char = false;
50
8.41k
  bool seen_ec = false;
51
8.41k
  bool seen_ce = false;
52
8.41k
  bool have_atom = false;
53
8.41k
  int group = 0;
54
8.41k
  int range = 0;
55
8.41k
  int sub = 0;
56
8.41k
  bool empty_ok = false;
57
8.41k
  bool neg = false;
58
8.41k
  bool was_multiple = false;
59
8.41k
  unsigned int low = 0;
60
8.41k
  unsigned int high = 0;
61
8.41k
  const char *ccname = NULL;
62
8.41k
  int range_start = 0;
63
#if VALREGEX_REPORT_REASON
64
  const char *reason = "";
65
#endif /* if VALREGEX_REPORT_REASON */
66
67
8.41k
  if (c == NULL || *c == 0) {
68
26
    FAIL("empty string");
69
26
  }
70
71
520k
  while (c != NULL && *c != 0) {
72
512k
    switch (state) {
73
317k
    case none:
74
317k
      switch (*c) {
75
12.0k
      case '\\': /* make literal */
76
12.0k
        ++c;
77
12.0k
        switch (*c) {
78
792
        case '1':
79
1.51k
        case '2':
80
2.20k
        case '3':
81
3.44k
        case '4':
82
4.26k
        case '5':
83
4.81k
        case '6':
84
5.21k
        case '7':
85
6.14k
        case '8':
86
7.59k
        case '9':
87
7.59k
          if ((*c - '0') > sub) {
88
40
            FAIL("bad back reference");
89
40
          }
90
7.55k
          have_atom = true;
91
7.55k
          was_multiple = false;
92
7.55k
          break;
93
0
        case 0:
94
0
          FAIL("escaped end-of-string");
95
4.49k
        default:
96
4.49k
          goto literal;
97
12.0k
        }
98
7.55k
        ++c;
99
7.55k
        break;
100
14.4k
      case '[': /* bracket start */
101
14.4k
        ++c;
102
14.4k
        neg = false;
103
14.4k
        was_multiple = false;
104
14.4k
        seen_char = false;
105
14.4k
        state = parse_bracket;
106
14.4k
        break;
107
16.6k
      case '{': /* bound start */
108
16.6k
        switch (c[1]) {
109
1.42k
        case '0':
110
3.24k
        case '1':
111
3.51k
        case '2':
112
4.34k
        case '3':
113
4.56k
        case '4':
114
5.04k
        case '5':
115
5.45k
        case '6':
116
6.61k
        case '7':
117
6.82k
        case '8':
118
7.11k
        case '9':
119
7.11k
          if (!have_atom) {
120
5
            FAIL("no atom");
121
5
          }
122
7.11k
          if (was_multiple) {
123
6
            FAIL("was multiple");
124
6
          }
125
7.10k
          seen_comma = false;
126
7.10k
          seen_high = false;
127
7.10k
          low = high = 0;
128
7.10k
          state = parse_bound;
129
7.10k
          break;
130
9.49k
        default:
131
9.49k
          goto literal;
132
16.6k
        }
133
7.10k
        ++c;
134
7.10k
        have_atom = true;
135
7.10k
        was_multiple = true;
136
7.10k
        break;
137
6.03k
      case '}':
138
6.03k
        goto literal;
139
22.4k
      case '(': /* group start */
140
22.4k
        have_atom = false;
141
22.4k
        was_multiple = false;
142
22.4k
        empty_ok = true;
143
22.4k
        ++group;
144
22.4k
        ++sub;
145
22.4k
        ++c;
146
22.4k
        break;
147
31.6k
      case ')': /* group end */
148
31.6k
        if (group && !have_atom && !empty_ok) {
149
4
          FAIL("empty alternative");
150
4
        }
151
31.6k
        have_atom = true;
152
31.6k
        was_multiple = false;
153
31.6k
        if (group != 0) {
154
20.6k
          --group;
155
20.6k
        }
156
31.6k
        ++c;
157
31.6k
        break;
158
1.84k
      case '|': /* alternative separator */
159
1.84k
        if (!have_atom) {
160
4
          FAIL("no atom");
161
4
        }
162
1.84k
        have_atom = false;
163
1.84k
        empty_ok = false;
164
1.84k
        was_multiple = false;
165
1.84k
        ++c;
166
1.84k
        break;
167
2.27k
      case '^':
168
6.66k
      case '$':
169
6.66k
        have_atom = true;
170
6.66k
        was_multiple = true;
171
6.66k
        ++c;
172
6.66k
        break;
173
3.67k
      case '+':
174
7.43k
      case '*':
175
11.9k
      case '?':
176
11.9k
        if (was_multiple) {
177
40
          FAIL("was multiple");
178
40
        }
179
11.8k
        if (!have_atom) {
180
13
          FAIL("no atom");
181
13
        }
182
11.8k
        have_atom = true;
183
11.8k
        was_multiple = true;
184
11.8k
        ++c;
185
11.8k
        break;
186
4.06k
      case '.':
187
193k
      default:
188
213k
      literal:
189
213k
        have_atom = true;
190
213k
        was_multiple = false;
191
213k
        ++c;
192
213k
        break;
193
317k
      }
194
317k
      break;
195
317k
    case parse_bound:
196
26.2k
      switch (*c) {
197
3.35k
      case '0':
198
6.07k
      case '1':
199
7.00k
      case '2':
200
8.37k
      case '3':
201
9.27k
      case '4':
202
10.5k
      case '5':
203
12.2k
      case '6':
204
15.1k
      case '7':
205
16.5k
      case '8':
206
17.6k
      case '9':
207
17.6k
        if (!seen_comma) {
208
14.1k
          low = low * 10 + *c - '0';
209
14.1k
          if (low > 255) {
210
40
            FAIL("lower bound too big");
211
40
          }
212
14.1k
        } else {
213
3.44k
          seen_high = true;
214
3.44k
          high = high * 10 + *c - '0';
215
3.44k
          if (high > 255) {
216
17
            FAIL("upper bound too big");
217
17
          }
218
3.44k
        }
219
17.5k
        ++c;
220
17.5k
        break;
221
1.78k
      case ',':
222
1.78k
        if (seen_comma) {
223
5
          FAIL("multiple commas");
224
5
        }
225
1.78k
        seen_comma = true;
226
1.78k
        ++c;
227
1.78k
        break;
228
27
      default:
229
28
      case '{':
230
28
        FAIL("non digit/comma");
231
6.82k
      case '}':
232
6.82k
        if (seen_high && low > high) {
233
25
          FAIL("bad parse bound");
234
25
        }
235
6.80k
        seen_comma = false;
236
6.80k
        state = none;
237
6.80k
        ++c;
238
6.80k
        break;
239
26.2k
      }
240
26.1k
      break;
241
125k
    case parse_bracket:
242
125k
      switch (*c) {
243
4.16k
      case '^':
244
4.16k
        if (seen_char || neg) {
245
2.60k
          goto inside;
246
2.60k
        }
247
1.55k
        neg = true;
248
1.55k
        ++c;
249
1.55k
        break;
250
10.1k
      case '-':
251
10.1k
        if (range == 2) {
252
914
          goto inside;
253
914
        }
254
9.26k
        if (!seen_char) {
255
1.78k
          goto inside;
256
1.78k
        }
257
7.47k
        if (range == 1) {
258
5
          FAIL("bad range");
259
5
        }
260
7.47k
        range = 2;
261
7.47k
        ++c;
262
7.47k
        break;
263
16.0k
      case '[':
264
16.0k
        ++c;
265
16.0k
        switch (*c) {
266
3.72k
        case '.': /* collating element */
267
3.72k
          if (range != 0) {
268
1.77k
            --range;
269
1.77k
          }
270
3.72k
          ++c;
271
3.72k
          state = parse_ce;
272
3.72k
          seen_ce = false;
273
3.72k
          break;
274
343
        case '=': /* equivalence class */
275
343
          if (range == 2) {
276
4
            FAIL("equivalence class in "
277
4
                 "range");
278
4
          }
279
339
          ++c;
280
339
          state = parse_ec;
281
339
          seen_ec = false;
282
339
          break;
283
1.38k
        case ':': /* character class */
284
1.38k
          if (range == 2) {
285
4
            FAIL("character class in "
286
4
                 "range");
287
4
          }
288
1.37k
          ccname = c;
289
1.37k
          ++c;
290
1.37k
          state = parse_cc;
291
1.37k
          break;
292
16.0k
        }
293
16.0k
        seen_char = true;
294
16.0k
        break;
295
15.7k
      case ']':
296
15.7k
        if (!c[1] && !seen_char) {
297
7
          FAIL("unfinished brace");
298
7
        }
299
15.7k
        if (!seen_char) {
300
2.08k
          goto inside;
301
2.08k
        }
302
13.6k
        ++c;
303
13.6k
        range = 0;
304
13.6k
        have_atom = true;
305
13.6k
        state = none;
306
13.6k
        break;
307
79.0k
      default:
308
86.4k
      inside:
309
86.4k
        seen_char = true;
310
86.4k
        if (range == 2 && (*c & 0xff) < range_start) {
311
26
          FAIL("out of order range");
312
26
        }
313
86.4k
        if (range != 0) {
314
11.7k
          --range;
315
11.7k
        }
316
86.4k
        range_start = *c & 0xff;
317
86.4k
        ++c;
318
86.4k
        break;
319
125k
      }
320
125k
      break;
321
125k
    case parse_ce:
322
30.1k
      switch (*c) {
323
10.2k
      case '.':
324
10.2k
        ++c;
325
10.2k
        switch (*c) {
326
3.57k
        case ']':
327
3.57k
          if (!seen_ce) {
328
3
            FAIL("empty ce");
329
3
          }
330
3.57k
          ++c;
331
3.57k
          state = parse_bracket;
332
3.57k
          break;
333
6.68k
        default:
334
6.68k
          if (seen_ce) {
335
4.87k
            range_start = 256;
336
4.87k
          } else {
337
1.80k
            range_start = '.';
338
1.80k
          }
339
6.68k
          seen_ce = true;
340
6.68k
          break;
341
10.2k
        }
342
10.2k
        break;
343
19.8k
      default:
344
19.8k
        if (seen_ce) {
345
17.9k
          range_start = 256;
346
17.9k
        } else {
347
1.89k
          range_start = *c;
348
1.89k
        }
349
19.8k
        seen_ce = true;
350
19.8k
        ++c;
351
19.8k
        break;
352
30.1k
      }
353
30.1k
      break;
354
30.1k
    case parse_ec:
355
3.90k
      switch (*c) {
356
1.02k
      case '=':
357
1.02k
        ++c;
358
1.02k
        switch (*c) {
359
247
        case ']':
360
247
          if (!seen_ec) {
361
4
            FAIL("no ec");
362
4
          }
363
243
          ++c;
364
243
          state = parse_bracket;
365
243
          break;
366
781
        default:
367
781
          seen_ec = true;
368
781
          break;
369
1.02k
        }
370
1.02k
        break;
371
2.87k
      default:
372
2.87k
        seen_ec = true;
373
2.87k
        ++c;
374
2.87k
        break;
375
3.90k
      }
376
3.90k
      break;
377
9.59k
    case parse_cc:
378
9.59k
      switch (*c) {
379
2.03k
      case ':':
380
2.03k
        ++c;
381
2.03k
        switch (*c) {
382
1.29k
        case ']': {
383
1.29k
          unsigned int i;
384
1.29k
          bool found = false;
385
1.29k
          for (i = 0;
386
16.8k
               i < sizeof(cc) / sizeof(*cc); i++)
387
15.5k
          {
388
15.5k
            unsigned int len;
389
15.5k
            len = strlen(cc[i]);
390
15.5k
            if (len !=
391
15.5k
                (unsigned int)(c - ccname))
392
2.29k
            {
393
2.29k
              continue;
394
2.29k
            }
395
13.2k
            if (strncmp(cc[i], ccname, len))
396
12.1k
            {
397
12.1k
              continue;
398
12.1k
            }
399
1.14k
            found = true;
400
1.14k
          }
401
1.29k
          if (!found) {
402
153
            FAIL("unknown cc");
403
153
          }
404
1.14k
          ++c;
405
1.14k
          state = parse_bracket;
406
1.14k
          break;
407
1.29k
        }
408
733
        default:
409
733
          break;
410
2.03k
        }
411
1.87k
        break;
412
7.56k
      default:
413
7.56k
        ++c;
414
7.56k
        break;
415
9.59k
      }
416
9.43k
      break;
417
512k
    }
418
512k
  }
419
7.95k
  if (group != 0) {
420
186
    FAIL("group open");
421
186
  }
422
7.77k
  if (state != none) {
423
744
    FAIL("incomplete");
424
744
  }
425
7.02k
  if (!have_atom) {
426
16
    FAIL("no atom");
427
16
  }
428
7.01k
  return sub;
429
430
1.40k
error:
431
#if VALREGEX_REPORT_REASON
432
  fprintf(stderr, "%s\n", reason);
433
#endif /* if VALREGEX_REPORT_REASON */
434
1.40k
  return -1;
435
7.02k
}