Coverage Report

Created: 2026-01-17 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/regex.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <stdbool.h>
15
16
#include <isc/file.h>
17
#include <isc/regex.h>
18
#include <isc/string.h>
19
20
#if VALREGEX_REPORT_REASON
21
#define FAIL(x)               \
22
  do {                  \
23
    reason = (x); \
24
    goto error;   \
25
  } while (0)
26
#else /* if VALREGEX_REPORT_REASON */
27
1.38k
#define FAIL(x) goto error
28
#endif /* if VALREGEX_REPORT_REASON */
29
30
/*
31
 * Validate a regular expression as interpreted in the 'C' locale.
32
 */
33
int
34
7.41k
isc_regex_validate(const char *c) {
35
7.41k
  enum {
36
7.41k
    none,
37
7.41k
    parse_bracket,
38
7.41k
    parse_bound,
39
7.41k
    parse_ce,
40
7.41k
    parse_ec,
41
7.41k
    parse_cc
42
7.41k
  } state = none;
43
  /* Well known character classes. */
44
7.41k
  const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
45
7.41k
           ":graph:", ":space:", ":blank:", ":lower:",
46
7.41k
           ":upper:", ":cntrl:", ":print:", ":xdigit:" };
47
7.41k
  bool seen_comma = false;
48
7.41k
  bool seen_high = false;
49
7.41k
  bool seen_char = false;
50
7.41k
  bool seen_ec = false;
51
7.41k
  bool seen_ce = false;
52
7.41k
  bool have_atom = false;
53
7.41k
  int group = 0;
54
7.41k
  int range = 0;
55
7.41k
  int sub = 0;
56
7.41k
  bool empty_ok = false;
57
7.41k
  bool neg = false;
58
7.41k
  bool was_multiple = false;
59
7.41k
  unsigned int low = 0;
60
7.41k
  unsigned int high = 0;
61
7.41k
  const char *ccname = NULL;
62
7.41k
  int range_start = 0;
63
#if VALREGEX_REPORT_REASON
64
  const char *reason = "";
65
#endif /* if VALREGEX_REPORT_REASON */
66
67
7.41k
  if (c == NULL || *c == 0) {
68
24
    FAIL("empty string");
69
24
  }
70
71
484k
  while (c != NULL && *c != 0) {
72
477k
    switch (state) {
73
293k
    case none:
74
293k
      switch (*c) {
75
10.1k
      case '\\': /* make literal */
76
10.1k
        ++c;
77
10.1k
        switch (*c) {
78
578
        case '1':
79
1.27k
        case '2':
80
1.77k
        case '3':
81
2.69k
        case '4':
82
3.27k
        case '5':
83
3.82k
        case '6':
84
4.21k
        case '7':
85
4.96k
        case '8':
86
6.12k
        case '9':
87
6.12k
          if ((*c - '0') > sub) {
88
37
            FAIL("bad back reference");
89
37
          }
90
6.09k
          have_atom = true;
91
6.09k
          was_multiple = false;
92
6.09k
          break;
93
0
        case 0:
94
0
          FAIL("escaped end-of-string");
95
3.98k
        default:
96
3.98k
          goto literal;
97
10.1k
        }
98
6.09k
        ++c;
99
6.09k
        break;
100
14.3k
      case '[': /* bracket start */
101
14.3k
        ++c;
102
14.3k
        neg = false;
103
14.3k
        was_multiple = false;
104
14.3k
        seen_char = false;
105
14.3k
        state = parse_bracket;
106
14.3k
        break;
107
15.5k
      case '{': /* bound start */
108
15.5k
        switch (c[1]) {
109
1.25k
        case '0':
110
3.02k
        case '1':
111
3.28k
        case '2':
112
3.95k
        case '3':
113
4.16k
        case '4':
114
4.68k
        case '5':
115
5.10k
        case '6':
116
6.09k
        case '7':
117
6.26k
        case '8':
118
6.54k
        case '9':
119
6.54k
          if (!have_atom) {
120
4
            FAIL("no atom");
121
4
          }
122
6.54k
          if (was_multiple) {
123
7
            FAIL("was multiple");
124
7
          }
125
6.53k
          seen_comma = false;
126
6.53k
          seen_high = false;
127
6.53k
          low = high = 0;
128
6.53k
          state = parse_bound;
129
6.53k
          break;
130
9.01k
        default:
131
9.01k
          goto literal;
132
15.5k
        }
133
6.53k
        ++c;
134
6.53k
        have_atom = true;
135
6.53k
        was_multiple = true;
136
6.53k
        break;
137
6.05k
      case '}':
138
6.05k
        goto literal;
139
16.1k
      case '(': /* group start */
140
16.1k
        have_atom = false;
141
16.1k
        was_multiple = false;
142
16.1k
        empty_ok = true;
143
16.1k
        ++group;
144
16.1k
        ++sub;
145
16.1k
        ++c;
146
16.1k
        break;
147
23.0k
      case ')': /* group end */
148
23.0k
        if (group && !have_atom && !empty_ok) {
149
4
          FAIL("empty alternative");
150
4
        }
151
23.0k
        have_atom = true;
152
23.0k
        was_multiple = false;
153
23.0k
        if (group != 0) {
154
14.0k
          --group;
155
14.0k
        }
156
23.0k
        ++c;
157
23.0k
        break;
158
1.83k
      case '|': /* alternative separator */
159
1.83k
        if (!have_atom) {
160
4
          FAIL("no atom");
161
4
        }
162
1.83k
        have_atom = false;
163
1.83k
        empty_ok = false;
164
1.83k
        was_multiple = false;
165
1.83k
        ++c;
166
1.83k
        break;
167
2.39k
      case '^':
168
7.06k
      case '$':
169
7.06k
        have_atom = true;
170
7.06k
        was_multiple = true;
171
7.06k
        ++c;
172
7.06k
        break;
173
3.49k
      case '+':
174
6.66k
      case '*':
175
10.4k
      case '?':
176
10.4k
        if (was_multiple) {
177
37
          FAIL("was multiple");
178
37
        }
179
10.4k
        if (!have_atom) {
180
11
          FAIL("no atom");
181
11
        }
182
10.4k
        have_atom = true;
183
10.4k
        was_multiple = true;
184
10.4k
        ++c;
185
10.4k
        break;
186
3.77k
      case '.':
187
189k
      default:
188
208k
      literal:
189
208k
        have_atom = true;
190
208k
        was_multiple = false;
191
208k
        ++c;
192
208k
        break;
193
293k
      }
194
293k
      break;
195
293k
    case parse_bound:
196
24.3k
      switch (*c) {
197
3.03k
      case '0':
198
5.51k
      case '1':
199
6.38k
      case '2':
200
7.59k
      case '3':
201
8.39k
      case '4':
202
9.76k
      case '5':
203
11.2k
      case '6':
204
14.1k
      case '7':
205
15.3k
      case '8':
206
16.3k
      case '9':
207
16.3k
        if (!seen_comma) {
208
13.1k
          low = low * 10 + *c - '0';
209
13.1k
          if (low > 255) {
210
42
            FAIL("lower bound too big");
211
42
          }
212
13.1k
        } else {
213
3.19k
          seen_high = true;
214
3.19k
          high = high * 10 + *c - '0';
215
3.19k
          if (high > 255) {
216
22
            FAIL("upper bound too big");
217
22
          }
218
3.19k
        }
219
16.2k
        ++c;
220
16.2k
        break;
221
1.66k
      case ',':
222
1.66k
        if (seen_comma) {
223
6
          FAIL("multiple commas");
224
6
        }
225
1.66k
        seen_comma = true;
226
1.66k
        ++c;
227
1.66k
        break;
228
28
      default:
229
29
      case '{':
230
29
        FAIL("non digit/comma");
231
6.27k
      case '}':
232
6.27k
        if (seen_high && low > high) {
233
24
          FAIL("bad parse bound");
234
24
        }
235
6.25k
        seen_comma = false;
236
6.25k
        state = none;
237
6.25k
        ++c;
238
6.25k
        break;
239
24.3k
      }
240
24.1k
      break;
241
119k
    case parse_bracket:
242
119k
      switch (*c) {
243
3.78k
      case '^':
244
3.78k
        if (seen_char || neg) {
245
2.26k
          goto inside;
246
2.26k
        }
247
1.52k
        neg = true;
248
1.52k
        ++c;
249
1.52k
        break;
250
9.32k
      case '-':
251
9.32k
        if (range == 2) {
252
855
          goto inside;
253
855
        }
254
8.46k
        if (!seen_char) {
255
1.83k
          goto inside;
256
1.83k
        }
257
6.63k
        if (range == 1) {
258
4
          FAIL("bad range");
259
4
        }
260
6.63k
        range = 2;
261
6.63k
        ++c;
262
6.63k
        break;
263
14.5k
      case '[':
264
14.5k
        ++c;
265
14.5k
        switch (*c) {
266
3.21k
        case '.': /* collating element */
267
3.21k
          if (range != 0) {
268
1.54k
            --range;
269
1.54k
          }
270
3.21k
          ++c;
271
3.21k
          state = parse_ce;
272
3.21k
          seen_ce = false;
273
3.21k
          break;
274
330
        case '=': /* equivalence class */
275
330
          if (range == 2) {
276
4
            FAIL("equivalence class in "
277
4
                 "range");
278
4
          }
279
326
          ++c;
280
326
          state = parse_ec;
281
326
          seen_ec = false;
282
326
          break;
283
1.41k
        case ':': /* character class */
284
1.41k
          if (range == 2) {
285
4
            FAIL("character class in "
286
4
                 "range");
287
4
          }
288
1.41k
          ccname = c;
289
1.41k
          ++c;
290
1.41k
          state = parse_cc;
291
1.41k
          break;
292
14.5k
        }
293
14.5k
        seen_char = true;
294
14.5k
        break;
295
15.9k
      case ']':
296
15.9k
        if (!c[1] && !seen_char) {
297
5
          FAIL("unfinished brace");
298
5
        }
299
15.8k
        if (!seen_char) {
300
2.27k
          goto inside;
301
2.27k
        }
302
13.6k
        ++c;
303
13.6k
        range = 0;
304
13.6k
        have_atom = true;
305
13.6k
        state = none;
306
13.6k
        break;
307
76.0k
      default:
308
83.2k
      inside:
309
83.2k
        seen_char = true;
310
83.2k
        if (range == 2 && (*c & 0xff) < range_start) {
311
25
          FAIL("out of order range");
312
25
        }
313
83.2k
        if (range != 0) {
314
10.3k
          --range;
315
10.3k
        }
316
83.2k
        range_start = *c & 0xff;
317
83.2k
        ++c;
318
83.2k
        break;
319
119k
      }
320
119k
      break;
321
119k
    case parse_ce:
322
26.4k
      switch (*c) {
323
9.14k
      case '.':
324
9.14k
        ++c;
325
9.14k
        switch (*c) {
326
3.09k
        case ']':
327
3.09k
          if (!seen_ce) {
328
4
            FAIL("empty ce");
329
4
          }
330
3.08k
          ++c;
331
3.08k
          state = parse_bracket;
332
3.08k
          break;
333
6.05k
        default:
334
6.05k
          if (seen_ce) {
335
4.52k
            range_start = 256;
336
4.52k
          } else {
337
1.53k
            range_start = '.';
338
1.53k
          }
339
6.05k
          seen_ce = true;
340
6.05k
          break;
341
9.14k
        }
342
9.13k
        break;
343
17.2k
      default:
344
17.2k
        if (seen_ce) {
345
15.6k
          range_start = 256;
346
15.6k
        } else {
347
1.66k
          range_start = *c;
348
1.66k
        }
349
17.2k
        seen_ce = true;
350
17.2k
        ++c;
351
17.2k
        break;
352
26.4k
      }
353
26.4k
      break;
354
26.4k
    case parse_ec:
355
3.60k
      switch (*c) {
356
1.07k
      case '=':
357
1.07k
        ++c;
358
1.07k
        switch (*c) {
359
252
        case ']':
360
252
          if (!seen_ec) {
361
4
            FAIL("no ec");
362
4
          }
363
248
          ++c;
364
248
          state = parse_bracket;
365
248
          break;
366
821
        default:
367
821
          seen_ec = true;
368
821
          break;
369
1.07k
        }
370
1.06k
        break;
371
2.53k
      default:
372
2.53k
        seen_ec = true;
373
2.53k
        ++c;
374
2.53k
        break;
375
3.60k
      }
376
3.59k
      break;
377
9.76k
    case parse_cc:
378
9.76k
      switch (*c) {
379
2.03k
      case ':':
380
2.03k
        ++c;
381
2.03k
        switch (*c) {
382
1.32k
        case ']': {
383
1.32k
          unsigned int i;
384
1.32k
          bool found = false;
385
1.32k
          for (i = 0;
386
17.2k
               i < sizeof(cc) / sizeof(*cc); i++)
387
15.8k
          {
388
15.8k
            unsigned int len;
389
15.8k
            len = strlen(cc[i]);
390
15.8k
            if (len !=
391
15.8k
                (unsigned int)(c - ccname))
392
2.13k
            {
393
2.13k
              continue;
394
2.13k
            }
395
13.7k
            if (strncmp(cc[i], ccname, len))
396
12.5k
            {
397
12.5k
              continue;
398
12.5k
            }
399
1.15k
            found = true;
400
1.15k
          }
401
1.32k
          if (!found) {
402
171
            FAIL("unknown cc");
403
171
          }
404
1.15k
          ++c;
405
1.15k
          state = parse_bracket;
406
1.15k
          break;
407
1.32k
        }
408
711
        default:
409
711
          break;
410
2.03k
        }
411
1.86k
        break;
412
7.72k
      default:
413
7.72k
        ++c;
414
7.72k
        break;
415
9.76k
      }
416
9.59k
      break;
417
477k
    }
418
477k
  }
419
6.94k
  if (group != 0) {
420
203
    FAIL("group open");
421
203
  }
422
6.74k
  if (state != none) {
423
691
    FAIL("incomplete");
424
691
  }
425
6.04k
  if (!have_atom) {
426
17
    FAIL("no atom");
427
17
  }
428
6.03k
  return sub;
429
430
1.38k
error:
431
#if VALREGEX_REPORT_REASON
432
  fprintf(stderr, "%s\n", reason);
433
#endif /* if VALREGEX_REPORT_REASON */
434
1.38k
  return -1;
435
6.04k
}