Coverage Report

Created: 2025-11-11 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/isc/regex.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
#include <stdbool.h>
15
16
#include <isc/file.h>
17
#include <isc/regex.h>
18
#include <isc/string.h>
19
20
#if VALREGEX_REPORT_REASON
21
#define FAIL(x)               \
22
  do {                  \
23
    reason = (x); \
24
    goto error;   \
25
  } while (0)
26
#else /* if VALREGEX_REPORT_REASON */
27
1.30k
#define FAIL(x) goto error
28
#endif /* if VALREGEX_REPORT_REASON */
29
30
/*
31
 * Validate the regular expression 'C' locale.
32
 */
33
int
34
7.35k
isc_regex_validate(const char *c) {
35
7.35k
  enum {
36
7.35k
    none,
37
7.35k
    parse_bracket,
38
7.35k
    parse_bound,
39
7.35k
    parse_ce,
40
7.35k
    parse_ec,
41
7.35k
    parse_cc
42
7.35k
  } state = none;
43
  /* Well known character classes. */
44
7.35k
  const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
45
7.35k
           ":graph:", ":space:", ":blank:", ":lower:",
46
7.35k
           ":upper:", ":cntrl:", ":print:", ":xdigit:" };
47
7.35k
  bool seen_comma = false;
48
7.35k
  bool seen_high = false;
49
7.35k
  bool seen_char = false;
50
7.35k
  bool seen_ec = false;
51
7.35k
  bool seen_ce = false;
52
7.35k
  bool have_atom = false;
53
7.35k
  int group = 0;
54
7.35k
  int range = 0;
55
7.35k
  int sub = 0;
56
7.35k
  bool empty_ok = false;
57
7.35k
  bool neg = false;
58
7.35k
  bool was_multiple = false;
59
7.35k
  unsigned int low = 0;
60
7.35k
  unsigned int high = 0;
61
7.35k
  const char *ccname = NULL;
62
7.35k
  int range_start = 0;
63
#if VALREGEX_REPORT_REASON
64
  const char *reason = "";
65
#endif /* if VALREGEX_REPORT_REASON */
66
67
7.35k
  if (c == NULL || *c == 0) {
68
22
    FAIL("empty string");
69
22
  }
70
71
478k
  while (c != NULL && *c != 0) {
72
471k
    switch (state) {
73
274k
    case none:
74
274k
      switch (*c) {
75
10.3k
      case '\\': /* make literal */
76
10.3k
        ++c;
77
10.3k
        switch (*c) {
78
589
        case '1':
79
1.23k
        case '2':
80
1.78k
        case '3':
81
2.76k
        case '4':
82
3.38k
        case '5':
83
3.94k
        case '6':
84
4.32k
        case '7':
85
5.11k
        case '8':
86
6.34k
        case '9':
87
6.34k
          if ((*c - '0') > sub) {
88
37
            FAIL("bad back reference");
89
37
          }
90
6.31k
          have_atom = true;
91
6.31k
          was_multiple = false;
92
6.31k
          break;
93
0
        case 0:
94
0
          FAIL("escaped end-of-string");
95
4.00k
        default:
96
4.00k
          goto literal;
97
10.3k
        }
98
6.31k
        ++c;
99
6.31k
        break;
100
14.5k
      case '[': /* bracket start */
101
14.5k
        ++c;
102
14.5k
        neg = false;
103
14.5k
        was_multiple = false;
104
14.5k
        seen_char = false;
105
14.5k
        state = parse_bracket;
106
14.5k
        break;
107
14.4k
      case '{': /* bound start */
108
14.4k
        switch (c[1]) {
109
1.22k
        case '0':
110
2.89k
        case '1':
111
3.14k
        case '2':
112
3.79k
        case '3':
113
3.97k
        case '4':
114
4.41k
        case '5':
115
4.78k
        case '6':
116
5.72k
        case '7':
117
5.89k
        case '8':
118
6.15k
        case '9':
119
6.15k
          if (!have_atom) {
120
4
            FAIL("no atom");
121
4
          }
122
6.15k
          if (was_multiple) {
123
6
            FAIL("was multiple");
124
6
          }
125
6.14k
          seen_comma = false;
126
6.14k
          seen_high = false;
127
6.14k
          low = high = 0;
128
6.14k
          state = parse_bound;
129
6.14k
          break;
130
8.27k
        default:
131
8.27k
          goto literal;
132
14.4k
        }
133
6.14k
        ++c;
134
6.14k
        have_atom = true;
135
6.14k
        was_multiple = true;
136
6.14k
        break;
137
5.19k
      case '}':
138
5.19k
        goto literal;
139
17.3k
      case '(': /* group start */
140
17.3k
        have_atom = false;
141
17.3k
        was_multiple = false;
142
17.3k
        empty_ok = true;
143
17.3k
        ++group;
144
17.3k
        ++sub;
145
17.3k
        ++c;
146
17.3k
        break;
147
24.9k
      case ')': /* group end */
148
24.9k
        if (group && !have_atom && !empty_ok) {
149
4
          FAIL("empty alternative");
150
4
        }
151
24.9k
        have_atom = true;
152
24.9k
        was_multiple = false;
153
24.9k
        if (group != 0) {
154
15.4k
          --group;
155
15.4k
        }
156
24.9k
        ++c;
157
24.9k
        break;
158
1.70k
      case '|': /* alternative separator */
159
1.70k
        if (!have_atom) {
160
4
          FAIL("no atom");
161
4
        }
162
1.70k
        have_atom = false;
163
1.70k
        empty_ok = false;
164
1.70k
        was_multiple = false;
165
1.70k
        ++c;
166
1.70k
        break;
167
2.42k
      case '^':
168
5.70k
      case '$':
169
5.70k
        have_atom = true;
170
5.70k
        was_multiple = true;
171
5.70k
        ++c;
172
5.70k
        break;
173
3.41k
      case '+':
174
6.92k
      case '*':
175
10.6k
      case '?':
176
10.6k
        if (was_multiple) {
177
39
          FAIL("was multiple");
178
39
        }
179
10.6k
        if (!have_atom) {
180
13
          FAIL("no atom");
181
13
        }
182
10.5k
        have_atom = true;
183
10.5k
        was_multiple = true;
184
10.5k
        ++c;
185
10.5k
        break;
186
4.12k
      case '.':
187
169k
      default:
188
187k
      literal:
189
187k
        have_atom = true;
190
187k
        was_multiple = false;
191
187k
        ++c;
192
187k
        break;
193
274k
      }
194
274k
      break;
195
274k
    case parse_bound:
196
22.6k
      switch (*c) {
197
3.03k
      case '0':
198
5.33k
      case '1':
199
6.03k
      case '2':
200
7.17k
      case '3':
201
7.88k
      case '4':
202
9.08k
      case '5':
203
10.4k
      case '6':
204
13.1k
      case '7':
205
14.3k
      case '8':
206
15.2k
      case '9':
207
15.2k
        if (!seen_comma) {
208
12.4k
          low = low * 10 + *c - '0';
209
12.4k
          if (low > 255) {
210
41
            FAIL("lower bound too big");
211
41
          }
212
12.4k
        } else {
213
2.79k
          seen_high = true;
214
2.79k
          high = high * 10 + *c - '0';
215
2.79k
          if (high > 255) {
216
16
            FAIL("upper bound too big");
217
16
          }
218
2.79k
        }
219
15.2k
        ++c;
220
15.2k
        break;
221
1.47k
      case ',':
222
1.47k
        if (seen_comma) {
223
5
          FAIL("multiple commas");
224
5
        }
225
1.46k
        seen_comma = true;
226
1.46k
        ++c;
227
1.46k
        break;
228
19
      default:
229
21
      case '{':
230
21
        FAIL("non digit/comma");
231
5.90k
      case '}':
232
5.90k
        if (seen_high && low > high) {
233
19
          FAIL("bad parse bound");
234
19
        }
235
5.88k
        seen_comma = false;
236
5.88k
        state = none;
237
5.88k
        ++c;
238
5.88k
        break;
239
22.6k
      }
240
22.5k
      break;
241
129k
    case parse_bracket:
242
129k
      switch (*c) {
243
3.81k
      case '^':
244
3.81k
        if (seen_char || neg) {
245
2.42k
          goto inside;
246
2.42k
        }
247
1.39k
        neg = true;
248
1.39k
        ++c;
249
1.39k
        break;
250
10.7k
      case '-':
251
10.7k
        if (range == 2) {
252
1.00k
          goto inside;
253
1.00k
        }
254
9.74k
        if (!seen_char) {
255
1.61k
          goto inside;
256
1.61k
        }
257
8.13k
        if (range == 1) {
258
4
          FAIL("bad range");
259
4
        }
260
8.13k
        range = 2;
261
8.13k
        ++c;
262
8.13k
        break;
263
17.0k
      case '[':
264
17.0k
        ++c;
265
17.0k
        switch (*c) {
266
3.79k
        case '.': /* collating element */
267
3.79k
          if (range != 0) {
268
1.90k
            --range;
269
1.90k
          }
270
3.79k
          ++c;
271
3.79k
          state = parse_ce;
272
3.79k
          seen_ce = false;
273
3.79k
          break;
274
359
        case '=': /* equivalence class */
275
359
          if (range == 2) {
276
4
            FAIL("equivalence class in "
277
4
                 "range");
278
4
          }
279
355
          ++c;
280
355
          state = parse_ec;
281
355
          seen_ec = false;
282
355
          break;
283
1.36k
        case ':': /* character class */
284
1.36k
          if (range == 2) {
285
4
            FAIL("character class in "
286
4
                 "range");
287
4
          }
288
1.35k
          ccname = c;
289
1.35k
          ++c;
290
1.35k
          state = parse_cc;
291
1.35k
          break;
292
17.0k
        }
293
17.0k
        seen_char = true;
294
17.0k
        break;
295
16.1k
      case ']':
296
16.1k
        if (!c[1] && !seen_char) {
297
6
          FAIL("unfinished brace");
298
6
        }
299
16.1k
        if (!seen_char) {
300
2.27k
          goto inside;
301
2.27k
        }
302
13.8k
        ++c;
303
13.8k
        range = 0;
304
13.8k
        have_atom = true;
305
13.8k
        state = none;
306
13.8k
        break;
307
81.3k
      default:
308
88.7k
      inside:
309
88.7k
        seen_char = true;
310
88.7k
        if (range == 2 && (*c & 0xff) < range_start) {
311
30
          FAIL("out of order range");
312
30
        }
313
88.6k
        if (range != 0) {
314
12.7k
          --range;
315
12.7k
        }
316
88.6k
        range_start = *c & 0xff;
317
88.6k
        ++c;
318
88.6k
        break;
319
129k
      }
320
129k
      break;
321
129k
    case parse_ce:
322
31.5k
      switch (*c) {
323
10.9k
      case '.':
324
10.9k
        ++c;
325
10.9k
        switch (*c) {
326
3.66k
        case ']':
327
3.66k
          if (!seen_ce) {
328
4
            FAIL("empty ce");
329
4
          }
330
3.66k
          ++c;
331
3.66k
          state = parse_bracket;
332
3.66k
          break;
333
7.25k
        default:
334
7.25k
          if (seen_ce) {
335
5.36k
            range_start = 256;
336
5.36k
          } else {
337
1.88k
            range_start = '.';
338
1.88k
          }
339
7.25k
          seen_ce = true;
340
7.25k
          break;
341
10.9k
        }
342
10.9k
        break;
343
20.6k
      default:
344
20.6k
        if (seen_ce) {
345
18.7k
          range_start = 256;
346
18.7k
        } else {
347
1.89k
          range_start = *c;
348
1.89k
        }
349
20.6k
        seen_ce = true;
350
20.6k
        ++c;
351
20.6k
        break;
352
31.5k
      }
353
31.5k
      break;
354
31.5k
    case parse_ec:
355
3.60k
      switch (*c) {
356
1.02k
      case '=':
357
1.02k
        ++c;
358
1.02k
        switch (*c) {
359
285
        case ']':
360
285
          if (!seen_ec) {
361
4
            FAIL("no ec");
362
4
          }
363
281
          ++c;
364
281
          state = parse_bracket;
365
281
          break;
366
738
        default:
367
738
          seen_ec = true;
368
738
          break;
369
1.02k
        }
370
1.01k
        break;
371
2.57k
      default:
372
2.57k
        seen_ec = true;
373
2.57k
        ++c;
374
2.57k
        break;
375
3.60k
      }
376
3.59k
      break;
377
9.40k
    case parse_cc:
378
9.40k
      switch (*c) {
379
1.99k
      case ':':
380
1.99k
        ++c;
381
1.99k
        switch (*c) {
382
1.27k
        case ']': {
383
1.27k
          unsigned int i;
384
1.27k
          bool found = false;
385
1.27k
          for (i = 0;
386
16.5k
               i < sizeof(cc) / sizeof(*cc); i++)
387
15.2k
          {
388
15.2k
            unsigned int len;
389
15.2k
            len = strlen(cc[i]);
390
15.2k
            if (len !=
391
15.2k
                (unsigned int)(c - ccname))
392
1.91k
            {
393
1.91k
              continue;
394
1.91k
            }
395
13.3k
            if (strncmp(cc[i], ccname, len))
396
12.1k
            {
397
12.1k
              continue;
398
12.1k
            }
399
1.15k
            found = true;
400
1.15k
          }
401
1.27k
          if (!found) {
402
118
            FAIL("unknown cc");
403
118
          }
404
1.15k
          ++c;
405
1.15k
          state = parse_bracket;
406
1.15k
          break;
407
1.27k
        }
408
725
        default:
409
725
          break;
410
1.99k
        }
411
1.87k
        break;
412
7.41k
      default:
413
7.41k
        ++c;
414
7.41k
        break;
415
9.40k
      }
416
9.29k
      break;
417
471k
    }
418
471k
  }
419
6.95k
  if (group != 0) {
420
199
    FAIL("group open");
421
199
  }
422
6.75k
  if (state != none) {
423
686
    FAIL("incomplete");
424
686
  }
425
6.06k
  if (!have_atom) {
426
19
    FAIL("no atom");
427
19
  }
428
6.04k
  return sub;
429
430
1.30k
error:
431
#if VALREGEX_REPORT_REASON
432
  fprintf(stderr, "%s\n", reason);
433
#endif /* if VALREGEX_REPORT_REASON */
434
1.30k
  return -1;
435
6.06k
}