/src/bind9/lib/isc/regex.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) Internet Systems Consortium, Inc. ("ISC") |
3 | | * |
4 | | * SPDX-License-Identifier: MPL-2.0 |
5 | | * |
6 | | * This Source Code Form is subject to the terms of the Mozilla Public |
7 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
8 | | * file, you can obtain one at https://mozilla.org/MPL/2.0/. |
9 | | * |
10 | | * See the COPYRIGHT file distributed with this work for additional |
11 | | * information regarding copyright ownership. |
12 | | */ |
13 | | |
14 | | #include <stdbool.h> |
15 | | |
16 | | #include <isc/file.h> |
17 | | #include <isc/regex.h> |
18 | | #include <isc/string.h> |
19 | | |
/*
 * FAIL() aborts validation by jumping to the 'error' label of
 * isc_regex_validate().  When VALREGEX_REPORT_REASON is enabled the
 * human-readable reason is recorded first so it can be printed.
 */
#if VALREGEX_REPORT_REASON
#define FAIL(x)               \
	do {                  \
		reason = (x); \
		goto error;   \
	} while (0)
#else /* if VALREGEX_REPORT_REASON */
#define FAIL(x) goto error
#endif /* if VALREGEX_REPORT_REASON */

/*
 * Validate the regular expression 'c' using 'C' locale rules.
 *
 * The expression is scanned once by a small state machine that tracks
 * whether we are at top level ('none'), inside a bracket expression,
 * inside a '{m,n}' bound, or inside one of the bracket sub-forms
 * '[.ce.]', '[=ec=]' and '[:cc:]'.
 *
 * Returns:
 *	the number of '(' groups opened by the expression on success;
 *	-1 if 'c' is NULL, empty, or not a valid expression.
 */
int
isc_regex_validate(const char *c) {
	enum {
		none,
		parse_bracket,
		parse_bound,
		parse_ce,
		parse_ec,
		parse_cc
	} state = none;
	/* Well known character classes. */
	static const char *const cc[] = {
		":alnum:", ":digit:", ":punct:", ":alpha:",
		":graph:", ":space:", ":blank:", ":lower:",
		":upper:", ":cntrl:", ":print:", ":xdigit:"
	};
	bool seen_comma = false;   /* ',' seen in the current bound */
	bool seen_high = false;	   /* upper limit digits seen in the bound */
	bool seen_char = false;	   /* bracket expression has content */
	bool seen_ec = false;	   /* equivalence class has content */
	bool seen_ce = false;	   /* collating element has content */
	bool have_atom = false;	   /* something a repetition can apply to */
	int group = 0;		   /* currently open '(' groups */
	int range = 0;		   /* 2: just after '-', 1: range end consumed */
	int sub = 0;		   /* total '(' groups seen so far */
	bool empty_ok = false;	   /* "()" directly after '(' is tolerated */
	bool neg = false;	   /* leading '^' seen in the bracket */
	bool was_multiple = false; /* previous token was a repetition/anchor */
	unsigned int low = 0;
	unsigned int high = 0;
	const char *ccname = NULL; /* points at the ':' opening a class name */
	int range_start = 0;	   /* value (0..255, or 256) of last range start */
#if VALREGEX_REPORT_REASON
	const char *reason = "";
#endif /* if VALREGEX_REPORT_REASON */

	if (c == NULL || *c == 0) {
		FAIL("empty string");
	}

	while (*c != 0) {
		switch (state) {
		case none:
			switch (*c) {
			case '\\': /* make literal */
				++c;
				switch (*c) {
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					/*
					 * A back reference may only name a
					 * group that has already been opened.
					 */
					if ((*c - '0') > sub) {
						FAIL("bad back reference");
					}
					have_atom = true;
					was_multiple = false;
					break;
				case 0:
					FAIL("escaped end-of-string");
				default:
					goto literal;
				}
				++c;
				break;
			case '[': /* bracket start */
				++c;
				neg = false;
				was_multiple = false;
				seen_char = false;
				state = parse_bracket;
				break;
			case '{': /* bound start */
				/*
				 * '{' only starts a bound when followed by a
				 * digit; otherwise it is an ordinary character.
				 */
				switch (c[1]) {
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					if (!have_atom) {
						FAIL("no atom");
					}
					if (was_multiple) {
						FAIL("was multiple");
					}
					seen_comma = false;
					seen_high = false;
					low = high = 0;
					state = parse_bound;
					break;
				default:
					goto literal;
				}
				++c;
				have_atom = true;
				was_multiple = true;
				break;
			case '}':
				goto literal;
			case '(': /* group start */
				have_atom = false;
				was_multiple = false;
				empty_ok = true;
				++group;
				++sub;
				++c;
				break;
			case ')': /* group end */
				if (group && !have_atom && !empty_ok) {
					FAIL("empty alternative");
				}
				have_atom = true;
				was_multiple = false;
				/* An unmatched ')' is treated as a literal. */
				if (group != 0) {
					--group;
				}
				++c;
				break;
			case '|': /* alternative separator */
				if (!have_atom) {
					FAIL("no atom");
				}
				have_atom = false;
				empty_ok = false;
				was_multiple = false;
				++c;
				break;
			case '^':
			case '$':
				/*
				 * Anchors count as atoms but may not be
				 * followed by a repetition operator.
				 */
				have_atom = true;
				was_multiple = true;
				++c;
				break;
			case '+':
			case '*':
			case '?':
				if (was_multiple) {
					FAIL("was multiple");
				}
				if (!have_atom) {
					FAIL("no atom");
				}
				have_atom = true;
				was_multiple = true;
				++c;
				break;
			case '.':
			default:
			literal:
				have_atom = true;
				was_multiple = false;
				++c;
				break;
			}
			break;
		case parse_bound:
			switch (*c) {
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* Each limit is capped at 255. */
				if (!seen_comma) {
					low = low * 10 + *c - '0';
					if (low > 255) {
						FAIL("lower bound too big");
					}
				} else {
					seen_high = true;
					high = high * 10 + *c - '0';
					if (high > 255) {
						FAIL("upper bound too big");
					}
				}
				++c;
				break;
			case ',':
				if (seen_comma) {
					FAIL("multiple commas");
				}
				seen_comma = true;
				++c;
				break;
			case '}':
				if (seen_high && low > high) {
					FAIL("bad parse bound");
				}
				seen_comma = false;
				state = none;
				++c;
				break;
			case '{':
			default:
				FAIL("non digit/comma");
			}
			break;
		case parse_bracket:
			switch (*c) {
			case '^':
				/* Only a leading '^' negates the set. */
				if (seen_char || neg) {
					goto inside;
				}
				neg = true;
				++c;
				break;
			case '-':
				/* '-' is literal as a range end or first char. */
				if (range == 2) {
					goto inside;
				}
				if (!seen_char) {
					goto inside;
				}
				if (range == 1) {
					FAIL("bad range");
				}
				range = 2;
				++c;
				break;
			case '[':
				++c;
				/*
				 * Any other character after '[' is left for
				 * the next iteration to handle as bracket
				 * content.
				 */
				switch (*c) {
				case '.': /* collating element */
					if (range != 0) {
						--range;
					}
					++c;
					state = parse_ce;
					seen_ce = false;
					break;
				case '=': /* equivalence class */
					if (range == 2) {
						FAIL("equivalence class in "
						     "range");
					}
					++c;
					state = parse_ec;
					seen_ec = false;
					break;
				case ':': /* character class */
					if (range == 2) {
						FAIL("character class in "
						     "range");
					}
					ccname = c;
					++c;
					state = parse_cc;
					break;
				}
				seen_char = true;
				break;
			case ']':
				if (!c[1] && !seen_char) {
					FAIL("unfinished brace");
				}
				/* A leading ']' is an ordinary member. */
				if (!seen_char) {
					goto inside;
				}
				++c;
				range = 0;
				have_atom = true;
				state = none;
				break;
			default:
			inside:
				seen_char = true;
				if (range == 2 && (*c & 0xff) < range_start) {
					FAIL("out of order range");
				}
				if (range != 0) {
					--range;
				}
				range_start = *c & 0xff;
				++c;
				break;
			}
			break;
		case parse_ce:
			/*
			 * range_start is set to 256 once the collating
			 * element holds more than one character, so that any
			 * range it starts is reported as out of order.
			 */
			switch (*c) {
			case '.':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ce) {
						FAIL("empty ce");
					}
					++c;
					state = parse_bracket;
					break;
				default:
					if (seen_ce) {
						range_start = 256;
					} else {
						range_start = '.';
					}
					seen_ce = true;
					break;
				}
				break;
			default:
				if (seen_ce) {
					range_start = 256;
				} else {
					/*
					 * Mask to 0..255 exactly as the
					 * bracket 'inside' path does; a plain
					 * (possibly signed) char would yield a
					 * negative start for bytes >= 0x80 and
					 * silently disable the range order
					 * check.
					 */
					range_start = *c & 0xff;
				}
				seen_ce = true;
				++c;
				break;
			}
			break;
		case parse_ec:
			switch (*c) {
			case '=':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ec) {
						FAIL("no ec");
					}
					++c;
					state = parse_bracket;
					break;
				default:
					seen_ec = true;
					break;
				}
				break;
			default:
				seen_ec = true;
				++c;
				break;
			}
			break;
		case parse_cc:
			switch (*c) {
			case ':':
				++c;
				switch (*c) {
				case ']': {
					/*
					 * ccname..c spans ":name:"; it must
					 * match a known class exactly.
					 */
					size_t namelen = (size_t)(c - ccname);
					bool found = false;
					size_t i;

					for (i = 0;
					     i < sizeof(cc) / sizeof(*cc); i++)
					{
						if (strlen(cc[i]) != namelen) {
							continue;
						}
						if (strncmp(cc[i], ccname,
							    namelen) != 0)
						{
							continue;
						}
						found = true;
						break;
					}
					if (!found) {
						FAIL("unknown cc");
					}
					++c;
					state = parse_bracket;
					break;
				}
				default:
					break;
				}
				break;
			default:
				++c;
				break;
			}
			break;
		}
	}
	if (group != 0) {
		FAIL("group open");
	}
	if (state != none) {
		FAIL("incomplete");
	}
	if (!have_atom) {
		FAIL("no atom");
	}
	return sub;

error:
#if VALREGEX_REPORT_REASON
	fprintf(stderr, "%s\n", reason);
#endif /* if VALREGEX_REPORT_REASON */
	return -1;
}