/src/php-src/ext/pcre/pcre2lib/pcre2_convert.c
Line | Count | Source (jump to first uncovered line) |
1 | | /************************************************* |
2 | | * Perl-Compatible Regular Expressions * |
3 | | *************************************************/ |
4 | | |
5 | | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | | and semantics are as close as possible to those of the Perl 5 language. |
7 | | |
8 | | Written by Philip Hazel |
9 | | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | | New API code Copyright (c) 2016-2024 University of Cambridge |
11 | | |
12 | | ----------------------------------------------------------------------------- |
13 | | Redistribution and use in source and binary forms, with or without |
14 | | modification, are permitted provided that the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the University of Cambridge nor the names of its |
24 | | contributors may be used to endorse or promote products derived from |
25 | | this software without specific prior written permission. |
26 | | |
27 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | | POSSIBILITY OF SUCH DAMAGE. |
38 | | ----------------------------------------------------------------------------- |
39 | | */ |
40 | | |
41 | | |
42 | | #ifdef HAVE_CONFIG_H |
43 | | #include "config.h" |
44 | | #endif |
45 | | |
46 | | #include "pcre2_internal.h" |
47 | | |
/* Option bits that name a pattern type: glob, POSIX basic, POSIX extended. */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB | \
   PCRE2_CONVERT_POSIX_BASIC | \
   PCRE2_CONVERT_POSIX_EXTENDED)

/* The union of the UTF controls, the glob modifiers and the pattern types. */

#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF | \
   PCRE2_CONVERT_NO_UTF_CHECK | \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR | \
   PCRE2_CONVERT_GLOB_NO_STARSTAR | \
   TYPE_OPTIONS)

/* NOTE(review): presumably the size, in code units, of a scratch buffer used
for dummy runs; the use of this value lies outside this part of the file. */

#define DUMMY_BUFFER_SIZE 100
57 | | |
/* Generated pattern fragments. Each one is built by concatenating adjacent
STR_xxx string macros, in the order shown. */

#define STR_BACKSLASH_A \
  STR_BACKSLASH STR_A

#define STR_BACKSLASH_z \
  STR_BACKSLASH STR_z

#define STR_COLON_RIGHT_SQUARE_BRACKET \
  STR_COLON STR_RIGHT_SQUARE_BRACKET

#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN

#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS

#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS

#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
67 | | |
/* States for POSIX processing. The numeric order matters: convert_posix()
treats any state >= POSIX_CLASS_NOT_STARTED as "inside a bracket expression"
and any state < POSIX_NOT_BRACKET as "still at the start of the regex". */

enum {
  POSIX_START_REGEX,        /* Initial state, nothing converted yet */
  POSIX_ANCHORED,           /* Just after an anchoring ^ in a basic pattern */
  POSIX_NOT_BRACKET,        /* Outside a bracket expression, after an item */
  POSIX_CLASS_NOT_STARTED,  /* Inside [...], not within an inner [:name:] */
  POSIX_CLASS_STARTING,     /* Inside [...], an inner [ has just been seen */
  POSIX_CLASS_STARTED       /* Inside [...], after [: and lower case letters */
};
72 | | |
/* Macro to add a character string to the output buffer, checking for overflow.
It relies on two variables of the calling function: "p", the current output
position, and "endp", the limit that p must stay below. When the string does
not fit, the macro makes the calling function return PCRE2_ERROR_NOMEMORY; the
characters that did fit have already been stored. The body is wrapped in
do ... while (0) so that an invocation followed by a semicolon is exactly one
statement, which keeps it safe inside if/else. The loop variable has a long
name so that it cannot capture a variable used in the argument. */

#define PUTCHARS(string) \
  do \
    { \
    for (const char *putchars_src = (string); *putchars_src != 0; \
         putchars_src++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *putchars_src; \
      } \
    } \
  while (0)
83 | | |
84 | | /* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */ |
85 | | |
86 | | static const char *pcre2_escaped_literals = |
87 | | STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS |
88 | | STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN |
89 | | STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET |
90 | | STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET |
91 | | STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; |
92 | | |
93 | | /* Recognized escaped metacharacters in POSIX basic patterns. */ |
94 | | |
95 | | static const char *posix_meta_escapes = |
96 | | STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS |
97 | | STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET |
98 | | STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; |
99 | | |
100 | | |
101 | | |
102 | | /************************************************* |
103 | | * Convert a POSIX pattern * |
104 | | *************************************************/ |
105 | | |
106 | | /* This function handles both basic and extended POSIX patterns. |
107 | | |
108 | | Arguments: |
109 | | pattype the pattern type |
110 | | pattern the pattern |
111 | | plength length in code units |
112 | | utf TRUE if UTF |
113 | | use_buffer where to put the output |
114 | | use_length length of use_buffer |
115 | | bufflenptr where to put the used length |
116 | | dummyrun TRUE if a dummy run |
117 | | ccontext the convert context |
118 | | |
119 | | Returns: 0 => success |
120 | | !0 => error code |
121 | | */ |
122 | | |
123 | | static int |
124 | | convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength, |
125 | | BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, |
126 | | PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) |
127 | 0 | { |
128 | 0 | PCRE2_SPTR posix = pattern; |
129 | 0 | PCRE2_UCHAR *p = use_buffer; |
130 | 0 | PCRE2_UCHAR *pp = p; |
131 | 0 | PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */ |
132 | 0 | PCRE2_SIZE convlength = 0; |
133 | |
|
134 | 0 | uint32_t bracount = 0; |
135 | 0 | uint32_t posix_state = POSIX_START_REGEX; |
136 | 0 | uint32_t lastspecial = 0; |
137 | 0 | BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0; |
138 | 0 | BOOL nextisliteral = FALSE; |
139 | |
|
140 | 0 | (void)utf; /* Not used when Unicode not supported */ |
141 | 0 | (void)ccontext; /* Not currently used */ |
142 | | |
143 | | /* Initialize default for error offset as end of input. */ |
144 | |
|
145 | 0 | *bufflenptr = plength; |
146 | 0 | PUTCHARS(STR_STAR_NUL); |
147 | | |
148 | | /* Now scan the input. */ |
149 | |
|
150 | 0 | while (plength > 0) |
151 | 0 | { |
152 | 0 | uint32_t c, sc; |
153 | 0 | int clength = 1; |
154 | | |
155 | | /* Add in the length of the last item, then, if in the dummy run, pull the |
156 | | pointer back to the start of the (temporary) buffer and then remember the |
157 | | start of the next item. */ |
158 | |
|
159 | 0 | convlength += p - pp; |
160 | 0 | if (dummyrun) p = use_buffer; |
161 | 0 | pp = p; |
162 | | |
163 | | /* Pick up the next character */ |
164 | |
|
165 | | #ifndef SUPPORT_UNICODE |
166 | | c = *posix; |
167 | | #else |
168 | 0 | GETCHARLENTEST(c, posix, clength); |
169 | 0 | #endif |
170 | 0 | posix += clength; |
171 | 0 | plength -= clength; |
172 | |
|
173 | 0 | sc = nextisliteral? 0 : c; |
174 | 0 | nextisliteral = FALSE; |
175 | | |
176 | | /* Handle a character within a class. */ |
177 | |
|
178 | 0 | if (posix_state >= POSIX_CLASS_NOT_STARTED) |
179 | 0 | { |
180 | 0 | if (c == CHAR_RIGHT_SQUARE_BRACKET) |
181 | 0 | { |
182 | 0 | PUTCHARS(STR_RIGHT_SQUARE_BRACKET); |
183 | 0 | posix_state = POSIX_NOT_BRACKET; |
184 | 0 | } |
185 | | |
186 | | /* Not the end of the class */ |
187 | | |
188 | 0 | else |
189 | 0 | { |
190 | 0 | switch (posix_state) |
191 | 0 | { |
192 | 0 | case POSIX_CLASS_STARTED: |
193 | 0 | if (c <= 127 && islower(c)) break; /* Remain in started state */ |
194 | 0 | posix_state = POSIX_CLASS_NOT_STARTED; |
195 | 0 | if (c == CHAR_COLON && plength > 0 && |
196 | 0 | *posix == CHAR_RIGHT_SQUARE_BRACKET) |
197 | 0 | { |
198 | 0 | PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET); |
199 | 0 | plength--; |
200 | 0 | posix++; |
201 | 0 | continue; /* With next character after :] */ |
202 | 0 | } |
203 | | /* Fall through */ |
204 | | |
205 | 0 | case POSIX_CLASS_NOT_STARTED: |
206 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET) |
207 | 0 | posix_state = POSIX_CLASS_STARTING; |
208 | 0 | break; |
209 | | |
210 | 0 | case POSIX_CLASS_STARTING: |
211 | 0 | if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED; |
212 | 0 | break; |
213 | 0 | } |
214 | | |
215 | 0 | if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH); |
216 | 0 | if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; |
217 | 0 | memcpy(p, posix - clength, CU2BYTES(clength)); |
218 | 0 | p += clength; |
219 | 0 | } |
220 | 0 | } |
221 | | |
222 | | /* Handle a character not within a class. */ |
223 | | |
224 | 0 | else switch(sc) |
225 | 0 | { |
226 | 0 | case CHAR_LEFT_SQUARE_BRACKET: |
227 | 0 | PUTCHARS(STR_LEFT_SQUARE_BRACKET); |
228 | |
|
229 | | #ifdef NEVER |
230 | | /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does |
231 | | support) but they are not part of POSIX 1003.1. */ |
232 | | |
233 | | if (plength >= 6) |
234 | | { |
235 | | if (posix[0] == CHAR_LEFT_SQUARE_BRACKET && |
236 | | posix[1] == CHAR_COLON && |
237 | | (posix[2] == CHAR_LESS_THAN_SIGN || |
238 | | posix[2] == CHAR_GREATER_THAN_SIGN) && |
239 | | posix[3] == CHAR_COLON && |
240 | | posix[4] == CHAR_RIGHT_SQUARE_BRACKET && |
241 | | posix[5] == CHAR_RIGHT_SQUARE_BRACKET) |
242 | | { |
243 | | if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY; |
244 | | memcpy(p, posix, CU2BYTES(6)); |
245 | | p += 6; |
246 | | posix += 6; |
247 | | plength -= 6; |
248 | | continue; /* With next character */ |
249 | | } |
250 | | } |
251 | | #endif |
252 | | |
253 | | /* Handle start of "normal" character classes */ |
254 | |
|
255 | 0 | posix_state = POSIX_CLASS_NOT_STARTED; |
256 | | |
257 | | /* Handle ^ and ] as first characters */ |
258 | |
|
259 | 0 | if (plength > 0) |
260 | 0 | { |
261 | 0 | if (*posix == CHAR_CIRCUMFLEX_ACCENT) |
262 | 0 | { |
263 | 0 | posix++; |
264 | 0 | plength--; |
265 | 0 | PUTCHARS(STR_CIRCUMFLEX_ACCENT); |
266 | 0 | } |
267 | 0 | if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET) |
268 | 0 | { |
269 | 0 | posix++; |
270 | 0 | plength--; |
271 | 0 | PUTCHARS(STR_RIGHT_SQUARE_BRACKET); |
272 | 0 | } |
273 | 0 | } |
274 | 0 | break; |
275 | | |
276 | 0 | case CHAR_BACKSLASH: |
277 | 0 | if (plength == 0) return PCRE2_ERROR_END_BACKSLASH; |
278 | 0 | if (extended) nextisliteral = TRUE; else |
279 | 0 | { |
280 | 0 | if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL) |
281 | 0 | { |
282 | 0 | if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); |
283 | 0 | if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; |
284 | 0 | lastspecial = *p++ = *posix++; |
285 | 0 | plength--; |
286 | 0 | } |
287 | 0 | else nextisliteral = TRUE; |
288 | 0 | } |
289 | 0 | break; |
290 | | |
291 | 0 | case CHAR_RIGHT_PARENTHESIS: |
292 | 0 | if (!extended || bracount == 0) goto ESCAPE_LITERAL; |
293 | 0 | bracount--; |
294 | 0 | goto COPY_SPECIAL; |
295 | | |
296 | 0 | case CHAR_LEFT_PARENTHESIS: |
297 | 0 | bracount++; |
298 | | /* Fall through */ |
299 | |
|
300 | 0 | case CHAR_QUESTION_MARK: |
301 | 0 | case CHAR_PLUS: |
302 | 0 | case CHAR_LEFT_CURLY_BRACKET: |
303 | 0 | case CHAR_RIGHT_CURLY_BRACKET: |
304 | 0 | case CHAR_VERTICAL_LINE: |
305 | 0 | if (!extended) goto ESCAPE_LITERAL; |
306 | | /* Fall through */ |
307 | | |
308 | 0 | case CHAR_DOT: |
309 | 0 | case CHAR_DOLLAR_SIGN: |
310 | 0 | posix_state = POSIX_NOT_BRACKET; |
311 | 0 | COPY_SPECIAL: |
312 | 0 | lastspecial = c; |
313 | 0 | if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; |
314 | 0 | *p++ = c; |
315 | 0 | break; |
316 | | |
317 | 0 | case CHAR_ASTERISK: |
318 | 0 | if (lastspecial != CHAR_ASTERISK) |
319 | 0 | { |
320 | 0 | if (!extended && (posix_state < POSIX_NOT_BRACKET || |
321 | 0 | lastspecial == CHAR_LEFT_PARENTHESIS)) |
322 | 0 | goto ESCAPE_LITERAL; |
323 | 0 | goto COPY_SPECIAL; |
324 | 0 | } |
325 | 0 | break; /* Ignore second and subsequent asterisks */ |
326 | | |
327 | 0 | case CHAR_CIRCUMFLEX_ACCENT: |
328 | 0 | if (extended) goto COPY_SPECIAL; |
329 | 0 | if (posix_state == POSIX_START_REGEX || |
330 | 0 | lastspecial == CHAR_LEFT_PARENTHESIS) |
331 | 0 | { |
332 | 0 | posix_state = POSIX_ANCHORED; |
333 | 0 | goto COPY_SPECIAL; |
334 | 0 | } |
335 | | /* Fall through */ |
336 | | |
337 | 0 | default: |
338 | 0 | if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) |
339 | 0 | { |
340 | 0 | ESCAPE_LITERAL: |
341 | 0 | PUTCHARS(STR_BACKSLASH); |
342 | 0 | } |
343 | 0 | lastspecial = 0xff; /* Indicates nothing special */ |
344 | 0 | if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; |
345 | 0 | memcpy(p, posix - clength, CU2BYTES(clength)); |
346 | 0 | p += clength; |
347 | 0 | posix_state = POSIX_NOT_BRACKET; |
348 | 0 | break; |
349 | 0 | } |
350 | 0 | } |
351 | | |
352 | 0 | if (posix_state >= POSIX_CLASS_NOT_STARTED) |
353 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
354 | 0 | convlength += p - pp; /* Final segment */ |
355 | 0 | *bufflenptr = convlength; |
356 | 0 | *p++ = 0; |
357 | 0 | return 0; |
358 | 0 | } |
359 | | |
360 | | |
361 | | /************************************************* |
362 | | * Convert a glob pattern * |
363 | | *************************************************/ |
364 | | |
365 | | /* Context for writing the output into a buffer. */ |
366 | | |
367 | | typedef struct pcre2_output_context { |
368 | | PCRE2_UCHAR *output; /* current output position */ |
369 | | PCRE2_SPTR output_end; /* output end */ |
370 | | PCRE2_SIZE output_size; /* size of the output */ |
371 | | uint8_t out_str[8]; /* string copied to the output */ |
372 | | } pcre2_output_context; |
373 | | |
374 | | |
375 | | /* Write a character into the output. |
376 | | |
377 | | Arguments: |
378 | | out output context |
379 | | chr the next character |
380 | | */ |
381 | | |
382 | | static void |
383 | | convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr) |
384 | 0 | { |
385 | 0 | out->output_size++; |
386 | |
|
387 | 0 | if (out->output < out->output_end) |
388 | 0 | *out->output++ = chr; |
389 | 0 | } |
390 | | |
391 | | |
392 | | /* Write a string into the output. |
393 | | |
394 | | Arguments: |
395 | | out output context |
396 | | length length of out->out_str |
397 | | */ |
398 | | |
399 | | static void |
400 | | convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length) |
401 | 0 | { |
402 | 0 | uint8_t *out_str = out->out_str; |
403 | 0 | PCRE2_UCHAR *output = out->output; |
404 | 0 | PCRE2_SPTR output_end = out->output_end; |
405 | 0 | PCRE2_SIZE output_size = out->output_size; |
406 | |
|
407 | 0 | do |
408 | 0 | { |
409 | 0 | output_size++; |
410 | |
|
411 | 0 | if (output < output_end) |
412 | 0 | *output++ = *out_str++; |
413 | 0 | } |
414 | 0 | while (--length != 0); |
415 | |
|
416 | 0 | out->output = output; |
417 | 0 | out->output_size = output_size; |
418 | 0 | } |
419 | | |
420 | | |
421 | | /* Prints the separator into the output. |
422 | | |
423 | | Arguments: |
424 | | out output context |
425 | | separator glob separator |
426 | | with_escape backslash is needed before separator |
427 | | */ |
428 | | |
429 | | static void |
430 | | convert_glob_print_separator(pcre2_output_context *out, |
431 | | PCRE2_UCHAR separator, BOOL with_escape) |
432 | 0 | { |
433 | 0 | if (with_escape) |
434 | 0 | convert_glob_write(out, CHAR_BACKSLASH); |
435 | |
|
436 | 0 | convert_glob_write(out, separator); |
437 | 0 | } |
438 | | |
439 | | |
440 | | /* Prints a wildcard into the output. |
441 | | |
442 | | Arguments: |
443 | | out output context |
444 | | separator glob separator |
445 | | with_escape backslash is needed before separator |
446 | | */ |
447 | | |
448 | | static void |
449 | | convert_glob_print_wildcard(pcre2_output_context *out, |
450 | | PCRE2_UCHAR separator, BOOL with_escape) |
451 | 0 | { |
452 | 0 | out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; |
453 | 0 | out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; |
454 | 0 | convert_glob_write_str(out, 2); |
455 | |
|
456 | 0 | convert_glob_print_separator(out, separator, with_escape); |
457 | |
|
458 | 0 | convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET); |
459 | 0 | } |
460 | | |
461 | | |
462 | | /* Parse a posix class. |
463 | | |
464 | | Arguments: |
465 | | from starting point of scanning the range |
466 | | pattern_end end of pattern |
467 | | out output context |
468 | | |
469 | | Returns: >0 => class index |
470 | | 0 => malformed class |
471 | | */ |
472 | | |
473 | | static int |
474 | | convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, |
475 | | pcre2_output_context *out) |
476 | 0 | { |
477 | 0 | static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:" |
478 | 0 | "graph:lower:print:punct:space:upper:word:xdigit:"; |
479 | 0 | PCRE2_SPTR start = *from + 1; |
480 | 0 | PCRE2_SPTR pattern = start; |
481 | 0 | const char *class_ptr; |
482 | 0 | PCRE2_UCHAR c; |
483 | 0 | int class_index; |
484 | |
|
485 | 0 | while (TRUE) |
486 | 0 | { |
487 | 0 | if (pattern >= pattern_end) return 0; |
488 | | |
489 | 0 | c = *pattern++; |
490 | |
|
491 | 0 | if (c < CHAR_a || c > CHAR_z) break; |
492 | 0 | } |
493 | | |
494 | 0 | if (c != CHAR_COLON || pattern >= pattern_end || |
495 | 0 | *pattern != CHAR_RIGHT_SQUARE_BRACKET) |
496 | 0 | return 0; |
497 | | |
498 | 0 | class_ptr = posix_classes; |
499 | 0 | class_index = 1; |
500 | |
|
501 | 0 | while (TRUE) |
502 | 0 | { |
503 | 0 | if (*class_ptr == CHAR_NUL) return 0; |
504 | | |
505 | 0 | pattern = start; |
506 | |
|
507 | 0 | while (*pattern == (PCRE2_UCHAR) *class_ptr) |
508 | 0 | { |
509 | 0 | if (*pattern == CHAR_COLON) |
510 | 0 | { |
511 | 0 | pattern += 2; |
512 | 0 | start -= 2; |
513 | |
|
514 | 0 | do convert_glob_write(out, *start++); while (start < pattern); |
515 | |
|
516 | 0 | *from = pattern; |
517 | 0 | return class_index; |
518 | 0 | } |
519 | 0 | pattern++; |
520 | 0 | class_ptr++; |
521 | 0 | } |
522 | | |
523 | 0 | while (*class_ptr != CHAR_COLON) class_ptr++; |
524 | 0 | class_ptr++; |
525 | 0 | class_index++; |
526 | 0 | } |
527 | 0 | } |
528 | | |
529 | | /* Checks whether the character is in the class. |
530 | | |
531 | | Arguments: |
532 | | class_index class index |
533 | | c character |
534 | | |
535 | | Returns: !0 => character is found in the class |
536 | | 0 => otherwise |
537 | | */ |
538 | | |
539 | | static BOOL |
540 | | convert_glob_char_in_class(int class_index, PCRE2_UCHAR c) |
541 | 0 | { |
542 | | #if PCRE2_CODE_UNIT_WIDTH != 8 |
543 | | if (c > 0xff) |
544 | | { |
545 | | /* ctype functions are not sane for c > 0xff */ |
546 | | return 0; |
547 | | } |
548 | | #endif |
549 | |
|
550 | 0 | switch (class_index) |
551 | 0 | { |
552 | 0 | case 1: return isalnum(c); |
553 | 0 | case 2: return isalpha(c); |
554 | 0 | case 3: return 1; |
555 | 0 | case 4: return c == CHAR_HT || c == CHAR_SPACE; |
556 | 0 | case 5: return iscntrl(c); |
557 | 0 | case 6: return isdigit(c); |
558 | 0 | case 7: return isgraph(c); |
559 | 0 | case 8: return islower(c); |
560 | 0 | case 9: return isprint(c); |
561 | 0 | case 10: return ispunct(c); |
562 | 0 | case 11: return isspace(c); |
563 | 0 | case 12: return isupper(c); |
564 | 0 | case 13: return isalnum(c) || c == CHAR_UNDERSCORE; |
565 | 0 | default: return isxdigit(c); |
566 | 0 | } |
567 | 0 | } |
568 | | |
569 | | /* Parse a range of characters. |
570 | | |
571 | | Arguments: |
572 | | from starting point of scanning the range |
573 | | pattern_end end of pattern |
574 | | out output context |
575 | | separator glob separator |
576 | | with_escape backslash is needed before separator |
577 | | |
578 | | Returns: 0 => success |
579 | | !0 => error code |
580 | | */ |
581 | | |
582 | | static int |
583 | | convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, |
584 | | pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator, |
585 | | BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep) |
586 | 0 | { |
587 | 0 | BOOL is_negative = FALSE; |
588 | 0 | BOOL separator_seen = FALSE; |
589 | 0 | BOOL has_prev_c; |
590 | 0 | PCRE2_SPTR pattern = *from; |
591 | 0 | PCRE2_SPTR char_start = NULL; |
592 | 0 | uint32_t c, prev_c; |
593 | 0 | int len, class_index; |
594 | |
|
595 | 0 | (void)utf; /* Avoid compiler warning. */ |
596 | |
|
597 | 0 | if (pattern >= pattern_end) |
598 | 0 | { |
599 | 0 | *from = pattern; |
600 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
601 | 0 | } |
602 | | |
603 | 0 | if (*pattern == CHAR_EXCLAMATION_MARK |
604 | 0 | || *pattern == CHAR_CIRCUMFLEX_ACCENT) |
605 | 0 | { |
606 | 0 | pattern++; |
607 | |
|
608 | 0 | if (pattern >= pattern_end) |
609 | 0 | { |
610 | 0 | *from = pattern; |
611 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
612 | 0 | } |
613 | | |
614 | 0 | is_negative = TRUE; |
615 | |
|
616 | 0 | out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; |
617 | 0 | out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; |
618 | 0 | len = 2; |
619 | |
|
620 | 0 | if (!no_wildsep) |
621 | 0 | { |
622 | 0 | if (with_escape) |
623 | 0 | { |
624 | 0 | out->out_str[len] = CHAR_BACKSLASH; |
625 | 0 | len++; |
626 | 0 | } |
627 | 0 | out->out_str[len] = (uint8_t) separator; |
628 | 0 | } |
629 | |
|
630 | 0 | convert_glob_write_str(out, len + 1); |
631 | 0 | } |
632 | 0 | else |
633 | 0 | convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET); |
634 | | |
635 | 0 | has_prev_c = FALSE; |
636 | 0 | prev_c = 0; |
637 | |
|
638 | 0 | if (*pattern == CHAR_RIGHT_SQUARE_BRACKET) |
639 | 0 | { |
640 | 0 | out->out_str[0] = CHAR_BACKSLASH; |
641 | 0 | out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET; |
642 | 0 | convert_glob_write_str(out, 2); |
643 | 0 | has_prev_c = TRUE; |
644 | 0 | prev_c = CHAR_RIGHT_SQUARE_BRACKET; |
645 | 0 | pattern++; |
646 | 0 | } |
647 | |
|
648 | 0 | while (pattern < pattern_end) |
649 | 0 | { |
650 | 0 | char_start = pattern; |
651 | 0 | GETCHARINCTEST(c, pattern); |
652 | |
|
653 | 0 | if (c == CHAR_RIGHT_SQUARE_BRACKET) |
654 | 0 | { |
655 | 0 | convert_glob_write(out, c); |
656 | |
|
657 | 0 | if (!is_negative && !no_wildsep && separator_seen) |
658 | 0 | { |
659 | 0 | out->out_str[0] = CHAR_LEFT_PARENTHESIS; |
660 | 0 | out->out_str[1] = CHAR_QUESTION_MARK; |
661 | 0 | out->out_str[2] = CHAR_LESS_THAN_SIGN; |
662 | 0 | out->out_str[3] = CHAR_EXCLAMATION_MARK; |
663 | 0 | convert_glob_write_str(out, 4); |
664 | |
|
665 | 0 | convert_glob_print_separator(out, separator, with_escape); |
666 | 0 | convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); |
667 | 0 | } |
668 | |
|
669 | 0 | *from = pattern; |
670 | 0 | return 0; |
671 | 0 | } |
672 | | |
673 | 0 | if (pattern >= pattern_end) break; |
674 | | |
675 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) |
676 | 0 | { |
677 | 0 | *from = pattern; |
678 | 0 | class_index = convert_glob_parse_class(from, pattern_end, out); |
679 | |
|
680 | 0 | if (class_index != 0) |
681 | 0 | { |
682 | 0 | pattern = *from; |
683 | |
|
684 | 0 | has_prev_c = FALSE; |
685 | 0 | prev_c = 0; |
686 | |
|
687 | 0 | if (!is_negative && |
688 | 0 | convert_glob_char_in_class (class_index, separator)) |
689 | 0 | separator_seen = TRUE; |
690 | 0 | continue; |
691 | 0 | } |
692 | 0 | } |
693 | 0 | else if (c == CHAR_MINUS && has_prev_c && |
694 | 0 | *pattern != CHAR_RIGHT_SQUARE_BRACKET) |
695 | 0 | { |
696 | 0 | convert_glob_write(out, CHAR_MINUS); |
697 | |
|
698 | 0 | char_start = pattern; |
699 | 0 | GETCHARINCTEST(c, pattern); |
700 | |
|
701 | 0 | if (pattern >= pattern_end) break; |
702 | | |
703 | 0 | if (escape != 0 && c == escape) |
704 | 0 | { |
705 | 0 | char_start = pattern; |
706 | 0 | GETCHARINCTEST(c, pattern); |
707 | 0 | } |
708 | 0 | else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) |
709 | 0 | { |
710 | 0 | *from = pattern; |
711 | 0 | return PCRE2_ERROR_CONVERT_SYNTAX; |
712 | 0 | } |
713 | | |
714 | 0 | if (prev_c > c) |
715 | 0 | { |
716 | 0 | *from = pattern; |
717 | 0 | return PCRE2_ERROR_CONVERT_SYNTAX; |
718 | 0 | } |
719 | | |
720 | 0 | if (prev_c < separator && separator < c) separator_seen = TRUE; |
721 | |
|
722 | 0 | has_prev_c = FALSE; |
723 | 0 | prev_c = 0; |
724 | 0 | } |
725 | 0 | else |
726 | 0 | { |
727 | 0 | if (escape != 0 && c == escape) |
728 | 0 | { |
729 | 0 | char_start = pattern; |
730 | 0 | GETCHARINCTEST(c, pattern); |
731 | |
|
732 | 0 | if (pattern >= pattern_end) break; |
733 | 0 | } |
734 | | |
735 | 0 | has_prev_c = TRUE; |
736 | 0 | prev_c = c; |
737 | 0 | } |
738 | | |
739 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET || |
740 | 0 | c == CHAR_BACKSLASH || c == CHAR_MINUS) |
741 | 0 | convert_glob_write(out, CHAR_BACKSLASH); |
742 | |
|
743 | 0 | if (c == separator) separator_seen = TRUE; |
744 | |
|
745 | 0 | do convert_glob_write(out, *char_start++); while (char_start < pattern); |
746 | 0 | } |
747 | | |
748 | 0 | *from = pattern; |
749 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
750 | 0 | } |
751 | | |
752 | | |
753 | | /* Prints a (*COMMIT) into the output. |
754 | | |
755 | | Arguments: |
756 | | out output context |
757 | | */ |
758 | | |
759 | | static void |
760 | | convert_glob_print_commit(pcre2_output_context *out) |
761 | 0 | { |
762 | 0 | out->out_str[0] = CHAR_LEFT_PARENTHESIS; |
763 | 0 | out->out_str[1] = CHAR_ASTERISK; |
764 | 0 | out->out_str[2] = CHAR_C; |
765 | 0 | out->out_str[3] = CHAR_O; |
766 | 0 | out->out_str[4] = CHAR_M; |
767 | 0 | out->out_str[5] = CHAR_M; |
768 | 0 | out->out_str[6] = CHAR_I; |
769 | 0 | out->out_str[7] = CHAR_T; |
770 | 0 | convert_glob_write_str(out, 8); |
771 | 0 | convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); |
772 | 0 | } |
773 | | |
774 | | |
775 | | /* Bash glob converter. |
776 | | |
777 | | Arguments: |
778 | | options the conversion option bits |
779 | | pattern the pattern |
780 | | plength length in code units |
781 | | utf TRUE if UTF |
782 | | use_buffer where to put the output |
783 | | use_length length of use_buffer |
784 | | bufflenptr where to put the used length |
785 | | dummyrun TRUE if a dummy run |
786 | | ccontext the convert context |
787 | | |
788 | | Returns: 0 => success |
789 | | !0 => error code |
790 | | */ |
791 | | |
792 | | static int |
793 | | convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength, |
794 | | BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, |
795 | | PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) |
796 | 0 | { |
797 | 0 | pcre2_output_context out; |
798 | 0 | PCRE2_SPTR pattern_start = pattern; |
799 | 0 | PCRE2_SPTR pattern_end = pattern + plength; |
800 | 0 | PCRE2_UCHAR separator = ccontext->glob_separator; |
801 | 0 | PCRE2_UCHAR escape = ccontext->glob_escape; |
802 | 0 | PCRE2_UCHAR c; |
803 | 0 | BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0; |
804 | 0 | BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0; |
805 | 0 | BOOL in_atomic = FALSE; |
806 | 0 | BOOL after_starstar = FALSE; |
807 | 0 | BOOL no_slash_z = FALSE; |
808 | 0 | BOOL with_escape, is_start, after_separator; |
809 | 0 | int result = 0; |
810 | |
|
811 | 0 | (void)utf; /* Avoid compiler warning. */ |
812 | |
|
813 | 0 | #ifdef SUPPORT_UNICODE |
814 | 0 | if (utf && (separator >= 128 || escape >= 128)) |
815 | 0 | { |
816 | | /* Currently only ASCII characters are supported. */ |
817 | 0 | *bufflenptr = 0; |
818 | 0 | return PCRE2_ERROR_CONVERT_SYNTAX; |
819 | 0 | } |
820 | 0 | #endif |
821 | | |
822 | 0 | with_escape = strchr(pcre2_escaped_literals, separator) != NULL; |
823 | | |
824 | | /* Initialize default for error offset as end of input. */ |
825 | 0 | out.output = use_buffer; |
826 | 0 | out.output_end = use_buffer + use_length; |
827 | 0 | out.output_size = 0; |
828 | |
|
829 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
830 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
831 | 0 | out.out_str[2] = CHAR_s; |
832 | 0 | out.out_str[3] = CHAR_RIGHT_PARENTHESIS; |
833 | 0 | convert_glob_write_str(&out, 4); |
834 | |
|
835 | 0 | is_start = TRUE; |
836 | |
|
837 | 0 | if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK) |
838 | 0 | { |
839 | 0 | if (no_wildsep) |
840 | 0 | is_start = FALSE; |
841 | 0 | else if (!no_starstar && pattern + 1 < pattern_end && |
842 | 0 | pattern[1] == CHAR_ASTERISK) |
843 | 0 | is_start = FALSE; |
844 | 0 | } |
845 | |
|
846 | 0 | if (is_start) |
847 | 0 | { |
848 | 0 | out.out_str[0] = CHAR_BACKSLASH; |
849 | 0 | out.out_str[1] = CHAR_A; |
850 | 0 | convert_glob_write_str(&out, 2); |
851 | 0 | } |
852 | |
|
853 | 0 | while (pattern < pattern_end) |
854 | 0 | { |
855 | 0 | c = *pattern++; |
856 | |
|
857 | 0 | if (c == CHAR_ASTERISK) |
858 | 0 | { |
859 | 0 | is_start = pattern == pattern_start + 1; |
860 | |
|
861 | 0 | if (in_atomic) |
862 | 0 | { |
863 | 0 | convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); |
864 | 0 | in_atomic = FALSE; |
865 | 0 | } |
866 | |
|
867 | 0 | if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK) |
868 | 0 | { |
869 | 0 | after_separator = is_start || (pattern[-2] == separator); |
870 | |
|
871 | 0 | do pattern++; while (pattern < pattern_end && |
872 | 0 | *pattern == CHAR_ASTERISK); |
873 | |
|
874 | 0 | if (pattern >= pattern_end) |
875 | 0 | { |
876 | 0 | no_slash_z = TRUE; |
877 | 0 | break; |
878 | 0 | } |
879 | | |
880 | 0 | after_starstar = TRUE; |
881 | |
|
882 | 0 | if (after_separator && escape != 0 && *pattern == escape && |
883 | 0 | pattern + 1 < pattern_end && pattern[1] == separator) |
884 | 0 | pattern++; |
885 | |
|
886 | 0 | if (is_start) |
887 | 0 | { |
888 | 0 | if (*pattern != separator) continue; |
889 | | |
890 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
891 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
892 | 0 | out.out_str[2] = CHAR_COLON; |
893 | 0 | out.out_str[3] = CHAR_BACKSLASH; |
894 | 0 | out.out_str[4] = CHAR_A; |
895 | 0 | out.out_str[5] = CHAR_VERTICAL_LINE; |
896 | 0 | convert_glob_write_str(&out, 6); |
897 | |
|
898 | 0 | convert_glob_print_separator(&out, separator, with_escape); |
899 | 0 | convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); |
900 | |
|
901 | 0 | pattern++; |
902 | 0 | continue; |
903 | 0 | } |
904 | | |
905 | 0 | convert_glob_print_commit(&out); |
906 | |
|
907 | 0 | if (!after_separator || *pattern != separator) |
908 | 0 | { |
909 | 0 | out.out_str[0] = CHAR_DOT; |
910 | 0 | out.out_str[1] = CHAR_ASTERISK; |
911 | 0 | out.out_str[2] = CHAR_QUESTION_MARK; |
912 | 0 | convert_glob_write_str(&out, 3); |
913 | 0 | continue; |
914 | 0 | } |
915 | | |
916 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
917 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
918 | 0 | out.out_str[2] = CHAR_COLON; |
919 | 0 | out.out_str[3] = CHAR_DOT; |
920 | 0 | out.out_str[4] = CHAR_ASTERISK; |
921 | 0 | out.out_str[5] = CHAR_QUESTION_MARK; |
922 | |
|
923 | 0 | convert_glob_write_str(&out, 6); |
924 | |
|
925 | 0 | convert_glob_print_separator(&out, separator, with_escape); |
926 | |
|
927 | 0 | out.out_str[0] = CHAR_RIGHT_PARENTHESIS; |
928 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
929 | 0 | out.out_str[2] = CHAR_QUESTION_MARK; |
930 | 0 | convert_glob_write_str(&out, 3); |
931 | |
|
932 | 0 | pattern++; |
933 | 0 | continue; |
934 | 0 | } |
935 | | |
936 | 0 | if (pattern < pattern_end && *pattern == CHAR_ASTERISK) |
937 | 0 | { |
938 | 0 | do pattern++; while (pattern < pattern_end && |
939 | 0 | *pattern == CHAR_ASTERISK); |
940 | 0 | } |
941 | |
|
942 | 0 | if (no_wildsep) |
943 | 0 | { |
944 | 0 | if (pattern >= pattern_end) |
945 | 0 | { |
946 | 0 | no_slash_z = TRUE; |
947 | 0 | break; |
948 | 0 | } |
949 | | |
950 | | /* Start check must be after the end check. */ |
951 | 0 | if (is_start) continue; |
952 | 0 | } |
953 | | |
954 | 0 | if (!is_start) |
955 | 0 | { |
956 | 0 | if (after_starstar) |
957 | 0 | { |
958 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
959 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
960 | 0 | out.out_str[2] = CHAR_GREATER_THAN_SIGN; |
961 | 0 | convert_glob_write_str(&out, 3); |
962 | 0 | in_atomic = TRUE; |
963 | 0 | } |
964 | 0 | else |
965 | 0 | convert_glob_print_commit(&out); |
966 | 0 | } |
967 | |
|
968 | 0 | if (no_wildsep) |
969 | 0 | convert_glob_write(&out, CHAR_DOT); |
970 | 0 | else |
971 | 0 | convert_glob_print_wildcard(&out, separator, with_escape); |
972 | |
|
973 | 0 | out.out_str[0] = CHAR_ASTERISK; |
974 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
975 | 0 | if (pattern >= pattern_end) |
976 | 0 | out.out_str[1] = CHAR_PLUS; |
977 | 0 | convert_glob_write_str(&out, 2); |
978 | 0 | continue; |
979 | 0 | } |
980 | | |
981 | 0 | if (c == CHAR_QUESTION_MARK) |
982 | 0 | { |
983 | 0 | if (no_wildsep) |
984 | 0 | convert_glob_write(&out, CHAR_DOT); |
985 | 0 | else |
986 | 0 | convert_glob_print_wildcard(&out, separator, with_escape); |
987 | 0 | continue; |
988 | 0 | } |
989 | | |
990 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET) |
991 | 0 | { |
992 | 0 | result = convert_glob_parse_range(&pattern, pattern_end, |
993 | 0 | &out, utf, separator, with_escape, escape, no_wildsep); |
994 | 0 | if (result != 0) break; |
995 | 0 | continue; |
996 | 0 | } |
997 | | |
998 | 0 | if (escape != 0 && c == escape) |
999 | 0 | { |
1000 | 0 | if (pattern >= pattern_end) |
1001 | 0 | { |
1002 | 0 | result = PCRE2_ERROR_CONVERT_SYNTAX; |
1003 | 0 | break; |
1004 | 0 | } |
1005 | 0 | c = *pattern++; |
1006 | 0 | } |
1007 | | |
1008 | 0 | if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) |
1009 | 0 | convert_glob_write(&out, CHAR_BACKSLASH); |
1010 | |
|
1011 | 0 | convert_glob_write(&out, c); |
1012 | 0 | } |
1013 | |
|
1014 | 0 | if (result == 0) |
1015 | 0 | { |
1016 | 0 | if (!no_slash_z) |
1017 | 0 | { |
1018 | 0 | out.out_str[0] = CHAR_BACKSLASH; |
1019 | 0 | out.out_str[1] = CHAR_z; |
1020 | 0 | convert_glob_write_str(&out, 2); |
1021 | 0 | } |
1022 | |
|
1023 | 0 | if (in_atomic) |
1024 | 0 | convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); |
1025 | |
|
1026 | 0 | convert_glob_write(&out, CHAR_NUL); |
1027 | |
|
1028 | 0 | if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer)) |
1029 | 0 | result = PCRE2_ERROR_NOMEMORY; |
1030 | 0 | } |
1031 | |
|
1032 | 0 | if (result != 0) |
1033 | 0 | { |
1034 | 0 | *bufflenptr = pattern - pattern_start; |
1035 | 0 | return result; |
1036 | 0 | } |
1037 | | |
1038 | 0 | *bufflenptr = out.output_size - 1; |
1039 | 0 | return 0; |
1040 | 0 | } |
1041 | | |
1042 | | |
1043 | | /************************************************* |
1044 | | * Convert pattern * |
1045 | | *************************************************/ |
1046 | | |
1047 | | /* This is the external-facing function for converting other forms of pattern |
1048 | | into PCRE2 regular expression patterns. On error, the bufflenptr argument is |
1049 | | used to return an offset in the original pattern. |
1050 | | |
1051 | | Arguments: |
1052 | | pattern the input pattern |
1053 | | plength length of input, or PCRE2_ZERO_TERMINATED |
1054 | | options options bits |
1055 | | buffptr pointer to pointer to output buffer |
1056 | | bufflenptr pointer to length of output buffer |
1057 | | ccontext convert context or NULL |
1058 | | |
1059 | | Returns: 0 for success, else an error code (+ve or -ve) |
1060 | | */ |
1061 | | |
1062 | | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION |
1063 | | pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options, |
1064 | | PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr, |
1065 | | pcre2_convert_context *ccontext) |
1066 | 0 | { |
1067 | 0 | int rc; |
1068 | 0 | PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE]; |
1069 | 0 | PCRE2_UCHAR *use_buffer = dummy_buffer; |
1070 | 0 | PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE; |
1071 | 0 | BOOL utf = (options & PCRE2_CONVERT_UTF) != 0; |
1072 | 0 | uint32_t pattype = options & TYPE_OPTIONS; |
1073 | |
|
1074 | 0 | if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL; |
1075 | | |
1076 | 0 | if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */ |
1077 | 0 | (pattype & (~pattype+1)) != pattype || /* More than one type set */ |
1078 | 0 | pattype == 0) /* No type set */ |
1079 | 0 | { |
1080 | 0 | *bufflenptr = 0; /* Error offset */ |
1081 | 0 | return PCRE2_ERROR_BADOPTION; |
1082 | 0 | } |
1083 | | |
1084 | 0 | if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern); |
1085 | 0 | if (ccontext == NULL) ccontext = |
1086 | 0 | (pcre2_convert_context *)(&PRIV(default_convert_context)); |
1087 | | |
1088 | | /* Check UTF if required. */ |
1089 | |
|
1090 | | #ifndef SUPPORT_UNICODE |
1091 | | if (utf) |
1092 | | { |
1093 | | *bufflenptr = 0; /* Error offset */ |
1094 | | return PCRE2_ERROR_UNICODE_NOT_SUPPORTED; |
1095 | | } |
1096 | | #else |
1097 | 0 | if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0) |
1098 | 0 | { |
1099 | 0 | PCRE2_SIZE erroroffset; |
1100 | 0 | rc = PRIV(valid_utf)(pattern, plength, &erroroffset); |
1101 | 0 | if (rc != 0) |
1102 | 0 | { |
1103 | 0 | *bufflenptr = erroroffset; |
1104 | 0 | return rc; |
1105 | 0 | } |
1106 | 0 | } |
1107 | 0 | #endif |
1108 | | |
1109 | | /* If buffptr is not NULL, and what it points to is not NULL, we are being |
1110 | | provided with a buffer and a length, so set them as the buffer to use. */ |
1111 | | |
1112 | 0 | if (buffptr != NULL && *buffptr != NULL) |
1113 | 0 | { |
1114 | 0 | use_buffer = *buffptr; |
1115 | 0 | use_length = *bufflenptr; |
1116 | 0 | } |
1117 | | |
1118 | | /* Call an individual converter, either just once (if a buffer was provided or |
1119 | | just the length is needed), or twice (if a memory allocation is required). */ |
1120 | |
|
1121 | 0 | for (int i = 0; i < 2; i++) |
1122 | 0 | { |
1123 | 0 | PCRE2_UCHAR *allocated; |
1124 | 0 | BOOL dummyrun = buffptr == NULL || *buffptr == NULL; |
1125 | |
|
1126 | 0 | switch(pattype) |
1127 | 0 | { |
1128 | 0 | case PCRE2_CONVERT_GLOB: |
1129 | 0 | rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf, |
1130 | 0 | use_buffer, use_length, bufflenptr, dummyrun, ccontext); |
1131 | 0 | break; |
1132 | | |
1133 | 0 | case PCRE2_CONVERT_POSIX_BASIC: |
1134 | 0 | case PCRE2_CONVERT_POSIX_EXTENDED: |
1135 | 0 | rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length, |
1136 | 0 | bufflenptr, dummyrun, ccontext); |
1137 | 0 | break; |
1138 | | |
1139 | 0 | default: |
1140 | 0 | goto EXIT; |
1141 | 0 | } |
1142 | | |
1143 | 0 | if (rc != 0 || /* Error */ |
1144 | 0 | buffptr == NULL || /* Just the length is required */ |
1145 | 0 | *buffptr != NULL) /* Buffer was provided or allocated */ |
1146 | 0 | return rc; |
1147 | | |
1148 | | /* Allocate memory for the buffer, with hidden space for an allocator at |
1149 | | the start. The next time round the loop runs the conversion for real. */ |
1150 | | |
1151 | 0 | allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + |
1152 | 0 | (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext); |
1153 | 0 | if (allocated == NULL) return PCRE2_ERROR_NOMEMORY; |
1154 | 0 | *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl)); |
1155 | |
|
1156 | 0 | use_buffer = *buffptr; |
1157 | 0 | use_length = *bufflenptr + 1; |
1158 | 0 | } |
1159 | | |
1160 | | /* Something went terribly wrong. Trigger an assert and return an error */ |
1161 | 0 | PCRE2_DEBUG_UNREACHABLE(); |
1162 | |
|
1163 | 0 | EXIT: |
1164 | |
|
1165 | 0 | *bufflenptr = 0; /* Error offset */ |
1166 | 0 | return PCRE2_ERROR_INTERNAL; |
1167 | 0 | } |
1168 | | |
1169 | | |
1170 | | /************************************************* |
1171 | | * Free converted pattern * |
1172 | | *************************************************/ |
1173 | | |
1174 | | /* This frees a converted pattern that was put in newly-allocated memory. |
1175 | | |
1176 | | Argument: the converted pattern |
1177 | | Returns: nothing |
1178 | | */ |
1179 | | |
1180 | | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION |
1181 | | pcre2_converted_pattern_free(PCRE2_UCHAR *converted) |
1182 | 0 | { |
1183 | 0 | if (converted != NULL) |
1184 | 0 | { |
1185 | 0 | pcre2_memctl *memctl = |
1186 | 0 | (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl)); |
1187 | 0 | memctl->free(memctl, memctl->memory_data); |
1188 | 0 | } |
1189 | 0 | } |
1190 | | |
1191 | | /* End of pcre2_convert.c */ |