/src/php-src/ext/pcre/pcre2lib/pcre2_convert.c
Line | Count | Source |
1 | | /************************************************* |
2 | | * Perl-Compatible Regular Expressions * |
3 | | *************************************************/ |
4 | | |
5 | | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | | and semantics are as close as possible to those of the Perl 5 language. |
7 | | |
8 | | Written by Philip Hazel |
9 | | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | | New API code Copyright (c) 2016-2022 University of Cambridge |
11 | | |
12 | | ----------------------------------------------------------------------------- |
13 | | Redistribution and use in source and binary forms, with or without |
14 | | modification, are permitted provided that the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the University of Cambridge nor the names of its |
24 | | contributors may be used to endorse or promote products derived from |
25 | | this software without specific prior written permission. |
26 | | |
27 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | | POSSIBILITY OF SUCH DAMAGE. |
38 | | ----------------------------------------------------------------------------- |
39 | | */ |
40 | | |
41 | | |
42 | | #ifdef HAVE_CONFIG_H |
43 | | #include "config.h" |
44 | | #endif |
45 | | |
46 | | #include "pcre2_internal.h" |
47 | | |
/* Union of the option bits that select which kind of pattern is converted. */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB | \
   PCRE2_CONVERT_POSIX_BASIC | \
   PCRE2_CONVERT_POSIX_EXTENDED)

/* Union of every conversion option bit named in this file. */

#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF | \
   PCRE2_CONVERT_NO_UTF_CHECK | \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR | \
   PCRE2_CONVERT_GLOB_NO_STARSTAR | \
   TYPE_OPTIONS)

/* NOTE(review): presumably the size, in code units, of the scratch buffer
used for dummy (length-measuring) runs - its use is outside this excerpt. */

#define DUMMY_BUFFER_SIZE 100

/* Generated pattern fragments. Each one is built by concatenating the
single-character STR_xxx string macros, so (assuming every STR_xxx expands to
the one character its name describes) they spell, in order:
\A   \z   :]   .*(?<=   (?!\.)   (?s)   (*NUL)  */

#define STR_BACKSLASH_A \
  STR_BACKSLASH STR_A
#define STR_BACKSLASH_z \
  STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET \
  STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS

/* States for POSIX processing. The numeric order is significant: the
converter compares states with >= POSIX_CLASS_NOT_STARTED ("inside a bracket
expression") and < POSIX_NOT_BRACKET ("nothing but an anchor seen so far"). */

enum {
  POSIX_START_REGEX,        /* Nothing has been processed yet */
  POSIX_ANCHORED,           /* A leading ^ anchor has just been copied */
  POSIX_NOT_BRACKET,        /* Outside a bracket expression */
  POSIX_CLASS_NOT_STARTED,  /* Inside [...], not inside a [:name:] item */
  POSIX_CLASS_STARTING,     /* Inside [...], just seen a nested [ */
  POSIX_CLASS_STARTED       /* Inside [...], just seen [: and maybe letters */
};
72 | | |
/* Macro to add a character string to the output buffer, checking for overflow.

The macro relies on three names being in scope at the point of use: "s" (a
char * scratch cursor), "p" (the current output position) and "endp" (the
first position that must not be written). When the output is full it makes the
ENCLOSING FUNCTION return PCRE2_ERROR_NOMEMORY, so it may only be used in
functions that return an int error code.

The body is wrapped in do ... while (0) so that an invocation followed by a
semicolon is exactly one statement and can safely be the body of an if that
has an else branch. */

#define PUTCHARS(string) \
  do \
    { \
    for (s = (char *)(string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
83 | | |
84 | | /* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */ |
85 | | |
86 | | static const char *pcre2_escaped_literals = |
87 | | STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS |
88 | | STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN |
89 | | STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET |
90 | | STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET |
91 | | STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; |
92 | | |
93 | | /* Recognized escaped metacharacters in POSIX basic patterns. */ |
94 | | |
95 | | static const char *posix_meta_escapes = |
96 | | STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS |
97 | | STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET |
98 | | STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; |
99 | | |
100 | | |
101 | | |
102 | | /************************************************* |
103 | | * Convert a POSIX pattern * |
104 | | *************************************************/ |
105 | | |
106 | | /* This function handles both basic and extended POSIX patterns. |
107 | | |
108 | | Arguments: |
109 | | pattype the pattern type |
110 | | pattern the pattern |
111 | | plength length in code units |
112 | | utf TRUE if UTF |
113 | | use_buffer where to put the output |
114 | | use_length length of use_buffer |
115 | | bufflenptr where to put the used length |
116 | | dummyrun TRUE if a dummy run |
117 | | ccontext the convert context |
118 | | |
119 | | Returns: 0 => success |
120 | | !0 => error code |
121 | | */ |
122 | | |
123 | | static int |
124 | | convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength, |
125 | | BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, |
126 | | PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) |
127 | 0 | { |
128 | 0 | char *s; |
129 | 0 | PCRE2_SPTR posix = pattern; |
130 | 0 | PCRE2_UCHAR *p = use_buffer; |
131 | 0 | PCRE2_UCHAR *pp = p; |
132 | 0 | PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */ |
133 | 0 | PCRE2_SIZE convlength = 0; |
134 | |
|
135 | 0 | uint32_t bracount = 0; |
136 | 0 | uint32_t posix_state = POSIX_START_REGEX; |
137 | 0 | uint32_t lastspecial = 0; |
138 | 0 | BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0; |
139 | 0 | BOOL nextisliteral = FALSE; |
140 | |
|
141 | 0 | (void)utf; /* Not used when Unicode not supported */ |
142 | 0 | (void)ccontext; /* Not currently used */ |
143 | | |
144 | | /* Initialize default for error offset as end of input. */ |
145 | |
|
146 | 0 | *bufflenptr = plength; |
147 | 0 | PUTCHARS(STR_STAR_NUL); |
148 | | |
149 | | /* Now scan the input. */ |
150 | |
|
151 | 0 | while (plength > 0) |
152 | 0 | { |
153 | 0 | uint32_t c, sc; |
154 | 0 | int clength = 1; |
155 | | |
156 | | /* Add in the length of the last item, then, if in the dummy run, pull the |
157 | | pointer back to the start of the (temporary) buffer and then remember the |
158 | | start of the next item. */ |
159 | |
|
160 | 0 | convlength += p - pp; |
161 | 0 | if (dummyrun) p = use_buffer; |
162 | 0 | pp = p; |
163 | | |
164 | | /* Pick up the next character */ |
165 | |
|
166 | | #ifndef SUPPORT_UNICODE |
167 | | c = *posix; |
168 | | #else |
169 | 0 | GETCHARLENTEST(c, posix, clength); |
170 | 0 | #endif |
171 | 0 | posix += clength; |
172 | 0 | plength -= clength; |
173 | |
|
174 | 0 | sc = nextisliteral? 0 : c; |
175 | 0 | nextisliteral = FALSE; |
176 | | |
177 | | /* Handle a character within a class. */ |
178 | |
|
179 | 0 | if (posix_state >= POSIX_CLASS_NOT_STARTED) |
180 | 0 | { |
181 | 0 | if (c == CHAR_RIGHT_SQUARE_BRACKET) |
182 | 0 | { |
183 | 0 | PUTCHARS(STR_RIGHT_SQUARE_BRACKET); |
184 | 0 | posix_state = POSIX_NOT_BRACKET; |
185 | 0 | } |
186 | | |
187 | | /* Not the end of the class */ |
188 | | |
189 | 0 | else |
190 | 0 | { |
191 | 0 | switch (posix_state) |
192 | 0 | { |
193 | 0 | case POSIX_CLASS_STARTED: |
194 | 0 | if (c <= 127 && islower(c)) break; /* Remain in started state */ |
195 | 0 | posix_state = POSIX_CLASS_NOT_STARTED; |
196 | 0 | if (c == CHAR_COLON && plength > 0 && |
197 | 0 | *posix == CHAR_RIGHT_SQUARE_BRACKET) |
198 | 0 | { |
199 | 0 | PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET); |
200 | 0 | plength--; |
201 | 0 | posix++; |
202 | 0 | continue; /* With next character after :] */ |
203 | 0 | } |
204 | | /* Fall through */ |
205 | | |
206 | 0 | case POSIX_CLASS_NOT_STARTED: |
207 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET) |
208 | 0 | posix_state = POSIX_CLASS_STARTING; |
209 | 0 | break; |
210 | | |
211 | 0 | case POSIX_CLASS_STARTING: |
212 | 0 | if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED; |
213 | 0 | break; |
214 | 0 | } |
215 | | |
216 | 0 | if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH); |
217 | 0 | if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; |
218 | 0 | memcpy(p, posix - clength, CU2BYTES(clength)); |
219 | 0 | p += clength; |
220 | 0 | } |
221 | 0 | } |
222 | | |
223 | | /* Handle a character not within a class. */ |
224 | | |
225 | 0 | else switch(sc) |
226 | 0 | { |
227 | 0 | case CHAR_LEFT_SQUARE_BRACKET: |
228 | 0 | PUTCHARS(STR_LEFT_SQUARE_BRACKET); |
229 | |
|
230 | | #ifdef NEVER |
231 | | /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does |
232 | | support) but they are not part of POSIX 1003.1. */ |
233 | | |
234 | | if (plength >= 6) |
235 | | { |
236 | | if (posix[0] == CHAR_LEFT_SQUARE_BRACKET && |
237 | | posix[1] == CHAR_COLON && |
238 | | (posix[2] == CHAR_LESS_THAN_SIGN || |
239 | | posix[2] == CHAR_GREATER_THAN_SIGN) && |
240 | | posix[3] == CHAR_COLON && |
241 | | posix[4] == CHAR_RIGHT_SQUARE_BRACKET && |
242 | | posix[5] == CHAR_RIGHT_SQUARE_BRACKET) |
243 | | { |
244 | | if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY; |
245 | | memcpy(p, posix, CU2BYTES(6)); |
246 | | p += 6; |
247 | | posix += 6; |
248 | | plength -= 6; |
249 | | continue; /* With next character */ |
250 | | } |
251 | | } |
252 | | #endif |
253 | | |
254 | | /* Handle start of "normal" character classes */ |
255 | |
|
256 | 0 | posix_state = POSIX_CLASS_NOT_STARTED; |
257 | | |
258 | | /* Handle ^ and ] as first characters */ |
259 | |
|
260 | 0 | if (plength > 0) |
261 | 0 | { |
262 | 0 | if (*posix == CHAR_CIRCUMFLEX_ACCENT) |
263 | 0 | { |
264 | 0 | posix++; |
265 | 0 | plength--; |
266 | 0 | PUTCHARS(STR_CIRCUMFLEX_ACCENT); |
267 | 0 | } |
268 | 0 | if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET) |
269 | 0 | { |
270 | 0 | posix++; |
271 | 0 | plength--; |
272 | 0 | PUTCHARS(STR_RIGHT_SQUARE_BRACKET); |
273 | 0 | } |
274 | 0 | } |
275 | 0 | break; |
276 | | |
277 | 0 | case CHAR_BACKSLASH: |
278 | 0 | if (plength == 0) return PCRE2_ERROR_END_BACKSLASH; |
279 | 0 | if (extended) nextisliteral = TRUE; else |
280 | 0 | { |
281 | 0 | if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL) |
282 | 0 | { |
283 | 0 | if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); |
284 | 0 | if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; |
285 | 0 | lastspecial = *p++ = *posix++; |
286 | 0 | plength--; |
287 | 0 | } |
288 | 0 | else nextisliteral = TRUE; |
289 | 0 | } |
290 | 0 | break; |
291 | | |
292 | 0 | case CHAR_RIGHT_PARENTHESIS: |
293 | 0 | if (!extended || bracount == 0) goto ESCAPE_LITERAL; |
294 | 0 | bracount--; |
295 | 0 | goto COPY_SPECIAL; |
296 | | |
297 | 0 | case CHAR_LEFT_PARENTHESIS: |
298 | 0 | bracount++; |
299 | | /* Fall through */ |
300 | |
|
301 | 0 | case CHAR_QUESTION_MARK: |
302 | 0 | case CHAR_PLUS: |
303 | 0 | case CHAR_LEFT_CURLY_BRACKET: |
304 | 0 | case CHAR_RIGHT_CURLY_BRACKET: |
305 | 0 | case CHAR_VERTICAL_LINE: |
306 | 0 | if (!extended) goto ESCAPE_LITERAL; |
307 | | /* Fall through */ |
308 | | |
309 | 0 | case CHAR_DOT: |
310 | 0 | case CHAR_DOLLAR_SIGN: |
311 | 0 | posix_state = POSIX_NOT_BRACKET; |
312 | 0 | COPY_SPECIAL: |
313 | 0 | lastspecial = c; |
314 | 0 | if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; |
315 | 0 | *p++ = c; |
316 | 0 | break; |
317 | | |
318 | 0 | case CHAR_ASTERISK: |
319 | 0 | if (lastspecial != CHAR_ASTERISK) |
320 | 0 | { |
321 | 0 | if (!extended && (posix_state < POSIX_NOT_BRACKET || |
322 | 0 | lastspecial == CHAR_LEFT_PARENTHESIS)) |
323 | 0 | goto ESCAPE_LITERAL; |
324 | 0 | goto COPY_SPECIAL; |
325 | 0 | } |
326 | 0 | break; /* Ignore second and subsequent asterisks */ |
327 | | |
328 | 0 | case CHAR_CIRCUMFLEX_ACCENT: |
329 | 0 | if (extended) goto COPY_SPECIAL; |
330 | 0 | if (posix_state == POSIX_START_REGEX || |
331 | 0 | lastspecial == CHAR_LEFT_PARENTHESIS) |
332 | 0 | { |
333 | 0 | posix_state = POSIX_ANCHORED; |
334 | 0 | goto COPY_SPECIAL; |
335 | 0 | } |
336 | | /* Fall through */ |
337 | | |
338 | 0 | default: |
339 | 0 | if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) |
340 | 0 | { |
341 | 0 | ESCAPE_LITERAL: |
342 | 0 | PUTCHARS(STR_BACKSLASH); |
343 | 0 | } |
344 | 0 | lastspecial = 0xff; /* Indicates nothing special */ |
345 | 0 | if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; |
346 | 0 | memcpy(p, posix - clength, CU2BYTES(clength)); |
347 | 0 | p += clength; |
348 | 0 | posix_state = POSIX_NOT_BRACKET; |
349 | 0 | break; |
350 | 0 | } |
351 | 0 | } |
352 | | |
353 | 0 | if (posix_state >= POSIX_CLASS_NOT_STARTED) |
354 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
355 | 0 | convlength += p - pp; /* Final segment */ |
356 | 0 | *bufflenptr = convlength; |
357 | 0 | *p++ = 0; |
358 | 0 | return 0; |
359 | 0 | } |
360 | | |
361 | | |
362 | | /************************************************* |
363 | | * Convert a glob pattern * |
364 | | *************************************************/ |
365 | | |
/* Context for writing the output into a buffer. The glob helpers below never
fail on overflow: they keep counting in output_size but stop storing once
output reaches output_end. */

typedef struct pcre2_output_context {
  PCRE2_UCHAR *output;                 /* current output position */
  PCRE2_SPTR output_end;               /* output end (first unwritable unit) */
  PCRE2_SIZE output_size;              /* size of the output, counting the
                                          units that did not fit as well */
  uint8_t out_str[8];                  /* string copied to the output: callers
                                          fill it, then call
                                          convert_glob_write_str() */
} pcre2_output_context;
374 | | |
375 | | |
376 | | /* Write a character into the output. |
377 | | |
378 | | Arguments: |
379 | | out output context |
380 | | chr the next character |
381 | | */ |
382 | | |
383 | | static void |
384 | | convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr) |
385 | 0 | { |
386 | 0 | out->output_size++; |
387 | |
|
388 | 0 | if (out->output < out->output_end) |
389 | 0 | *out->output++ = chr; |
390 | 0 | } |
391 | | |
392 | | |
393 | | /* Write a string into the output. |
394 | | |
395 | | Arguments: |
396 | | out output context |
397 | | length length of out->out_str |
398 | | */ |
399 | | |
400 | | static void |
401 | | convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length) |
402 | 0 | { |
403 | 0 | uint8_t *out_str = out->out_str; |
404 | 0 | PCRE2_UCHAR *output = out->output; |
405 | 0 | PCRE2_SPTR output_end = out->output_end; |
406 | 0 | PCRE2_SIZE output_size = out->output_size; |
407 | |
|
408 | 0 | do |
409 | 0 | { |
410 | 0 | output_size++; |
411 | |
|
412 | 0 | if (output < output_end) |
413 | 0 | *output++ = *out_str++; |
414 | 0 | } |
415 | 0 | while (--length != 0); |
416 | |
|
417 | 0 | out->output = output; |
418 | 0 | out->output_size = output_size; |
419 | 0 | } |
420 | | |
421 | | |
422 | | /* Prints the separator into the output. |
423 | | |
424 | | Arguments: |
425 | | out output context |
426 | | separator glob separator |
427 | | with_escape backslash is needed before separator |
428 | | */ |
429 | | |
430 | | static void |
431 | | convert_glob_print_separator(pcre2_output_context *out, |
432 | | PCRE2_UCHAR separator, BOOL with_escape) |
433 | 0 | { |
434 | 0 | if (with_escape) |
435 | 0 | convert_glob_write(out, CHAR_BACKSLASH); |
436 | |
|
437 | 0 | convert_glob_write(out, separator); |
438 | 0 | } |
439 | | |
440 | | |
441 | | /* Prints a wildcard into the output. |
442 | | |
443 | | Arguments: |
444 | | out output context |
445 | | separator glob separator |
446 | | with_escape backslash is needed before separator |
447 | | */ |
448 | | |
449 | | static void |
450 | | convert_glob_print_wildcard(pcre2_output_context *out, |
451 | | PCRE2_UCHAR separator, BOOL with_escape) |
452 | 0 | { |
453 | 0 | out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; |
454 | 0 | out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; |
455 | 0 | convert_glob_write_str(out, 2); |
456 | |
|
457 | 0 | convert_glob_print_separator(out, separator, with_escape); |
458 | |
|
459 | 0 | convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET); |
460 | 0 | } |
461 | | |
462 | | |
463 | | /* Parse a posix class. |
464 | | |
465 | | Arguments: |
466 | | from starting point of scanning the range |
467 | | pattern_end end of pattern |
468 | | out output context |
469 | | |
470 | | Returns: >0 => class index |
471 | | 0 => malformed class |
472 | | */ |
473 | | |
474 | | static int |
475 | | convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, |
476 | | pcre2_output_context *out) |
477 | 0 | { |
478 | 0 | static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:" |
479 | 0 | "graph:lower:print:punct:space:upper:word:xdigit:"; |
480 | 0 | PCRE2_SPTR start = *from + 1; |
481 | 0 | PCRE2_SPTR pattern = start; |
482 | 0 | const char *class_ptr; |
483 | 0 | PCRE2_UCHAR c; |
484 | 0 | int class_index; |
485 | |
|
486 | 0 | while (TRUE) |
487 | 0 | { |
488 | 0 | if (pattern >= pattern_end) return 0; |
489 | | |
490 | 0 | c = *pattern++; |
491 | |
|
492 | 0 | if (c < CHAR_a || c > CHAR_z) break; |
493 | 0 | } |
494 | | |
495 | 0 | if (c != CHAR_COLON || pattern >= pattern_end || |
496 | 0 | *pattern != CHAR_RIGHT_SQUARE_BRACKET) |
497 | 0 | return 0; |
498 | | |
499 | 0 | class_ptr = posix_classes; |
500 | 0 | class_index = 1; |
501 | |
|
502 | 0 | while (TRUE) |
503 | 0 | { |
504 | 0 | if (*class_ptr == CHAR_NUL) return 0; |
505 | | |
506 | 0 | pattern = start; |
507 | |
|
508 | 0 | while (*pattern == (PCRE2_UCHAR) *class_ptr) |
509 | 0 | { |
510 | 0 | if (*pattern == CHAR_COLON) |
511 | 0 | { |
512 | 0 | pattern += 2; |
513 | 0 | start -= 2; |
514 | |
|
515 | 0 | do convert_glob_write(out, *start++); while (start < pattern); |
516 | |
|
517 | 0 | *from = pattern; |
518 | 0 | return class_index; |
519 | 0 | } |
520 | 0 | pattern++; |
521 | 0 | class_ptr++; |
522 | 0 | } |
523 | | |
524 | 0 | while (*class_ptr != CHAR_COLON) class_ptr++; |
525 | 0 | class_ptr++; |
526 | 0 | class_index++; |
527 | 0 | } |
528 | 0 | } |
529 | | |
530 | | /* Checks whether the character is in the class. |
531 | | |
532 | | Arguments: |
533 | | class_index class index |
534 | | c character |
535 | | |
536 | | Returns: !0 => character is found in the class |
537 | | 0 => otherwise |
538 | | */ |
539 | | |
540 | | static BOOL |
541 | | convert_glob_char_in_class(int class_index, PCRE2_UCHAR c) |
542 | 0 | { |
543 | | #if PCRE2_CODE_UNIT_WIDTH != 8 |
544 | | if (c > 0xff) |
545 | | { |
546 | | /* ctype functions are not sane for c > 0xff */ |
547 | | return 0; |
548 | | } |
549 | | #endif |
550 | |
|
551 | 0 | switch (class_index) |
552 | 0 | { |
553 | 0 | case 1: return isalnum(c); |
554 | 0 | case 2: return isalpha(c); |
555 | 0 | case 3: return 1; |
556 | 0 | case 4: return c == CHAR_HT || c == CHAR_SPACE; |
557 | 0 | case 5: return iscntrl(c); |
558 | 0 | case 6: return isdigit(c); |
559 | 0 | case 7: return isgraph(c); |
560 | 0 | case 8: return islower(c); |
561 | 0 | case 9: return isprint(c); |
562 | 0 | case 10: return ispunct(c); |
563 | 0 | case 11: return isspace(c); |
564 | 0 | case 12: return isupper(c); |
565 | 0 | case 13: return isalnum(c) || c == CHAR_UNDERSCORE; |
566 | 0 | default: return isxdigit(c); |
567 | 0 | } |
568 | 0 | } |
569 | | |
570 | | /* Parse a range of characters. |
571 | | |
572 | | Arguments: |
573 | | from starting point of scanning the range |
574 | | pattern_end end of pattern |
575 | | out output context |
576 | | separator glob separator |
577 | | with_escape backslash is needed before separator |
578 | | |
579 | | Returns: 0 => success |
580 | | !0 => error code |
581 | | */ |
582 | | |
583 | | static int |
584 | | convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, |
585 | | pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator, |
586 | | BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep) |
587 | 0 | { |
588 | 0 | BOOL is_negative = FALSE; |
589 | 0 | BOOL separator_seen = FALSE; |
590 | 0 | BOOL has_prev_c; |
591 | 0 | PCRE2_SPTR pattern = *from; |
592 | 0 | PCRE2_SPTR char_start = NULL; |
593 | 0 | uint32_t c, prev_c; |
594 | 0 | int len, class_index; |
595 | |
|
596 | 0 | (void)utf; /* Avoid compiler warning. */ |
597 | |
|
598 | 0 | if (pattern >= pattern_end) |
599 | 0 | { |
600 | 0 | *from = pattern; |
601 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
602 | 0 | } |
603 | | |
604 | 0 | if (*pattern == CHAR_EXCLAMATION_MARK |
605 | 0 | || *pattern == CHAR_CIRCUMFLEX_ACCENT) |
606 | 0 | { |
607 | 0 | pattern++; |
608 | |
|
609 | 0 | if (pattern >= pattern_end) |
610 | 0 | { |
611 | 0 | *from = pattern; |
612 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
613 | 0 | } |
614 | | |
615 | 0 | is_negative = TRUE; |
616 | |
|
617 | 0 | out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; |
618 | 0 | out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; |
619 | 0 | len = 2; |
620 | |
|
621 | 0 | if (!no_wildsep) |
622 | 0 | { |
623 | 0 | if (with_escape) |
624 | 0 | { |
625 | 0 | out->out_str[len] = CHAR_BACKSLASH; |
626 | 0 | len++; |
627 | 0 | } |
628 | 0 | out->out_str[len] = (uint8_t) separator; |
629 | 0 | } |
630 | |
|
631 | 0 | convert_glob_write_str(out, len + 1); |
632 | 0 | } |
633 | 0 | else |
634 | 0 | convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET); |
635 | | |
636 | 0 | has_prev_c = FALSE; |
637 | 0 | prev_c = 0; |
638 | |
|
639 | 0 | if (*pattern == CHAR_RIGHT_SQUARE_BRACKET) |
640 | 0 | { |
641 | 0 | out->out_str[0] = CHAR_BACKSLASH; |
642 | 0 | out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET; |
643 | 0 | convert_glob_write_str(out, 2); |
644 | 0 | has_prev_c = TRUE; |
645 | 0 | prev_c = CHAR_RIGHT_SQUARE_BRACKET; |
646 | 0 | pattern++; |
647 | 0 | } |
648 | |
|
649 | 0 | while (pattern < pattern_end) |
650 | 0 | { |
651 | 0 | char_start = pattern; |
652 | 0 | GETCHARINCTEST(c, pattern); |
653 | |
|
654 | 0 | if (c == CHAR_RIGHT_SQUARE_BRACKET) |
655 | 0 | { |
656 | 0 | convert_glob_write(out, c); |
657 | |
|
658 | 0 | if (!is_negative && !no_wildsep && separator_seen) |
659 | 0 | { |
660 | 0 | out->out_str[0] = CHAR_LEFT_PARENTHESIS; |
661 | 0 | out->out_str[1] = CHAR_QUESTION_MARK; |
662 | 0 | out->out_str[2] = CHAR_LESS_THAN_SIGN; |
663 | 0 | out->out_str[3] = CHAR_EXCLAMATION_MARK; |
664 | 0 | convert_glob_write_str(out, 4); |
665 | |
|
666 | 0 | convert_glob_print_separator(out, separator, with_escape); |
667 | 0 | convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); |
668 | 0 | } |
669 | |
|
670 | 0 | *from = pattern; |
671 | 0 | return 0; |
672 | 0 | } |
673 | | |
674 | 0 | if (pattern >= pattern_end) break; |
675 | | |
676 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) |
677 | 0 | { |
678 | 0 | *from = pattern; |
679 | 0 | class_index = convert_glob_parse_class(from, pattern_end, out); |
680 | |
|
681 | 0 | if (class_index != 0) |
682 | 0 | { |
683 | 0 | pattern = *from; |
684 | |
|
685 | 0 | has_prev_c = FALSE; |
686 | 0 | prev_c = 0; |
687 | |
|
688 | 0 | if (!is_negative && |
689 | 0 | convert_glob_char_in_class (class_index, separator)) |
690 | 0 | separator_seen = TRUE; |
691 | 0 | continue; |
692 | 0 | } |
693 | 0 | } |
694 | 0 | else if (c == CHAR_MINUS && has_prev_c && |
695 | 0 | *pattern != CHAR_RIGHT_SQUARE_BRACKET) |
696 | 0 | { |
697 | 0 | convert_glob_write(out, CHAR_MINUS); |
698 | |
|
699 | 0 | char_start = pattern; |
700 | 0 | GETCHARINCTEST(c, pattern); |
701 | |
|
702 | 0 | if (pattern >= pattern_end) break; |
703 | | |
704 | 0 | if (escape != 0 && c == escape) |
705 | 0 | { |
706 | 0 | char_start = pattern; |
707 | 0 | GETCHARINCTEST(c, pattern); |
708 | 0 | } |
709 | 0 | else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) |
710 | 0 | { |
711 | 0 | *from = pattern; |
712 | 0 | return PCRE2_ERROR_CONVERT_SYNTAX; |
713 | 0 | } |
714 | | |
715 | 0 | if (prev_c > c) |
716 | 0 | { |
717 | 0 | *from = pattern; |
718 | 0 | return PCRE2_ERROR_CONVERT_SYNTAX; |
719 | 0 | } |
720 | | |
721 | 0 | if (prev_c < separator && separator < c) separator_seen = TRUE; |
722 | |
|
723 | 0 | has_prev_c = FALSE; |
724 | 0 | prev_c = 0; |
725 | 0 | } |
726 | 0 | else |
727 | 0 | { |
728 | 0 | if (escape != 0 && c == escape) |
729 | 0 | { |
730 | 0 | char_start = pattern; |
731 | 0 | GETCHARINCTEST(c, pattern); |
732 | |
|
733 | 0 | if (pattern >= pattern_end) break; |
734 | 0 | } |
735 | | |
736 | 0 | has_prev_c = TRUE; |
737 | 0 | prev_c = c; |
738 | 0 | } |
739 | | |
740 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET || |
741 | 0 | c == CHAR_BACKSLASH || c == CHAR_MINUS) |
742 | 0 | convert_glob_write(out, CHAR_BACKSLASH); |
743 | |
|
744 | 0 | if (c == separator) separator_seen = TRUE; |
745 | |
|
746 | 0 | do convert_glob_write(out, *char_start++); while (char_start < pattern); |
747 | 0 | } |
748 | | |
749 | 0 | *from = pattern; |
750 | 0 | return PCRE2_ERROR_MISSING_SQUARE_BRACKET; |
751 | 0 | } |
752 | | |
753 | | |
754 | | /* Prints a (*COMMIT) into the output. |
755 | | |
756 | | Arguments: |
757 | | out output context |
758 | | */ |
759 | | |
760 | | static void |
761 | | convert_glob_print_commit(pcre2_output_context *out) |
762 | 0 | { |
763 | 0 | out->out_str[0] = CHAR_LEFT_PARENTHESIS; |
764 | 0 | out->out_str[1] = CHAR_ASTERISK; |
765 | 0 | out->out_str[2] = CHAR_C; |
766 | 0 | out->out_str[3] = CHAR_O; |
767 | 0 | out->out_str[4] = CHAR_M; |
768 | 0 | out->out_str[5] = CHAR_M; |
769 | 0 | out->out_str[6] = CHAR_I; |
770 | 0 | out->out_str[7] = CHAR_T; |
771 | 0 | convert_glob_write_str(out, 8); |
772 | 0 | convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); |
773 | 0 | } |
774 | | |
775 | | |
776 | | /* Bash glob converter. |
777 | | |
778 | | Arguments: |
779 | | pattype the pattern type |
780 | | pattern the pattern |
781 | | plength length in code units |
782 | | utf TRUE if UTF |
783 | | use_buffer where to put the output |
784 | | use_length length of use_buffer |
785 | | bufflenptr where to put the used length |
786 | | dummyrun TRUE if a dummy run |
787 | | ccontext the convert context |
788 | | |
789 | | Returns: 0 => success |
790 | | !0 => error code |
791 | | */ |
792 | | |
793 | | static int |
794 | | convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength, |
795 | | BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, |
796 | | PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) |
797 | 0 | { |
798 | 0 | pcre2_output_context out; |
799 | 0 | PCRE2_SPTR pattern_start = pattern; |
800 | 0 | PCRE2_SPTR pattern_end = pattern + plength; |
801 | 0 | PCRE2_UCHAR separator = ccontext->glob_separator; |
802 | 0 | PCRE2_UCHAR escape = ccontext->glob_escape; |
803 | 0 | PCRE2_UCHAR c; |
804 | 0 | BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0; |
805 | 0 | BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0; |
806 | 0 | BOOL in_atomic = FALSE; |
807 | 0 | BOOL after_starstar = FALSE; |
808 | 0 | BOOL no_slash_z = FALSE; |
809 | 0 | BOOL with_escape, is_start, after_separator; |
810 | 0 | int result = 0; |
811 | |
|
812 | 0 | (void)utf; /* Avoid compiler warning. */ |
813 | |
|
814 | 0 | #ifdef SUPPORT_UNICODE |
815 | 0 | if (utf && (separator >= 128 || escape >= 128)) |
816 | 0 | { |
817 | | /* Currently only ASCII characters are supported. */ |
818 | 0 | *bufflenptr = 0; |
819 | 0 | return PCRE2_ERROR_CONVERT_SYNTAX; |
820 | 0 | } |
821 | 0 | #endif |
822 | | |
823 | 0 | with_escape = strchr(pcre2_escaped_literals, separator) != NULL; |
824 | | |
825 | | /* Initialize default for error offset as end of input. */ |
826 | 0 | out.output = use_buffer; |
827 | 0 | out.output_end = use_buffer + use_length; |
828 | 0 | out.output_size = 0; |
829 | |
|
830 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
831 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
832 | 0 | out.out_str[2] = CHAR_s; |
833 | 0 | out.out_str[3] = CHAR_RIGHT_PARENTHESIS; |
834 | 0 | convert_glob_write_str(&out, 4); |
835 | |
|
836 | 0 | is_start = TRUE; |
837 | |
|
838 | 0 | if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK) |
839 | 0 | { |
840 | 0 | if (no_wildsep) |
841 | 0 | is_start = FALSE; |
842 | 0 | else if (!no_starstar && pattern + 1 < pattern_end && |
843 | 0 | pattern[1] == CHAR_ASTERISK) |
844 | 0 | is_start = FALSE; |
845 | 0 | } |
846 | |
|
847 | 0 | if (is_start) |
848 | 0 | { |
849 | 0 | out.out_str[0] = CHAR_BACKSLASH; |
850 | 0 | out.out_str[1] = CHAR_A; |
851 | 0 | convert_glob_write_str(&out, 2); |
852 | 0 | } |
853 | |
|
854 | 0 | while (pattern < pattern_end) |
855 | 0 | { |
856 | 0 | c = *pattern++; |
857 | |
|
858 | 0 | if (c == CHAR_ASTERISK) |
859 | 0 | { |
860 | 0 | is_start = pattern == pattern_start + 1; |
861 | |
|
862 | 0 | if (in_atomic) |
863 | 0 | { |
864 | 0 | convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); |
865 | 0 | in_atomic = FALSE; |
866 | 0 | } |
867 | |
|
868 | 0 | if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK) |
869 | 0 | { |
870 | 0 | after_separator = is_start || (pattern[-2] == separator); |
871 | |
|
872 | 0 | do pattern++; while (pattern < pattern_end && |
873 | 0 | *pattern == CHAR_ASTERISK); |
874 | |
|
875 | 0 | if (pattern >= pattern_end) |
876 | 0 | { |
877 | 0 | no_slash_z = TRUE; |
878 | 0 | break; |
879 | 0 | } |
880 | | |
881 | 0 | after_starstar = TRUE; |
882 | |
|
883 | 0 | if (after_separator && escape != 0 && *pattern == escape && |
884 | 0 | pattern + 1 < pattern_end && pattern[1] == separator) |
885 | 0 | pattern++; |
886 | |
|
887 | 0 | if (is_start) |
888 | 0 | { |
889 | 0 | if (*pattern != separator) continue; |
890 | | |
891 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
892 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
893 | 0 | out.out_str[2] = CHAR_COLON; |
894 | 0 | out.out_str[3] = CHAR_BACKSLASH; |
895 | 0 | out.out_str[4] = CHAR_A; |
896 | 0 | out.out_str[5] = CHAR_VERTICAL_LINE; |
897 | 0 | convert_glob_write_str(&out, 6); |
898 | |
|
899 | 0 | convert_glob_print_separator(&out, separator, with_escape); |
900 | 0 | convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); |
901 | |
|
902 | 0 | pattern++; |
903 | 0 | continue; |
904 | 0 | } |
905 | | |
906 | 0 | convert_glob_print_commit(&out); |
907 | |
|
908 | 0 | if (!after_separator || *pattern != separator) |
909 | 0 | { |
910 | 0 | out.out_str[0] = CHAR_DOT; |
911 | 0 | out.out_str[1] = CHAR_ASTERISK; |
912 | 0 | out.out_str[2] = CHAR_QUESTION_MARK; |
913 | 0 | convert_glob_write_str(&out, 3); |
914 | 0 | continue; |
915 | 0 | } |
916 | | |
917 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
918 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
919 | 0 | out.out_str[2] = CHAR_COLON; |
920 | 0 | out.out_str[3] = CHAR_DOT; |
921 | 0 | out.out_str[4] = CHAR_ASTERISK; |
922 | 0 | out.out_str[5] = CHAR_QUESTION_MARK; |
923 | |
|
924 | 0 | convert_glob_write_str(&out, 6); |
925 | |
|
926 | 0 | convert_glob_print_separator(&out, separator, with_escape); |
927 | |
|
928 | 0 | out.out_str[0] = CHAR_RIGHT_PARENTHESIS; |
929 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
930 | 0 | out.out_str[2] = CHAR_QUESTION_MARK; |
931 | 0 | convert_glob_write_str(&out, 3); |
932 | |
|
933 | 0 | pattern++; |
934 | 0 | continue; |
935 | 0 | } |
936 | | |
937 | 0 | if (pattern < pattern_end && *pattern == CHAR_ASTERISK) |
938 | 0 | { |
939 | 0 | do pattern++; while (pattern < pattern_end && |
940 | 0 | *pattern == CHAR_ASTERISK); |
941 | 0 | } |
942 | |
|
943 | 0 | if (no_wildsep) |
944 | 0 | { |
945 | 0 | if (pattern >= pattern_end) |
946 | 0 | { |
947 | 0 | no_slash_z = TRUE; |
948 | 0 | break; |
949 | 0 | } |
950 | | |
951 | | /* Start check must be after the end check. */ |
952 | 0 | if (is_start) continue; |
953 | 0 | } |
954 | | |
955 | 0 | if (!is_start) |
956 | 0 | { |
957 | 0 | if (after_starstar) |
958 | 0 | { |
959 | 0 | out.out_str[0] = CHAR_LEFT_PARENTHESIS; |
960 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
961 | 0 | out.out_str[2] = CHAR_GREATER_THAN_SIGN; |
962 | 0 | convert_glob_write_str(&out, 3); |
963 | 0 | in_atomic = TRUE; |
964 | 0 | } |
965 | 0 | else |
966 | 0 | convert_glob_print_commit(&out); |
967 | 0 | } |
968 | |
|
969 | 0 | if (no_wildsep) |
970 | 0 | convert_glob_write(&out, CHAR_DOT); |
971 | 0 | else |
972 | 0 | convert_glob_print_wildcard(&out, separator, with_escape); |
973 | |
|
974 | 0 | out.out_str[0] = CHAR_ASTERISK; |
975 | 0 | out.out_str[1] = CHAR_QUESTION_MARK; |
976 | 0 | if (pattern >= pattern_end) |
977 | 0 | out.out_str[1] = CHAR_PLUS; |
978 | 0 | convert_glob_write_str(&out, 2); |
979 | 0 | continue; |
980 | 0 | } |
981 | | |
982 | 0 | if (c == CHAR_QUESTION_MARK) |
983 | 0 | { |
984 | 0 | if (no_wildsep) |
985 | 0 | convert_glob_write(&out, CHAR_DOT); |
986 | 0 | else |
987 | 0 | convert_glob_print_wildcard(&out, separator, with_escape); |
988 | 0 | continue; |
989 | 0 | } |
990 | | |
991 | 0 | if (c == CHAR_LEFT_SQUARE_BRACKET) |
992 | 0 | { |
993 | 0 | result = convert_glob_parse_range(&pattern, pattern_end, |
994 | 0 | &out, utf, separator, with_escape, escape, no_wildsep); |
995 | 0 | if (result != 0) break; |
996 | 0 | continue; |
997 | 0 | } |
998 | | |
999 | 0 | if (escape != 0 && c == escape) |
1000 | 0 | { |
1001 | 0 | if (pattern >= pattern_end) |
1002 | 0 | { |
1003 | 0 | result = PCRE2_ERROR_CONVERT_SYNTAX; |
1004 | 0 | break; |
1005 | 0 | } |
1006 | 0 | c = *pattern++; |
1007 | 0 | } |
1008 | | |
1009 | 0 | if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) |
1010 | 0 | convert_glob_write(&out, CHAR_BACKSLASH); |
1011 | |
|
1012 | 0 | convert_glob_write(&out, c); |
1013 | 0 | } |
1014 | |
|
1015 | 0 | if (result == 0) |
1016 | 0 | { |
1017 | 0 | if (!no_slash_z) |
1018 | 0 | { |
1019 | 0 | out.out_str[0] = CHAR_BACKSLASH; |
1020 | 0 | out.out_str[1] = CHAR_z; |
1021 | 0 | convert_glob_write_str(&out, 2); |
1022 | 0 | } |
1023 | |
|
1024 | 0 | if (in_atomic) |
1025 | 0 | convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); |
1026 | |
|
1027 | 0 | convert_glob_write(&out, CHAR_NUL); |
1028 | |
|
1029 | 0 | if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer)) |
1030 | 0 | result = PCRE2_ERROR_NOMEMORY; |
1031 | 0 | } |
1032 | |
|
1033 | 0 | if (result != 0) |
1034 | 0 | { |
1035 | 0 | *bufflenptr = pattern - pattern_start; |
1036 | 0 | return result; |
1037 | 0 | } |
1038 | | |
1039 | 0 | *bufflenptr = out.output_size - 1; |
1040 | 0 | return 0; |
1041 | 0 | } |
1042 | | |
1043 | | |
1044 | | /************************************************* |
1045 | | * Convert pattern * |
1046 | | *************************************************/ |
1047 | | |
1048 | | /* This is the external-facing function for converting other forms of pattern |
1049 | | into PCRE2 regular expression patterns. On error, the bufflenptr argument is |
1050 | | used to return an offset in the original pattern. |
1051 | | |
1052 | | Arguments: |
1053 | | pattern the input pattern |
1054 | | plength length of input, or PCRE2_ZERO_TERMINATED |
1055 | | options options bits |
1056 | | buffptr pointer to pointer to output buffer |
1057 | | bufflenptr pointer to length of output buffer |
1058 | | ccontext convert context or NULL |
1059 | | |
1060 | | Returns: 0 for success, else an error code (+ve or -ve) |
1061 | | */ |
1062 | | |
1063 | | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION |
1064 | | pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options, |
1065 | | PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr, |
1066 | | pcre2_convert_context *ccontext) |
1067 | 0 | { |
1068 | 0 | int i, rc; |
1069 | 0 | PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE]; |
1070 | 0 | PCRE2_UCHAR *use_buffer = dummy_buffer; |
1071 | 0 | PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE; |
1072 | 0 | BOOL utf = (options & PCRE2_CONVERT_UTF) != 0; |
1073 | 0 | uint32_t pattype = options & TYPE_OPTIONS; |
1074 | |
|
1075 | 0 | if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL; |
1076 | | |
1077 | 0 | if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */ |
1078 | 0 | (pattype & (~pattype+1)) != pattype || /* More than one type set */ |
1079 | 0 | pattype == 0) /* No type set */ |
1080 | 0 | { |
1081 | 0 | *bufflenptr = 0; /* Error offset */ |
1082 | 0 | return PCRE2_ERROR_BADOPTION; |
1083 | 0 | } |
1084 | | |
1085 | 0 | if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern); |
1086 | 0 | if (ccontext == NULL) ccontext = |
1087 | 0 | (pcre2_convert_context *)(&PRIV(default_convert_context)); |
1088 | | |
1089 | | /* Check UTF if required. */ |
1090 | |
|
1091 | | #ifndef SUPPORT_UNICODE |
1092 | | if (utf) |
1093 | | { |
1094 | | *bufflenptr = 0; /* Error offset */ |
1095 | | return PCRE2_ERROR_UNICODE_NOT_SUPPORTED; |
1096 | | } |
1097 | | #else |
1098 | 0 | if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0) |
1099 | 0 | { |
1100 | 0 | PCRE2_SIZE erroroffset; |
1101 | 0 | rc = PRIV(valid_utf)(pattern, plength, &erroroffset); |
1102 | 0 | if (rc != 0) |
1103 | 0 | { |
1104 | 0 | *bufflenptr = erroroffset; |
1105 | 0 | return rc; |
1106 | 0 | } |
1107 | 0 | } |
1108 | 0 | #endif |
1109 | | |
1110 | | /* If buffptr is not NULL, and what it points to is not NULL, we are being |
1111 | | provided with a buffer and a length, so set them as the buffer to use. */ |
1112 | | |
1113 | 0 | if (buffptr != NULL && *buffptr != NULL) |
1114 | 0 | { |
1115 | 0 | use_buffer = *buffptr; |
1116 | 0 | use_length = *bufflenptr; |
1117 | 0 | } |
1118 | | |
1119 | | /* Call an individual converter, either just once (if a buffer was provided or |
1120 | | just the length is needed), or twice (if a memory allocation is required). */ |
1121 | |
|
1122 | 0 | for (i = 0; i < 2; i++) |
1123 | 0 | { |
1124 | 0 | PCRE2_UCHAR *allocated; |
1125 | 0 | BOOL dummyrun = buffptr == NULL || *buffptr == NULL; |
1126 | |
|
1127 | 0 | switch(pattype) |
1128 | 0 | { |
1129 | 0 | case PCRE2_CONVERT_GLOB: |
1130 | 0 | rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf, |
1131 | 0 | use_buffer, use_length, bufflenptr, dummyrun, ccontext); |
1132 | 0 | break; |
1133 | | |
1134 | 0 | case PCRE2_CONVERT_POSIX_BASIC: |
1135 | 0 | case PCRE2_CONVERT_POSIX_EXTENDED: |
1136 | 0 | rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length, |
1137 | 0 | bufflenptr, dummyrun, ccontext); |
1138 | 0 | break; |
1139 | | |
1140 | 0 | default: |
1141 | 0 | *bufflenptr = 0; /* Error offset */ |
1142 | 0 | return PCRE2_ERROR_INTERNAL; |
1143 | 0 | } |
1144 | | |
1145 | 0 | if (rc != 0 || /* Error */ |
1146 | 0 | buffptr == NULL || /* Just the length is required */ |
1147 | 0 | *buffptr != NULL) /* Buffer was provided or allocated */ |
1148 | 0 | return rc; |
1149 | | |
1150 | | /* Allocate memory for the buffer, with hidden space for an allocator at |
1151 | | the start. The next time round the loop runs the conversion for real. */ |
1152 | | |
1153 | 0 | allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + |
1154 | 0 | (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext); |
1155 | 0 | if (allocated == NULL) return PCRE2_ERROR_NOMEMORY; |
1156 | 0 | *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl)); |
1157 | |
|
1158 | 0 | use_buffer = *buffptr; |
1159 | 0 | use_length = *bufflenptr + 1; |
1160 | 0 | } |
1161 | | |
1162 | | /* Control should never get here. */ |
1163 | | |
1164 | 0 | return PCRE2_ERROR_INTERNAL; |
1165 | 0 | } |
1166 | | |
1167 | | |
1168 | | /************************************************* |
1169 | | * Free converted pattern * |
1170 | | *************************************************/ |
1171 | | |
1172 | | /* This frees a converted pattern that was put in newly-allocated memory. |
1173 | | |
1174 | | Argument: the converted pattern |
1175 | | Returns: nothing |
1176 | | */ |
1177 | | |
1178 | | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION |
1179 | | pcre2_converted_pattern_free(PCRE2_UCHAR *converted) |
1180 | 0 | { |
1181 | 0 | if (converted != NULL) |
1182 | 0 | { |
1183 | 0 | pcre2_memctl *memctl = |
1184 | 0 | (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl)); |
1185 | 0 | memctl->free(memctl, memctl->memory_data); |
1186 | 0 | } |
1187 | 0 | } |
1188 | | |
1189 | | /* End of pcre2_convert.c */ |