Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/pcre/pcre2lib/pcre2_convert.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
0
/* Option bits that select which kind of input pattern is being converted. */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB | \
   PCRE2_CONVERT_POSIX_BASIC | \
   PCRE2_CONVERT_POSIX_EXTENDED)

/* Every option bit that this file accepts. */

#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF | \
   PCRE2_CONVERT_NO_UTF_CHECK | \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR | \
   PCRE2_CONVERT_GLOB_NO_STARSTAR | \
   TYPE_OPTIONS)

/* Size, in code units, of the on-stack buffer used when the caller has not
supplied an output buffer. */

#define DUMMY_BUFFER_SIZE 100

/* Generated pattern fragments, built by concatenating single-character
string macros. */

#define STR_BACKSLASH_A \
  STR_BACKSLASH STR_A
#define STR_BACKSLASH_z \
  STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET \
  STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK \
  STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
67
68
/* States for POSIX processing. The numeric order matters: convert_posix()
tests "posix_state >= POSIX_CLASS_NOT_STARTED" to mean "inside a bracket
expression" and "posix_state < POSIX_NOT_BRACKET" to mean "nothing but an
optional anchor has been seen so far". */

enum {
  POSIX_START_REGEX,         /* initial state */
  POSIX_ANCHORED,            /* an anchoring ^ has been copied */
  POSIX_NOT_BRACKET,         /* outside any bracket expression */
  POSIX_CLASS_NOT_STARTED,   /* inside [...], not inside [: ... :] */
  POSIX_CLASS_STARTING,      /* inside [...], a nested [ was just seen */
  POSIX_CLASS_STARTED        /* inside [...], after a nested [: */
};
72
73
/* Macro to add a character string to the output buffer, checking for overflow.

The macro is not self-contained: it uses the variables "p" (next free output
position) and "endp" (limit of the output) of the enclosing function, and on
overflow it makes that function return PCRE2_ERROR_NOMEMORY.

The body is wrapped in do ... while (0) so that the expansion plus the caller's
trailing semicolon is exactly one statement. This keeps the macro safe when it
is the body of an "if" that has an "else". The argument is parenthesized so
that any expression can be passed. */

#define PUTCHARS(string) \
  do \
    { \
    for (const char *s = (string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
83
84
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
85
86
static const char *pcre2_escaped_literals =
87
  STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
88
  STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
89
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
90
  STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
91
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
92
93
/* Recognized escaped metacharacters in POSIX basic patterns. */
94
95
static const char *posix_meta_escapes =
96
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
97
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
98
  STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
99
100
101
102
/*************************************************
103
*           Convert a POSIX pattern              *
104
*************************************************/
105
106
/* This function handles both basic and extended POSIX patterns.
107
108
Arguments:
109
  pattype        the pattern type
110
  pattern        the pattern
111
  plength        length in code units
112
  utf            TRUE if UTF
113
  use_buffer     where to put the output
114
  use_length     length of use_buffer
115
  bufflenptr     where to put the used length
116
  dummyrun       TRUE if a dummy run
117
  ccontext       the convert context
118
119
Returns:         0 => success
120
                !0 => error code
121
*/
122
123
static int
124
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
125
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
126
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
127
0
{
128
0
PCRE2_SPTR posix = pattern;
129
0
PCRE2_UCHAR *p = use_buffer;
130
0
/* pp remembers where the output for the current item started, so that its
length (p - pp) can be added to convlength. */
PCRE2_UCHAR *pp = p;
131
0
PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
132
0
PCRE2_SIZE convlength = 0;
133
134
0
uint32_t bracount = 0;
135
0
uint32_t posix_state = POSIX_START_REGEX;
136
0
uint32_t lastspecial = 0;
137
0
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
138
0
BOOL nextisliteral = FALSE;
139
140
0
(void)utf;       /* Not used when Unicode not supported */
141
0
(void)ccontext;  /* Not currently used */
142
143
/* Initialize default for error offset as end of input. */
144
145
0
*bufflenptr = plength;
146
0
/* The output always begins with the (*NUL) fragment. */
PUTCHARS(STR_STAR_NUL);
147
148
/* Now scan the input. */
149
150
0
while (plength > 0)
151
0
  {
152
0
  uint32_t c, sc;
153
0
  int clength = 1;
154
155
  /* Add in the length of the last item, then, if in the dummy run, pull the
156
  pointer back to the start of the (temporary) buffer and then remember the
157
  start of the next item. */
158
159
0
  convlength += p - pp;
160
0
  if (dummyrun) p = use_buffer;
161
0
  pp = p;
162
163
  /* Pick up the next character */
164
165
#ifndef SUPPORT_UNICODE
166
  c = *posix;
167
#else
168
0
  GETCHARLENTEST(c, posix, clength);
169
0
#endif
170
0
  posix += clength;
171
0
  plength -= clength;
172
173
0
  /* sc is the value switched on below. It is forced to zero when the previous
  item was a backslash that makes this character literal, so that the
  character is not treated as one of the special cases. */
  sc = nextisliteral? 0 : c;
174
0
  nextisliteral = FALSE;
175
176
  /* Handle a character within a class. */
177
178
0
  if (posix_state >= POSIX_CLASS_NOT_STARTED)
179
0
    {
180
0
    if (c == CHAR_RIGHT_SQUARE_BRACKET)
181
0
      {
182
0
      PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
183
0
      posix_state = POSIX_NOT_BRACKET;
184
0
      }
185
186
    /* Not the end of the class */
187
188
0
    else
189
0
      {
190
0
      switch (posix_state)
191
0
        {
192
0
        case POSIX_CLASS_STARTED:
193
0
        if (c <= 127 && islower(c)) break;  /* Remain in started state */
194
0
        posix_state = POSIX_CLASS_NOT_STARTED;
195
0
        if (c == CHAR_COLON  && plength > 0 &&
196
0
            *posix == CHAR_RIGHT_SQUARE_BRACKET)
197
0
          {
198
0
          PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
199
0
          plength--;
200
0
          posix++;
201
0
          continue;    /* With next character after :] */
202
0
          }
203
        /* Fall through */
204
205
0
        case POSIX_CLASS_NOT_STARTED:
206
0
        if (c == CHAR_LEFT_SQUARE_BRACKET)
207
0
          posix_state = POSIX_CLASS_STARTING;
208
0
        break;
209
210
0
        case POSIX_CLASS_STARTING:
211
0
        if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
212
0
        break;
213
0
        }
214
215
0
      /* A backslash inside a class is doubled in the output; the character
      itself is then copied unchanged. */
      if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
216
0
      if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
217
0
      memcpy(p, posix - clength, CU2BYTES(clength));
218
0
      p += clength;
219
0
      }
220
0
    }
221
222
  /* Handle a character not within a class. */
223
224
0
  else switch(sc)
225
0
    {
226
0
    case CHAR_LEFT_SQUARE_BRACKET:
227
0
    PUTCHARS(STR_LEFT_SQUARE_BRACKET);
228
229
#ifdef NEVER
230
    /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
231
    support) but they are not part of POSIX 1003.1. */
232
233
    if (plength >= 6)
234
      {
235
      if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
236
          posix[1] == CHAR_COLON &&
237
          (posix[2] == CHAR_LESS_THAN_SIGN ||
238
           posix[2] == CHAR_GREATER_THAN_SIGN) &&
239
          posix[3] == CHAR_COLON &&
240
          posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
241
          posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
242
        {
243
        if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
244
        memcpy(p, posix, CU2BYTES(6));
245
        p += 6;
246
        posix += 6;
247
        plength -= 6;
248
        continue;  /* With next character */
249
        }
250
      }
251
#endif
252
253
    /* Handle start of "normal" character classes */
254
255
0
    posix_state = POSIX_CLASS_NOT_STARTED;
256
257
    /* Handle ^ and ] as first characters */
258
259
0
    if (plength > 0)
260
0
      {
261
0
      if (*posix == CHAR_CIRCUMFLEX_ACCENT)
262
0
        {
263
0
        posix++;
264
0
        plength--;
265
0
        PUTCHARS(STR_CIRCUMFLEX_ACCENT);
266
0
        }
267
0
      if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
268
0
        {
269
0
        posix++;
270
0
        plength--;
271
0
        PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
272
0
        }
273
0
      }
274
0
    break;
275
276
0
    case CHAR_BACKSLASH:
277
0
    if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
278
0
    /* In an extended pattern the next character is always taken literally.
    In a basic pattern, a character listed in posix_meta_escapes is copied as
    a special item (digits keep their backslash); anything else is taken
    literally. */
    if (extended) nextisliteral = TRUE; else
279
0
      {
280
0
      if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
281
0
        {
282
0
        if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
283
0
        if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
284
0
        lastspecial = *p++ = *posix++;
285
0
        plength--;
286
0
        }
287
0
      else nextisliteral = TRUE;
288
0
      }
289
0
    break;
290
291
0
    /* An unmatched ')' and, in basic patterns, any ')' is a literal. */
    case CHAR_RIGHT_PARENTHESIS:
292
0
    if (!extended || bracount == 0) goto ESCAPE_LITERAL;
293
0
    bracount--;
294
0
    goto COPY_SPECIAL;
295
296
0
    case CHAR_LEFT_PARENTHESIS:
297
0
    bracount++;
298
    /* Fall through */
299
300
0
    case CHAR_QUESTION_MARK:
301
0
    case CHAR_PLUS:
302
0
    case CHAR_LEFT_CURLY_BRACKET:
303
0
    case CHAR_RIGHT_CURLY_BRACKET:
304
0
    case CHAR_VERTICAL_LINE:
305
0
    if (!extended) goto ESCAPE_LITERAL;
306
    /* Fall through */
307
308
0
    case CHAR_DOT:
309
0
    case CHAR_DOLLAR_SIGN:
310
0
    posix_state = POSIX_NOT_BRACKET;
311
0
    /* Copy the character unescaped and remember it in lastspecial. */
    COPY_SPECIAL:
312
0
    lastspecial = c;
313
0
    if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
314
0
    *p++ = c;
315
0
    break;
316
317
0
    case CHAR_ASTERISK:
318
0
    if (lastspecial != CHAR_ASTERISK)
319
0
      {
320
0
      /* In a basic pattern an asterisk is a literal when only an optional
      anchor precedes it, or when it directly follows an opening
      parenthesis. */
      if (!extended && (posix_state < POSIX_NOT_BRACKET ||
321
0
          lastspecial == CHAR_LEFT_PARENTHESIS))
322
0
        goto ESCAPE_LITERAL;
323
0
      goto COPY_SPECIAL;
324
0
      }
325
0
    break;   /* Ignore second and subsequent asterisks */
326
327
0
    case CHAR_CIRCUMFLEX_ACCENT:
328
0
    if (extended) goto COPY_SPECIAL;
329
0
    if (posix_state == POSIX_START_REGEX ||
330
0
        lastspecial == CHAR_LEFT_PARENTHESIS)
331
0
      {
332
0
      posix_state = POSIX_ANCHORED;
333
0
      goto COPY_SPECIAL;
334
0
      }
335
    /* Fall through */
336
337
0
    default:
338
0
    if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
339
0
      {
340
0
      /* Also the target of the gotos above: emit a backslash, then fall out
      of the braces to copy the character itself. */
      ESCAPE_LITERAL:
341
0
      PUTCHARS(STR_BACKSLASH);
342
0
      }
343
0
    lastspecial = 0xff;  /* Indicates nothing special */
344
0
    if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
345
0
    memcpy(p, posix - clength, CU2BYTES(clength));
346
0
    p += clength;
347
0
    posix_state = POSIX_NOT_BRACKET;
348
0
    break;
349
0
    }
350
0
  }
351
352
0
/* The input ended while still inside a bracket expression. */
if (posix_state >= POSIX_CLASS_NOT_STARTED)
353
0
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
354
0
convlength += p - pp;        /* Final segment */
355
0
*bufflenptr = convlength;
356
0
/* The terminating zero is not included in the returned length. */
*p++ = 0;
357
0
return 0;
358
0
}
359
360
361
/*************************************************
362
*           Convert a glob pattern               *
363
*************************************************/
364
365
/* Context for writing the output into a buffer. */
366
367
typedef struct pcre2_output_context {
368
  PCRE2_UCHAR *output;                  /* current output position */
369
  PCRE2_SPTR output_end;                /* output end */
370
  PCRE2_SIZE output_size;               /* size of the output */
371
  uint8_t out_str[8];                   /* string copied to the output */
372
} pcre2_output_context;
373
374
375
/* Write a character into the output.
376
377
Arguments:
378
  out            output context
379
  chr            the next character
380
*/
381
382
static void
383
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
384
0
{
385
0
out->output_size++;
386
387
0
if (out->output < out->output_end)
388
0
  *out->output++ = chr;
389
0
}
390
391
392
/* Write a string into the output.
393
394
Arguments:
395
  out            output context
396
  length         length of out->out_str
397
*/
398
399
static void
400
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
401
0
{
402
0
uint8_t *out_str = out->out_str;
403
0
PCRE2_UCHAR *output = out->output;
404
0
PCRE2_SPTR output_end = out->output_end;
405
0
PCRE2_SIZE output_size = out->output_size;
406
407
0
do
408
0
  {
409
0
  output_size++;
410
411
0
  if (output < output_end)
412
0
    *output++ = *out_str++;
413
0
  }
414
0
while (--length != 0);
415
416
0
out->output = output;
417
0
out->output_size = output_size;
418
0
}
419
420
421
/* Prints the separator into the output.
422
423
Arguments:
424
  out            output context
425
  separator      glob separator
426
  with_escape    backslash is needed before separator
427
*/
428
429
static void
430
convert_glob_print_separator(pcre2_output_context *out,
431
  PCRE2_UCHAR separator, BOOL with_escape)
432
0
{
433
0
if (with_escape)
434
0
  convert_glob_write(out, CHAR_BACKSLASH);
435
436
0
convert_glob_write(out, separator);
437
0
}
438
439
440
/* Prints a wildcard into the output.
441
442
Arguments:
443
  out            output context
444
  separator      glob separator
445
  with_escape    backslash is needed before separator
446
*/
447
448
static void
449
convert_glob_print_wildcard(pcre2_output_context *out,
450
  PCRE2_UCHAR separator, BOOL with_escape)
451
0
{
452
0
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
453
0
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
454
0
convert_glob_write_str(out, 2);
455
456
0
convert_glob_print_separator(out, separator, with_escape);
457
458
0
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
459
0
}
460
461
462
/* Parse a posix class.
463
464
Arguments:
465
  from           starting point of scanning the range
466
  pattern_end    end of pattern
467
  out            output context
468
469
Returns:  >0 => class index
470
          0  => malformed class
471
*/
472
473
static int
474
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
475
  pcre2_output_context *out)
476
0
{
477
0
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
478
0
  "graph:lower:print:punct:space:upper:word:xdigit:";
479
0
PCRE2_SPTR start = *from + 1;
480
0
PCRE2_SPTR pattern = start;
481
0
const char *class_ptr;
482
0
PCRE2_UCHAR c;
483
0
int class_index;
484
485
0
while (TRUE)
486
0
  {
487
0
  if (pattern >= pattern_end) return 0;
488
489
0
  c = *pattern++;
490
491
0
  if (c < CHAR_a || c > CHAR_z) break;
492
0
  }
493
494
0
if (c != CHAR_COLON || pattern >= pattern_end ||
495
0
    *pattern != CHAR_RIGHT_SQUARE_BRACKET)
496
0
  return 0;
497
498
0
class_ptr = posix_classes;
499
0
class_index = 1;
500
501
0
while (TRUE)
502
0
  {
503
0
  if (*class_ptr == CHAR_NUL) return 0;
504
505
0
  pattern = start;
506
507
0
  while (*pattern == (PCRE2_UCHAR) *class_ptr)
508
0
    {
509
0
    if (*pattern == CHAR_COLON)
510
0
      {
511
0
      pattern += 2;
512
0
      start -= 2;
513
514
0
      do convert_glob_write(out, *start++); while (start < pattern);
515
516
0
      *from = pattern;
517
0
      return class_index;
518
0
      }
519
0
    pattern++;
520
0
    class_ptr++;
521
0
    }
522
523
0
  while (*class_ptr != CHAR_COLON) class_ptr++;
524
0
  class_ptr++;
525
0
  class_index++;
526
0
  }
527
0
}
528
529
/* Checks whether the character is in the class.
530
531
Arguments:
532
  class_index    class index
533
  c              character
534
535
Returns:   !0 => character is found in the class
536
            0 => otherwise
537
*/
538
539
static BOOL
540
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
541
0
{
542
#if PCRE2_CODE_UNIT_WIDTH != 8
543
if (c > 0xff)
544
  {
545
  /* ctype functions are not sane for c > 0xff */
546
  return 0;
547
  }
548
#endif
549
550
0
switch (class_index)
551
0
  {
552
0
  case 1: return isalnum(c);
553
0
  case 2: return isalpha(c);
554
0
  case 3: return 1;
555
0
  case 4: return c == CHAR_HT || c == CHAR_SPACE;
556
0
  case 5: return iscntrl(c);
557
0
  case 6: return isdigit(c);
558
0
  case 7: return isgraph(c);
559
0
  case 8: return islower(c);
560
0
  case 9: return isprint(c);
561
0
  case 10: return ispunct(c);
562
0
  case 11: return isspace(c);
563
0
  case 12: return isupper(c);
564
0
  case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
565
0
  default: return isxdigit(c);
566
0
  }
567
0
}
568
569
/* Parse a range of characters.
570
571
Arguments:
572
  from           starting point of scanning the range
573
  pattern_end    end of pattern
574
  out            output context
575
  separator      glob separator
576
  with_escape    backslash is needed before separator
577
578
Returns:         0 => success
579
                !0 => error code
580
*/
581
582
static int
583
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
584
  pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
585
  BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
586
0
{
587
0
BOOL is_negative = FALSE;
588
0
BOOL separator_seen = FALSE;
589
0
BOOL has_prev_c;
590
0
PCRE2_SPTR pattern = *from;
591
0
PCRE2_SPTR char_start = NULL;
592
0
uint32_t c, prev_c;
593
0
int len, class_index;
594
595
0
(void)utf; /* Avoid compiler warning. */
596
597
0
if (pattern >= pattern_end)
598
0
  {
599
0
  *from = pattern;
600
0
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
601
0
  }
602
603
0
if (*pattern == CHAR_EXCLAMATION_MARK
604
0
    || *pattern == CHAR_CIRCUMFLEX_ACCENT)
605
0
  {
606
0
  pattern++;
607
608
0
  if (pattern >= pattern_end)
609
0
    {
610
0
    *from = pattern;
611
0
    return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
612
0
    }
613
614
0
  is_negative = TRUE;
615
616
0
  out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
617
0
  out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
618
0
  len = 2;
619
620
0
  if (!no_wildsep)
621
0
    {
622
0
    if (with_escape)
623
0
      {
624
0
      out->out_str[len] = CHAR_BACKSLASH;
625
0
      len++;
626
0
      }
627
0
    out->out_str[len] = (uint8_t) separator;
628
0
    }
629
630
0
  convert_glob_write_str(out, len + 1);
631
0
  }
632
0
else
633
0
  convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
634
635
0
has_prev_c = FALSE;
636
0
prev_c = 0;
637
638
0
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
639
0
  {
640
0
  out->out_str[0] = CHAR_BACKSLASH;
641
0
  out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
642
0
  convert_glob_write_str(out, 2);
643
0
  has_prev_c = TRUE;
644
0
  prev_c = CHAR_RIGHT_SQUARE_BRACKET;
645
0
  pattern++;
646
0
  }
647
648
0
while (pattern < pattern_end)
649
0
  {
650
0
  char_start = pattern;
651
0
  GETCHARINCTEST(c, pattern);
652
653
0
  if (c == CHAR_RIGHT_SQUARE_BRACKET)
654
0
    {
655
0
    convert_glob_write(out, c);
656
657
0
    if (!is_negative && !no_wildsep && separator_seen)
658
0
      {
659
0
      out->out_str[0] = CHAR_LEFT_PARENTHESIS;
660
0
      out->out_str[1] = CHAR_QUESTION_MARK;
661
0
      out->out_str[2] = CHAR_LESS_THAN_SIGN;
662
0
      out->out_str[3] = CHAR_EXCLAMATION_MARK;
663
0
      convert_glob_write_str(out, 4);
664
665
0
      convert_glob_print_separator(out, separator, with_escape);
666
0
      convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
667
0
      }
668
669
0
    *from = pattern;
670
0
    return 0;
671
0
    }
672
673
0
  if (pattern >= pattern_end) break;
674
675
0
  if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
676
0
    {
677
0
    *from = pattern;
678
0
    class_index = convert_glob_parse_class(from, pattern_end, out);
679
680
0
    if (class_index != 0)
681
0
      {
682
0
      pattern = *from;
683
684
0
      has_prev_c = FALSE;
685
0
      prev_c = 0;
686
687
0
      if (!is_negative &&
688
0
          convert_glob_char_in_class (class_index, separator))
689
0
        separator_seen = TRUE;
690
0
      continue;
691
0
      }
692
0
    }
693
0
  else if (c == CHAR_MINUS && has_prev_c &&
694
0
           *pattern != CHAR_RIGHT_SQUARE_BRACKET)
695
0
    {
696
0
    convert_glob_write(out, CHAR_MINUS);
697
698
0
    char_start = pattern;
699
0
    GETCHARINCTEST(c, pattern);
700
701
0
    if (pattern >= pattern_end) break;
702
703
0
    if (escape != 0 && c == escape)
704
0
      {
705
0
      char_start = pattern;
706
0
      GETCHARINCTEST(c, pattern);
707
0
      }
708
0
    else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
709
0
      {
710
0
      *from = pattern;
711
0
      return PCRE2_ERROR_CONVERT_SYNTAX;
712
0
      }
713
714
0
    if (prev_c > c)
715
0
      {
716
0
      *from = pattern;
717
0
      return PCRE2_ERROR_CONVERT_SYNTAX;
718
0
      }
719
720
0
    if (prev_c < separator && separator < c) separator_seen = TRUE;
721
722
0
    has_prev_c = FALSE;
723
0
    prev_c = 0;
724
0
    }
725
0
  else
726
0
    {
727
0
    if (escape != 0 && c == escape)
728
0
      {
729
0
      char_start = pattern;
730
0
      GETCHARINCTEST(c, pattern);
731
732
0
      if (pattern >= pattern_end) break;
733
0
      }
734
735
0
    has_prev_c = TRUE;
736
0
    prev_c = c;
737
0
    }
738
739
0
  if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
740
0
      c == CHAR_BACKSLASH || c == CHAR_MINUS)
741
0
    convert_glob_write(out, CHAR_BACKSLASH);
742
743
0
  if (c == separator) separator_seen = TRUE;
744
745
0
  do convert_glob_write(out, *char_start++); while (char_start < pattern);
746
0
  }
747
748
0
*from = pattern;
749
0
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
750
0
}
751
752
753
/* Prints a (*COMMIT) into the output.
754
755
Arguments:
756
  out            output context
757
*/
758
759
static void
760
convert_glob_print_commit(pcre2_output_context *out)
761
0
{
762
0
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
763
0
out->out_str[1] = CHAR_ASTERISK;
764
0
out->out_str[2] = CHAR_C;
765
0
out->out_str[3] = CHAR_O;
766
0
out->out_str[4] = CHAR_M;
767
0
out->out_str[5] = CHAR_M;
768
0
out->out_str[6] = CHAR_I;
769
0
out->out_str[7] = CHAR_T;
770
0
convert_glob_write_str(out, 8);
771
0
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
772
0
}
773
774
775
/* Bash glob converter.

The output always starts with (?s). It is anchored with \A unless the pattern
begins with a wildcard for which the anchor is omitted, and with \z unless the
pattern ends in a way that sets no_slash_z. On error, *bufflenptr is set to an
offset in the input pattern; on success it is set to the length of the
converted pattern, not counting the terminating zero.

Arguments:
  options        the option bits
  pattern        the pattern
  plength        length in code units
  utf            TRUE if UTF
  use_buffer     where to put the output
  use_length     length of use_buffer
  bufflenptr     where to put the used length
  dummyrun       TRUE if a dummy run
  ccontext       the convert context

Returns:         0 => success
                !0 => error code
*/
791
792
static int
793
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
794
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
795
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
796
0
{
797
0
pcre2_output_context out;
798
0
PCRE2_SPTR pattern_start = pattern;
799
0
PCRE2_SPTR pattern_end = pattern + plength;
800
0
PCRE2_UCHAR separator = ccontext->glob_separator;
801
0
PCRE2_UCHAR escape = ccontext->glob_escape;
802
0
PCRE2_UCHAR c;
803
0
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
804
0
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
805
0
BOOL in_atomic = FALSE;
806
0
BOOL after_starstar = FALSE;
807
0
BOOL no_slash_z = FALSE;
808
0
BOOL with_escape, is_start, after_separator;
809
0
int result = 0;
810
811
0
(void)utf; /* Avoid compiler warning. */
812
813
0
#ifdef SUPPORT_UNICODE
814
0
if (utf && (separator >= 128 || escape >= 128))
815
0
  {
816
  /* Currently only ASCII characters are supported. */
817
0
  *bufflenptr = 0;
818
0
  return PCRE2_ERROR_CONVERT_SYNTAX;
819
0
  }
820
0
#endif
821
822
0
/* The separator needs a backslash in the output if it is one of the
characters listed in pcre2_escaped_literals. */
with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
823
824
/* Set up the output context. output_size counts every code unit that is
generated, whether or not it fits into the buffer. */
825
0
out.output = use_buffer;
826
0
out.output_end = use_buffer + use_length;
827
0
out.output_size = 0;
828
829
0
/* Every converted glob starts with (?s). */
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
830
0
out.out_str[1] = CHAR_QUESTION_MARK;
831
0
out.out_str[2] = CHAR_s;
832
0
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
833
0
convert_glob_write_str(&out, 4);
834
835
0
is_start = TRUE;
836
837
0
/* No start anchor is written when the pattern begins with an asterisk and
either no_wildsep is set or the asterisk begins an enabled double-star. */
if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
838
0
  {
839
0
  if (no_wildsep)
840
0
    is_start = FALSE;
841
0
  else if (!no_starstar && pattern + 1 < pattern_end &&
842
0
           pattern[1] == CHAR_ASTERISK)
843
0
    is_start = FALSE;
844
0
  }
845
846
0
if (is_start)
847
0
  {
848
0
  out.out_str[0] = CHAR_BACKSLASH;
849
0
  out.out_str[1] = CHAR_A;
850
0
  convert_glob_write_str(&out, 2);
851
0
  }
852
853
0
while (pattern < pattern_end)
854
0
  {
855
0
  c = *pattern++;
856
857
0
  if (c == CHAR_ASTERISK)
858
0
    {
859
0
    /* From here on is_start means "this asterisk is the first character of
    the pattern". */
    is_start = pattern == pattern_start + 1;
860
861
0
    /* An atomic group opened by an earlier asterisk ends at this one. */
    if (in_atomic)
862
0
      {
863
0
      convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
864
0
      in_atomic = FALSE;
865
0
      }
866
867
0
    /* Two or more asterisks in a row, with double-star handling enabled. */
    if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
868
0
      {
869
0
      after_separator = is_start || (pattern[-2] == separator);
870
871
0
      do pattern++; while (pattern < pattern_end &&
872
0
                           *pattern == CHAR_ASTERISK);
873
874
0
      /* Asterisks at the very end: nothing is written for them and the
      result is left without a trailing \z. */
      if (pattern >= pattern_end)
875
0
        {
876
0
        no_slash_z = TRUE;
877
0
        break;
878
0
        }
879
880
0
      after_starstar = TRUE;
881
882
0
      /* Step over an escape character that precedes a separator. */
      if (after_separator && escape != 0 && *pattern == escape &&
883
0
          pattern + 1 < pattern_end && pattern[1] == separator)
884
0
        pattern++;
885
886
0
      if (is_start)
887
0
        {
888
0
        if (*pattern != separator) continue;
889
890
0
        /* Writes (?:\A| then the separator then ) */
        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
891
0
        out.out_str[1] = CHAR_QUESTION_MARK;
892
0
        out.out_str[2] = CHAR_COLON;
893
0
        out.out_str[3] = CHAR_BACKSLASH;
894
0
        out.out_str[4] = CHAR_A;
895
0
        out.out_str[5] = CHAR_VERTICAL_LINE;
896
0
        convert_glob_write_str(&out, 6);
897
898
0
        convert_glob_print_separator(&out, separator, with_escape);
899
0
        convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
900
901
0
        pattern++;
902
0
        continue;
903
0
        }
904
905
0
      convert_glob_print_commit(&out);
906
907
0
      if (!after_separator || *pattern != separator)
908
0
        {
909
0
        /* Writes .*? */
        out.out_str[0] = CHAR_DOT;
910
0
        out.out_str[1] = CHAR_ASTERISK;
911
0
        out.out_str[2] = CHAR_QUESTION_MARK;
912
0
        convert_glob_write_str(&out, 3);
913
0
        continue;
914
0
        }
915
916
0
      /* Writes (?:.*? then the separator then )?? */
      out.out_str[0] = CHAR_LEFT_PARENTHESIS;
917
0
      out.out_str[1] = CHAR_QUESTION_MARK;
918
0
      out.out_str[2] = CHAR_COLON;
919
0
      out.out_str[3] = CHAR_DOT;
920
0
      out.out_str[4] = CHAR_ASTERISK;
921
0
      out.out_str[5] = CHAR_QUESTION_MARK;
922
923
0
      convert_glob_write_str(&out, 6);
924
925
0
      convert_glob_print_separator(&out, separator, with_escape);
926
927
0
      out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
928
0
      out.out_str[1] = CHAR_QUESTION_MARK;
929
0
      out.out_str[2] = CHAR_QUESTION_MARK;
930
0
      convert_glob_write_str(&out, 3);
931
932
0
      pattern++;
933
0
      continue;
934
0
      }
935
936
0
    /* Reached only when double-star handling is disabled: a run of
    asterisks is treated as a single one. */
    if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
937
0
      {
938
0
      do pattern++; while (pattern < pattern_end &&
939
0
                           *pattern == CHAR_ASTERISK);
940
0
      }
941
942
0
    if (no_wildsep)
943
0
      {
944
0
      if (pattern >= pattern_end)
945
0
        {
946
0
        no_slash_z = TRUE;
947
0
        break;
948
0
        }
949
950
      /* Start check must be after the end check. */
951
0
      if (is_start) continue;
952
0
      }
953
954
0
    /* An asterisk that is not the first character is preceded either by the
    start of an atomic group (after a double-star) or by (*COMMIT). */
    if (!is_start)
955
0
      {
956
0
      if (after_starstar)
957
0
        {
958
0
        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
959
0
        out.out_str[1] = CHAR_QUESTION_MARK;
960
0
        out.out_str[2] = CHAR_GREATER_THAN_SIGN;
961
0
        convert_glob_write_str(&out, 3);
962
0
        in_atomic = TRUE;
963
0
        }
964
0
      else
965
0
        convert_glob_print_commit(&out);
966
0
      }
967
968
0
    if (no_wildsep)
969
0
      convert_glob_write(&out, CHAR_DOT);
970
0
    else
971
0
      convert_glob_print_wildcard(&out, separator, with_escape);
972
973
0
    /* The repeat is *? in the middle of the pattern and *+ at the end. */
    out.out_str[0] = CHAR_ASTERISK;
974
0
    out.out_str[1] = CHAR_QUESTION_MARK;
975
0
    if (pattern >= pattern_end)
976
0
      out.out_str[1] = CHAR_PLUS;
977
0
    convert_glob_write_str(&out, 2);
978
0
    continue;
979
0
    }
980
981
0
  if (c == CHAR_QUESTION_MARK)
982
0
    {
983
0
    if (no_wildsep)
984
0
      convert_glob_write(&out, CHAR_DOT);
985
0
    else
986
0
      convert_glob_print_wildcard(&out, separator, with_escape);
987
0
    continue;
988
0
    }
989
990
0
  if (c == CHAR_LEFT_SQUARE_BRACKET)
991
0
    {
992
0
    result = convert_glob_parse_range(&pattern, pattern_end,
993
0
      &out, utf, separator, with_escape, escape, no_wildsep);
994
0
    if (result != 0) break;
995
0
    continue;
996
0
    }
997
998
0
  /* An escape character makes the following character literal; an escape
  at the very end of the pattern is an error. */
  if (escape != 0 && c == escape)
999
0
    {
1000
0
    if (pattern >= pattern_end)
1001
0
      {
1002
0
      result = PCRE2_ERROR_CONVERT_SYNTAX;
1003
0
      break;
1004
0
      }
1005
0
    c = *pattern++;
1006
0
    }
1007
1008
0
  if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
1009
0
    convert_glob_write(&out, CHAR_BACKSLASH);
1010
1011
0
  convert_glob_write(&out, c);
1012
0
  }
1013
1014
0
if (result == 0)
1015
0
  {
1016
0
  if (!no_slash_z)
1017
0
    {
1018
0
    out.out_str[0] = CHAR_BACKSLASH;
1019
0
    out.out_str[1] = CHAR_z;
1020
0
    convert_glob_write_str(&out, 2);
1021
0
    }
1022
1023
0
  if (in_atomic)
1024
0
    convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
1025
1026
0
  convert_glob_write(&out, CHAR_NUL);
1027
1028
0
  /* In a real run, a count that differs from the number of code units
  actually stored means that some of the output did not fit. */
  if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
1029
0
    result = PCRE2_ERROR_NOMEMORY;
1030
0
  }
1031
1032
0
/* On error, report how far into the input pattern scanning had got. */
if (result != 0)
1033
0
  {
1034
0
  *bufflenptr = pattern - pattern_start;
1035
0
  return result;
1036
0
  }
1037
1038
0
/* The returned length does not include the terminating zero. */
*bufflenptr = out.output_size - 1;
1039
0
return 0;
1040
0
}
1041
1042
1043
/*************************************************
1044
*                Convert pattern                 *
1045
*************************************************/
1046
1047
/* This is the external-facing function for converting other forms of pattern
1048
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1049
used to return an offset in the original pattern.
1050
1051
Arguments:
1052
  pattern     the input pattern
1053
  plength     length of input, or PCRE2_ZERO_TERMINATED
1054
  options     options bits
1055
  buffptr     pointer to pointer to output buffer
1056
  bufflenptr  pointer to length of output buffer
1057
  ccontext    convert context or NULL
1058
1059
Returns:      0 for success, else an error code (+ve or -ve)
1060
*/
1061
1062
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
1063
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
1064
  PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1065
  pcre2_convert_context *ccontext)
1066
0
{
1067
0
int rc;
1068
0
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1069
0
PCRE2_UCHAR *use_buffer = dummy_buffer;
1070
0
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1071
0
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1072
0
uint32_t pattype = options & TYPE_OPTIONS;
1073
1074
0
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
1075
1076
0
if ((options & ~ALL_OPTIONS) != 0 ||        /* Undefined bit set */
1077
0
    (pattype & (~pattype+1)) != pattype ||  /* More than one type set */
1078
0
    pattype == 0)                           /* No type set */
1079
0
  {
1080
0
  *bufflenptr = 0;                          /* Error offset */
1081
0
  return PCRE2_ERROR_BADOPTION;
1082
0
  }
1083
1084
0
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1085
0
if (ccontext == NULL) ccontext =
1086
0
  (pcre2_convert_context *)(&PRIV(default_convert_context));
1087
1088
/* Check UTF if required. */
1089
1090
#ifndef SUPPORT_UNICODE
1091
if (utf)
1092
  {
1093
  *bufflenptr = 0;  /* Error offset */
1094
  return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
1095
  }
1096
#else
1097
0
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1098
0
  {
1099
0
  PCRE2_SIZE erroroffset;
1100
0
  rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1101
0
  if (rc != 0)
1102
0
    {
1103
0
    *bufflenptr = erroroffset;
1104
0
    return rc;
1105
0
    }
1106
0
  }
1107
0
#endif
1108
1109
/* If buffptr is not NULL, and what it points to is not NULL, we are being
1110
provided with a buffer and a length, so set them as the buffer to use. */
1111
1112
0
if (buffptr != NULL && *buffptr != NULL)
1113
0
  {
1114
0
  use_buffer = *buffptr;
1115
0
  use_length = *bufflenptr;
1116
0
  }
1117
1118
/* Call an individual converter, either just once (if a buffer was provided or
1119
just the length is needed), or twice (if a memory allocation is required). */
1120
1121
0
for (int i = 0; i < 2; i++)
1122
0
  {
1123
0
  PCRE2_UCHAR *allocated;
1124
0
  BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1125
1126
0
  switch(pattype)
1127
0
    {
1128
0
    case PCRE2_CONVERT_GLOB:
1129
0
    rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1130
0
      use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1131
0
    break;
1132
1133
0
    case PCRE2_CONVERT_POSIX_BASIC:
1134
0
    case PCRE2_CONVERT_POSIX_EXTENDED:
1135
0
    rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1136
0
      bufflenptr, dummyrun, ccontext);
1137
0
    break;
1138
1139
0
    default:
1140
0
    goto EXIT;
1141
0
    }
1142
1143
0
  if (rc != 0 ||           /* Error */
1144
0
      buffptr == NULL ||   /* Just the length is required */
1145
0
      *buffptr != NULL)    /* Buffer was provided or allocated */
1146
0
    return rc;
1147
1148
  /* Allocate memory for the buffer, with hidden space for an allocator at
1149
  the start. The next time round the loop runs the conversion for real. */
1150
1151
0
  allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1152
0
    (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1153
0
  if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
1154
0
  *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1155
1156
0
  use_buffer = *buffptr;
1157
0
  use_length = *bufflenptr + 1;
1158
0
  }
1159
1160
/* Something went terribly wrong. Trigger an assert and return an error */
1161
0
PCRE2_DEBUG_UNREACHABLE();
1162
1163
0
EXIT:
1164
1165
0
*bufflenptr = 0;  /* Error offset */
1166
0
return PCRE2_ERROR_INTERNAL;
1167
0
}
1168
1169
1170
/*************************************************
1171
*            Free converted pattern              *
1172
*************************************************/
1173
1174
/* This frees a converted pattern that was put in newly-allocated memory.
1175
1176
Argument:   the converted pattern
1177
Returns:    nothing
1178
*/
1179
1180
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
1181
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
1182
0
{
1183
0
if (converted != NULL)
1184
0
  {
1185
0
  pcre2_memctl *memctl =
1186
0
    (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
1187
0
  memctl->free(memctl, memctl->memory_data);
1188
0
  }
1189
0
}
1190
1191
/* End of pcre2_convert.c */