Coverage Report

Created: 2025-11-16 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/pcre2lib/pcre2_convert.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2022 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
0
/* Option bits that select which kind of foreign pattern is being converted. */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB | \
   PCRE2_CONVERT_POSIX_BASIC | \
   PCRE2_CONVERT_POSIX_EXTENDED)

/* Every option bit that pcre2_pattern_convert() accepts; any other bit is
rejected with PCRE2_ERROR_BADOPTION. */

#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF | \
   PCRE2_CONVERT_NO_UTF_CHECK | \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR | \
   PCRE2_CONVERT_GLOB_NO_STARSTAR | \
   TYPE_OPTIONS)

/* Size, in code units, of the local buffer that pcre2_pattern_convert() uses
when the caller does not supply an output buffer. */

#define DUMMY_BUFFER_SIZE 100
57
58
/* Fragments of generated pattern text. Each one is formed by placing the
single-character STR_xxx macros side by side. */

#define STR_BACKSLASH_A \
  STR_BACKSLASH STR_A
#define STR_BACKSLASH_z \
  STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET \
  STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK \
  STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
67
68
/* States for POSIX processing. The numeric order matters: convert_posix()
tests "state >= POSIX_CLASS_NOT_STARTED" to detect that it is inside a bracket
expression, and "state < POSIX_NOT_BRACKET" when deciding whether an asterisk
in a basic pattern is a literal. */

enum {
  POSIX_START_REGEX,         /* Initial state */
  POSIX_ANCHORED,            /* Set after a leading anchoring circumflex */
  POSIX_NOT_BRACKET,         /* Outside a bracket expression */
  POSIX_CLASS_NOT_STARTED,   /* In brackets, not in a [: :] class name */
  POSIX_CLASS_STARTING,      /* In brackets, just seen an inner [ */
  POSIX_CLASS_STARTED        /* In brackets, after [: of a class name */
};
72
73
/* Macro to add a character string to the output buffer, checking for overflow.
It relies on three variables in the invoking function: "s" (a char * scratch
pointer), "p" (the output cursor) and "endp" (the last position that may be
written). On overflow it returns PCRE2_ERROR_NOMEMORY from the invoking
function. The body is wrapped in do ... while (0) so that an invocation followed
by a semicolon is exactly one statement and is safe in an unbraced if/else. */

#define PUTCHARS(string) \
  do \
    { \
    for (s = (char *)(string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
83
84
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
85
86
static const char *pcre2_escaped_literals =
87
  STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
88
  STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
89
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
90
  STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
91
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
92
93
/* Recognized escaped metacharacters in POSIX basic patterns. */
94
95
static const char *posix_meta_escapes =
96
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
97
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
98
  STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
99
100
101
102
/*************************************************
103
*           Convert a POSIX pattern              *
104
*************************************************/
105
106
/* This function handles both basic and extended POSIX patterns.
107
108
Arguments:
109
  pattype        the pattern type
110
  pattern        the pattern
111
  plength        length in code units
112
  utf            TRUE if UTF
113
  use_buffer     where to put the output
114
  use_length     length of use_buffer
115
  bufflenptr     where to put the used length
116
  dummyrun       TRUE if a dummy run
117
  ccontext       the convert context
118
119
Returns:         0 => success
120
                !0 => error code
121
*/
122
123
static int
124
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
125
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
126
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
127
0
{
128
0
char *s;
129
0
PCRE2_SPTR posix = pattern;
130
0
PCRE2_UCHAR *p = use_buffer;
131
0
PCRE2_UCHAR *pp = p;
132
0
PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
133
0
PCRE2_SIZE convlength = 0;
134
135
0
uint32_t bracount = 0;
136
0
uint32_t posix_state = POSIX_START_REGEX;
137
0
uint32_t lastspecial = 0;
138
0
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
139
0
BOOL nextisliteral = FALSE;
140
141
0
(void)utf;       /* Not used when Unicode not supported */
142
0
(void)ccontext;  /* Not currently used */
143
144
/* Initialize default for error offset as end of input. */
145
146
0
*bufflenptr = plength;
147
0
PUTCHARS(STR_STAR_NUL);
148
149
/* Now scan the input. */
150
151
0
while (plength > 0)
152
0
  {
153
0
  uint32_t c, sc;
154
0
  int clength = 1;
155
156
  /* Add in the length of the last item, then, if in the dummy run, pull the
157
  pointer back to the start of the (temporary) buffer and then remember the
158
  start of the next item. */
159
160
0
  convlength += p - pp;
161
0
  if (dummyrun) p = use_buffer;
162
0
  pp = p;
163
164
  /* Pick up the next character */
165
166
#ifndef SUPPORT_UNICODE
167
  c = *posix;
168
#else
169
0
  GETCHARLENTEST(c, posix, clength);
170
0
#endif
171
0
  posix += clength;
172
0
  plength -= clength;
173
174
0
  sc = nextisliteral? 0 : c;
175
0
  nextisliteral = FALSE;
176
177
  /* Handle a character within a class. */
178
179
0
  if (posix_state >= POSIX_CLASS_NOT_STARTED)
180
0
    {
181
0
    if (c == CHAR_RIGHT_SQUARE_BRACKET)
182
0
      {
183
0
      PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
184
0
      posix_state = POSIX_NOT_BRACKET;
185
0
      }
186
187
    /* Not the end of the class */
188
189
0
    else
190
0
      {
191
0
      switch (posix_state)
192
0
        {
193
0
        case POSIX_CLASS_STARTED:
194
0
        if (c <= 127 && islower(c)) break;  /* Remain in started state */
195
0
        posix_state = POSIX_CLASS_NOT_STARTED;
196
0
        if (c == CHAR_COLON  && plength > 0 &&
197
0
            *posix == CHAR_RIGHT_SQUARE_BRACKET)
198
0
          {
199
0
          PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
200
0
          plength--;
201
0
          posix++;
202
0
          continue;    /* With next character after :] */
203
0
          }
204
        /* Fall through */
205
206
0
        case POSIX_CLASS_NOT_STARTED:
207
0
        if (c == CHAR_LEFT_SQUARE_BRACKET)
208
0
          posix_state = POSIX_CLASS_STARTING;
209
0
        break;
210
211
0
        case POSIX_CLASS_STARTING:
212
0
        if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
213
0
        break;
214
0
        }
215
216
0
      if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
217
0
      if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
218
0
      memcpy(p, posix - clength, CU2BYTES(clength));
219
0
      p += clength;
220
0
      }
221
0
    }
222
223
  /* Handle a character not within a class. */
224
225
0
  else switch(sc)
226
0
    {
227
0
    case CHAR_LEFT_SQUARE_BRACKET:
228
0
    PUTCHARS(STR_LEFT_SQUARE_BRACKET);
229
230
#ifdef NEVER
231
    /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
232
    support) but they are not part of POSIX 1003.1. */
233
234
    if (plength >= 6)
235
      {
236
      if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
237
          posix[1] == CHAR_COLON &&
238
          (posix[2] == CHAR_LESS_THAN_SIGN ||
239
           posix[2] == CHAR_GREATER_THAN_SIGN) &&
240
          posix[3] == CHAR_COLON &&
241
          posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
242
          posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
243
        {
244
        if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
245
        memcpy(p, posix, CU2BYTES(6));
246
        p += 6;
247
        posix += 6;
248
        plength -= 6;
249
        continue;  /* With next character */
250
        }
251
      }
252
#endif
253
254
    /* Handle start of "normal" character classes */
255
256
0
    posix_state = POSIX_CLASS_NOT_STARTED;
257
258
    /* Handle ^ and ] as first characters */
259
260
0
    if (plength > 0)
261
0
      {
262
0
      if (*posix == CHAR_CIRCUMFLEX_ACCENT)
263
0
        {
264
0
        posix++;
265
0
        plength--;
266
0
        PUTCHARS(STR_CIRCUMFLEX_ACCENT);
267
0
        }
268
0
      if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
269
0
        {
270
0
        posix++;
271
0
        plength--;
272
0
        PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
273
0
        }
274
0
      }
275
0
    break;
276
277
0
    case CHAR_BACKSLASH:
278
0
    if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
279
0
    if (extended) nextisliteral = TRUE; else
280
0
      {
281
0
      if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
282
0
        {
283
0
        if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
284
0
        if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
285
0
        lastspecial = *p++ = *posix++;
286
0
        plength--;
287
0
        }
288
0
      else nextisliteral = TRUE;
289
0
      }
290
0
    break;
291
292
0
    case CHAR_RIGHT_PARENTHESIS:
293
0
    if (!extended || bracount == 0) goto ESCAPE_LITERAL;
294
0
    bracount--;
295
0
    goto COPY_SPECIAL;
296
297
0
    case CHAR_LEFT_PARENTHESIS:
298
0
    bracount++;
299
    /* Fall through */
300
301
0
    case CHAR_QUESTION_MARK:
302
0
    case CHAR_PLUS:
303
0
    case CHAR_LEFT_CURLY_BRACKET:
304
0
    case CHAR_RIGHT_CURLY_BRACKET:
305
0
    case CHAR_VERTICAL_LINE:
306
0
    if (!extended) goto ESCAPE_LITERAL;
307
    /* Fall through */
308
309
0
    case CHAR_DOT:
310
0
    case CHAR_DOLLAR_SIGN:
311
0
    posix_state = POSIX_NOT_BRACKET;
312
0
    COPY_SPECIAL:
313
0
    lastspecial = c;
314
0
    if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
315
0
    *p++ = c;
316
0
    break;
317
318
0
    case CHAR_ASTERISK:
319
0
    if (lastspecial != CHAR_ASTERISK)
320
0
      {
321
0
      if (!extended && (posix_state < POSIX_NOT_BRACKET ||
322
0
          lastspecial == CHAR_LEFT_PARENTHESIS))
323
0
        goto ESCAPE_LITERAL;
324
0
      goto COPY_SPECIAL;
325
0
      }
326
0
    break;   /* Ignore second and subsequent asterisks */
327
328
0
    case CHAR_CIRCUMFLEX_ACCENT:
329
0
    if (extended) goto COPY_SPECIAL;
330
0
    if (posix_state == POSIX_START_REGEX ||
331
0
        lastspecial == CHAR_LEFT_PARENTHESIS)
332
0
      {
333
0
      posix_state = POSIX_ANCHORED;
334
0
      goto COPY_SPECIAL;
335
0
      }
336
    /* Fall through */
337
338
0
    default:
339
0
    if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
340
0
      {
341
0
      ESCAPE_LITERAL:
342
0
      PUTCHARS(STR_BACKSLASH);
343
0
      }
344
0
    lastspecial = 0xff;  /* Indicates nothing special */
345
0
    if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
346
0
    memcpy(p, posix - clength, CU2BYTES(clength));
347
0
    p += clength;
348
0
    posix_state = POSIX_NOT_BRACKET;
349
0
    break;
350
0
    }
351
0
  }
352
353
0
if (posix_state >= POSIX_CLASS_NOT_STARTED)
354
0
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
355
0
convlength += p - pp;        /* Final segment */
356
0
*bufflenptr = convlength;
357
0
*p++ = 0;
358
0
return 0;
359
0
}
360
361
362
/*************************************************
363
*           Convert a glob pattern               *
364
*************************************************/
365
366
/* Context for writing the output into a buffer. */
367
368
typedef struct pcre2_output_context {
369
  PCRE2_UCHAR *output;                  /* current output position */
370
  PCRE2_SPTR output_end;                /* output end */
371
  PCRE2_SIZE output_size;               /* size of the output */
372
  uint8_t out_str[8];                   /* string copied to the output */
373
} pcre2_output_context;
374
375
376
/* Write a character into the output.
377
378
Arguments:
379
  out            output context
380
  chr            the next character
381
*/
382
383
static void
384
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
385
0
{
386
0
out->output_size++;
387
388
0
if (out->output < out->output_end)
389
0
  *out->output++ = chr;
390
0
}
391
392
393
/* Write a string into the output.
394
395
Arguments:
396
  out            output context
397
  length         length of out->out_str
398
*/
399
400
static void
401
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
402
0
{
403
0
uint8_t *out_str = out->out_str;
404
0
PCRE2_UCHAR *output = out->output;
405
0
PCRE2_SPTR output_end = out->output_end;
406
0
PCRE2_SIZE output_size = out->output_size;
407
408
0
do
409
0
  {
410
0
  output_size++;
411
412
0
  if (output < output_end)
413
0
    *output++ = *out_str++;
414
0
  }
415
0
while (--length != 0);
416
417
0
out->output = output;
418
0
out->output_size = output_size;
419
0
}
420
421
422
/* Prints the separator into the output.
423
424
Arguments:
425
  out            output context
426
  separator      glob separator
427
  with_escape    backslash is needed before separator
428
*/
429
430
static void
431
convert_glob_print_separator(pcre2_output_context *out,
432
  PCRE2_UCHAR separator, BOOL with_escape)
433
0
{
434
0
if (with_escape)
435
0
  convert_glob_write(out, CHAR_BACKSLASH);
436
437
0
convert_glob_write(out, separator);
438
0
}
439
440
441
/* Prints a wildcard into the output.
442
443
Arguments:
444
  out            output context
445
  separator      glob separator
446
  with_escape    backslash is needed before separator
447
*/
448
449
static void
450
convert_glob_print_wildcard(pcre2_output_context *out,
451
  PCRE2_UCHAR separator, BOOL with_escape)
452
0
{
453
0
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
454
0
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
455
0
convert_glob_write_str(out, 2);
456
457
0
convert_glob_print_separator(out, separator, with_escape);
458
459
0
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
460
0
}
461
462
463
/* Parse a posix class.
464
465
Arguments:
466
  from           starting point of scanning the range
467
  pattern_end    end of pattern
468
  out            output context
469
470
Returns:  >0 => class index
471
          0  => malformed class
472
*/
473
474
static int
475
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
476
  pcre2_output_context *out)
477
0
{
478
0
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
479
0
  "graph:lower:print:punct:space:upper:word:xdigit:";
480
0
PCRE2_SPTR start = *from + 1;
481
0
PCRE2_SPTR pattern = start;
482
0
const char *class_ptr;
483
0
PCRE2_UCHAR c;
484
0
int class_index;
485
486
0
while (TRUE)
487
0
  {
488
0
  if (pattern >= pattern_end) return 0;
489
490
0
  c = *pattern++;
491
492
0
  if (c < CHAR_a || c > CHAR_z) break;
493
0
  }
494
495
0
if (c != CHAR_COLON || pattern >= pattern_end ||
496
0
    *pattern != CHAR_RIGHT_SQUARE_BRACKET)
497
0
  return 0;
498
499
0
class_ptr = posix_classes;
500
0
class_index = 1;
501
502
0
while (TRUE)
503
0
  {
504
0
  if (*class_ptr == CHAR_NUL) return 0;
505
506
0
  pattern = start;
507
508
0
  while (*pattern == (PCRE2_UCHAR) *class_ptr)
509
0
    {
510
0
    if (*pattern == CHAR_COLON)
511
0
      {
512
0
      pattern += 2;
513
0
      start -= 2;
514
515
0
      do convert_glob_write(out, *start++); while (start < pattern);
516
517
0
      *from = pattern;
518
0
      return class_index;
519
0
      }
520
0
    pattern++;
521
0
    class_ptr++;
522
0
    }
523
524
0
  while (*class_ptr != CHAR_COLON) class_ptr++;
525
0
  class_ptr++;
526
0
  class_index++;
527
0
  }
528
0
}
529
530
/* Checks whether the character is in the class.
531
532
Arguments:
533
  class_index    class index
534
  c              character
535
536
Returns:   !0 => character is found in the class
537
            0 => otherwise
538
*/
539
540
static BOOL
541
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
542
0
{
543
#if PCRE2_CODE_UNIT_WIDTH != 8
544
if (c > 0xff)
545
  {
546
  /* ctype functions are not sane for c > 0xff */
547
  return 0;
548
  }
549
#endif
550
551
0
switch (class_index)
552
0
  {
553
0
  case 1: return isalnum(c);
554
0
  case 2: return isalpha(c);
555
0
  case 3: return 1;
556
0
  case 4: return c == CHAR_HT || c == CHAR_SPACE;
557
0
  case 5: return iscntrl(c);
558
0
  case 6: return isdigit(c);
559
0
  case 7: return isgraph(c);
560
0
  case 8: return islower(c);
561
0
  case 9: return isprint(c);
562
0
  case 10: return ispunct(c);
563
0
  case 11: return isspace(c);
564
0
  case 12: return isupper(c);
565
0
  case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
566
0
  default: return isxdigit(c);
567
0
  }
568
0
}
569
570
/* Parse a range of characters.
571
572
Arguments:
573
  from           starting point of scanning the range
574
  pattern_end    end of pattern
575
  out            output context
576
  separator      glob separator
577
  with_escape    backslash is needed before separator
578
579
Returns:         0 => success
580
                !0 => error code
581
*/
582
583
static int
584
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
585
  pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
586
  BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
587
0
{
588
0
BOOL is_negative = FALSE;
589
0
BOOL separator_seen = FALSE;
590
0
BOOL has_prev_c;
591
0
PCRE2_SPTR pattern = *from;
592
0
PCRE2_SPTR char_start = NULL;
593
0
uint32_t c, prev_c;
594
0
int len, class_index;
595
596
0
(void)utf; /* Avoid compiler warning. */
597
598
0
if (pattern >= pattern_end)
599
0
  {
600
0
  *from = pattern;
601
0
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
602
0
  }
603
604
0
if (*pattern == CHAR_EXCLAMATION_MARK
605
0
    || *pattern == CHAR_CIRCUMFLEX_ACCENT)
606
0
  {
607
0
  pattern++;
608
609
0
  if (pattern >= pattern_end)
610
0
    {
611
0
    *from = pattern;
612
0
    return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
613
0
    }
614
615
0
  is_negative = TRUE;
616
617
0
  out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
618
0
  out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
619
0
  len = 2;
620
621
0
  if (!no_wildsep)
622
0
    {
623
0
    if (with_escape)
624
0
      {
625
0
      out->out_str[len] = CHAR_BACKSLASH;
626
0
      len++;
627
0
      }
628
0
    out->out_str[len] = (uint8_t) separator;
629
0
    }
630
631
0
  convert_glob_write_str(out, len + 1);
632
0
  }
633
0
else
634
0
  convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
635
636
0
has_prev_c = FALSE;
637
0
prev_c = 0;
638
639
0
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
640
0
  {
641
0
  out->out_str[0] = CHAR_BACKSLASH;
642
0
  out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
643
0
  convert_glob_write_str(out, 2);
644
0
  has_prev_c = TRUE;
645
0
  prev_c = CHAR_RIGHT_SQUARE_BRACKET;
646
0
  pattern++;
647
0
  }
648
649
0
while (pattern < pattern_end)
650
0
  {
651
0
  char_start = pattern;
652
0
  GETCHARINCTEST(c, pattern);
653
654
0
  if (c == CHAR_RIGHT_SQUARE_BRACKET)
655
0
    {
656
0
    convert_glob_write(out, c);
657
658
0
    if (!is_negative && !no_wildsep && separator_seen)
659
0
      {
660
0
      out->out_str[0] = CHAR_LEFT_PARENTHESIS;
661
0
      out->out_str[1] = CHAR_QUESTION_MARK;
662
0
      out->out_str[2] = CHAR_LESS_THAN_SIGN;
663
0
      out->out_str[3] = CHAR_EXCLAMATION_MARK;
664
0
      convert_glob_write_str(out, 4);
665
666
0
      convert_glob_print_separator(out, separator, with_escape);
667
0
      convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
668
0
      }
669
670
0
    *from = pattern;
671
0
    return 0;
672
0
    }
673
674
0
  if (pattern >= pattern_end) break;
675
676
0
  if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
677
0
    {
678
0
    *from = pattern;
679
0
    class_index = convert_glob_parse_class(from, pattern_end, out);
680
681
0
    if (class_index != 0)
682
0
      {
683
0
      pattern = *from;
684
685
0
      has_prev_c = FALSE;
686
0
      prev_c = 0;
687
688
0
      if (!is_negative &&
689
0
          convert_glob_char_in_class (class_index, separator))
690
0
        separator_seen = TRUE;
691
0
      continue;
692
0
      }
693
0
    }
694
0
  else if (c == CHAR_MINUS && has_prev_c &&
695
0
           *pattern != CHAR_RIGHT_SQUARE_BRACKET)
696
0
    {
697
0
    convert_glob_write(out, CHAR_MINUS);
698
699
0
    char_start = pattern;
700
0
    GETCHARINCTEST(c, pattern);
701
702
0
    if (pattern >= pattern_end) break;
703
704
0
    if (escape != 0 && c == escape)
705
0
      {
706
0
      char_start = pattern;
707
0
      GETCHARINCTEST(c, pattern);
708
0
      }
709
0
    else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
710
0
      {
711
0
      *from = pattern;
712
0
      return PCRE2_ERROR_CONVERT_SYNTAX;
713
0
      }
714
715
0
    if (prev_c > c)
716
0
      {
717
0
      *from = pattern;
718
0
      return PCRE2_ERROR_CONVERT_SYNTAX;
719
0
      }
720
721
0
    if (prev_c < separator && separator < c) separator_seen = TRUE;
722
723
0
    has_prev_c = FALSE;
724
0
    prev_c = 0;
725
0
    }
726
0
  else
727
0
    {
728
0
    if (escape != 0 && c == escape)
729
0
      {
730
0
      char_start = pattern;
731
0
      GETCHARINCTEST(c, pattern);
732
733
0
      if (pattern >= pattern_end) break;
734
0
      }
735
736
0
    has_prev_c = TRUE;
737
0
    prev_c = c;
738
0
    }
739
740
0
  if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
741
0
      c == CHAR_BACKSLASH || c == CHAR_MINUS)
742
0
    convert_glob_write(out, CHAR_BACKSLASH);
743
744
0
  if (c == separator) separator_seen = TRUE;
745
746
0
  do convert_glob_write(out, *char_start++); while (char_start < pattern);
747
0
  }
748
749
0
*from = pattern;
750
0
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
751
0
}
752
753
754
/* Prints a (*COMMIT) into the output.
755
756
Arguments:
757
  out            output context
758
*/
759
760
static void
761
convert_glob_print_commit(pcre2_output_context *out)
762
0
{
763
0
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
764
0
out->out_str[1] = CHAR_ASTERISK;
765
0
out->out_str[2] = CHAR_C;
766
0
out->out_str[3] = CHAR_O;
767
0
out->out_str[4] = CHAR_M;
768
0
out->out_str[5] = CHAR_M;
769
0
out->out_str[6] = CHAR_I;
770
0
out->out_str[7] = CHAR_T;
771
0
convert_glob_write_str(out, 8);
772
0
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
773
0
}
774
775
776
/* Bash glob converter.
777
778
Arguments:
779
  pattype        the pattern type
780
  pattern        the pattern
781
  plength        length in code units
782
  utf            TRUE if UTF
783
  use_buffer     where to put the output
784
  use_length     length of use_buffer
785
  bufflenptr     where to put the used length
786
  dummyrun       TRUE if a dummy run
787
  ccontext       the convert context
788
789
Returns:         0 => success
790
                !0 => error code
791
*/
792
793
static int
794
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
795
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
796
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
797
0
{
798
0
pcre2_output_context out;
799
0
PCRE2_SPTR pattern_start = pattern;
800
0
PCRE2_SPTR pattern_end = pattern + plength;
801
0
PCRE2_UCHAR separator = ccontext->glob_separator;
802
0
PCRE2_UCHAR escape = ccontext->glob_escape;
803
0
PCRE2_UCHAR c;
804
0
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
805
0
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
806
0
BOOL in_atomic = FALSE;
807
0
BOOL after_starstar = FALSE;
808
0
BOOL no_slash_z = FALSE;
809
0
BOOL with_escape, is_start, after_separator;
810
0
int result = 0;
811
812
0
(void)utf; /* Avoid compiler warning. */
813
814
0
#ifdef SUPPORT_UNICODE
815
0
if (utf && (separator >= 128 || escape >= 128))
816
0
  {
817
  /* Currently only ASCII characters are supported. */
818
0
  *bufflenptr = 0;
819
0
  return PCRE2_ERROR_CONVERT_SYNTAX;
820
0
  }
821
0
#endif
822
823
0
with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
824
825
/* Initialize default for error offset as end of input. */
826
0
out.output = use_buffer;
827
0
out.output_end = use_buffer + use_length;
828
0
out.output_size = 0;
829
830
0
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
831
0
out.out_str[1] = CHAR_QUESTION_MARK;
832
0
out.out_str[2] = CHAR_s;
833
0
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
834
0
convert_glob_write_str(&out, 4);
835
836
0
is_start = TRUE;
837
838
0
if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
839
0
  {
840
0
  if (no_wildsep)
841
0
    is_start = FALSE;
842
0
  else if (!no_starstar && pattern + 1 < pattern_end &&
843
0
           pattern[1] == CHAR_ASTERISK)
844
0
    is_start = FALSE;
845
0
  }
846
847
0
if (is_start)
848
0
  {
849
0
  out.out_str[0] = CHAR_BACKSLASH;
850
0
  out.out_str[1] = CHAR_A;
851
0
  convert_glob_write_str(&out, 2);
852
0
  }
853
854
0
while (pattern < pattern_end)
855
0
  {
856
0
  c = *pattern++;
857
858
0
  if (c == CHAR_ASTERISK)
859
0
    {
860
0
    is_start = pattern == pattern_start + 1;
861
862
0
    if (in_atomic)
863
0
      {
864
0
      convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
865
0
      in_atomic = FALSE;
866
0
      }
867
868
0
    if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
869
0
      {
870
0
      after_separator = is_start || (pattern[-2] == separator);
871
872
0
      do pattern++; while (pattern < pattern_end &&
873
0
                           *pattern == CHAR_ASTERISK);
874
875
0
      if (pattern >= pattern_end)
876
0
        {
877
0
        no_slash_z = TRUE;
878
0
        break;
879
0
        }
880
881
0
      after_starstar = TRUE;
882
883
0
      if (after_separator && escape != 0 && *pattern == escape &&
884
0
          pattern + 1 < pattern_end && pattern[1] == separator)
885
0
        pattern++;
886
887
0
      if (is_start)
888
0
        {
889
0
        if (*pattern != separator) continue;
890
891
0
        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
892
0
        out.out_str[1] = CHAR_QUESTION_MARK;
893
0
        out.out_str[2] = CHAR_COLON;
894
0
        out.out_str[3] = CHAR_BACKSLASH;
895
0
        out.out_str[4] = CHAR_A;
896
0
        out.out_str[5] = CHAR_VERTICAL_LINE;
897
0
        convert_glob_write_str(&out, 6);
898
899
0
        convert_glob_print_separator(&out, separator, with_escape);
900
0
        convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
901
902
0
        pattern++;
903
0
        continue;
904
0
        }
905
906
0
      convert_glob_print_commit(&out);
907
908
0
      if (!after_separator || *pattern != separator)
909
0
        {
910
0
        out.out_str[0] = CHAR_DOT;
911
0
        out.out_str[1] = CHAR_ASTERISK;
912
0
        out.out_str[2] = CHAR_QUESTION_MARK;
913
0
        convert_glob_write_str(&out, 3);
914
0
        continue;
915
0
        }
916
917
0
      out.out_str[0] = CHAR_LEFT_PARENTHESIS;
918
0
      out.out_str[1] = CHAR_QUESTION_MARK;
919
0
      out.out_str[2] = CHAR_COLON;
920
0
      out.out_str[3] = CHAR_DOT;
921
0
      out.out_str[4] = CHAR_ASTERISK;
922
0
      out.out_str[5] = CHAR_QUESTION_MARK;
923
924
0
      convert_glob_write_str(&out, 6);
925
926
0
      convert_glob_print_separator(&out, separator, with_escape);
927
928
0
      out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
929
0
      out.out_str[1] = CHAR_QUESTION_MARK;
930
0
      out.out_str[2] = CHAR_QUESTION_MARK;
931
0
      convert_glob_write_str(&out, 3);
932
933
0
      pattern++;
934
0
      continue;
935
0
      }
936
937
0
    if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
938
0
      {
939
0
      do pattern++; while (pattern < pattern_end &&
940
0
                           *pattern == CHAR_ASTERISK);
941
0
      }
942
943
0
    if (no_wildsep)
944
0
      {
945
0
      if (pattern >= pattern_end)
946
0
        {
947
0
        no_slash_z = TRUE;
948
0
        break;
949
0
        }
950
951
      /* Start check must be after the end check. */
952
0
      if (is_start) continue;
953
0
      }
954
955
0
    if (!is_start)
956
0
      {
957
0
      if (after_starstar)
958
0
        {
959
0
        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
960
0
        out.out_str[1] = CHAR_QUESTION_MARK;
961
0
        out.out_str[2] = CHAR_GREATER_THAN_SIGN;
962
0
        convert_glob_write_str(&out, 3);
963
0
        in_atomic = TRUE;
964
0
        }
965
0
      else
966
0
        convert_glob_print_commit(&out);
967
0
      }
968
969
0
    if (no_wildsep)
970
0
      convert_glob_write(&out, CHAR_DOT);
971
0
    else
972
0
      convert_glob_print_wildcard(&out, separator, with_escape);
973
974
0
    out.out_str[0] = CHAR_ASTERISK;
975
0
    out.out_str[1] = CHAR_QUESTION_MARK;
976
0
    if (pattern >= pattern_end)
977
0
      out.out_str[1] = CHAR_PLUS;
978
0
    convert_glob_write_str(&out, 2);
979
0
    continue;
980
0
    }
981
982
0
  if (c == CHAR_QUESTION_MARK)
983
0
    {
984
0
    if (no_wildsep)
985
0
      convert_glob_write(&out, CHAR_DOT);
986
0
    else
987
0
      convert_glob_print_wildcard(&out, separator, with_escape);
988
0
    continue;
989
0
    }
990
991
0
  if (c == CHAR_LEFT_SQUARE_BRACKET)
992
0
    {
993
0
    result = convert_glob_parse_range(&pattern, pattern_end,
994
0
      &out, utf, separator, with_escape, escape, no_wildsep);
995
0
    if (result != 0) break;
996
0
    continue;
997
0
    }
998
999
0
  if (escape != 0 && c == escape)
1000
0
    {
1001
0
    if (pattern >= pattern_end)
1002
0
      {
1003
0
      result = PCRE2_ERROR_CONVERT_SYNTAX;
1004
0
      break;
1005
0
      }
1006
0
    c = *pattern++;
1007
0
    }
1008
1009
0
  if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
1010
0
    convert_glob_write(&out, CHAR_BACKSLASH);
1011
1012
0
  convert_glob_write(&out, c);
1013
0
  }
1014
1015
0
if (result == 0)
1016
0
  {
1017
0
  if (!no_slash_z)
1018
0
    {
1019
0
    out.out_str[0] = CHAR_BACKSLASH;
1020
0
    out.out_str[1] = CHAR_z;
1021
0
    convert_glob_write_str(&out, 2);
1022
0
    }
1023
1024
0
  if (in_atomic)
1025
0
    convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
1026
1027
0
  convert_glob_write(&out, CHAR_NUL);
1028
1029
0
  if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
1030
0
    result = PCRE2_ERROR_NOMEMORY;
1031
0
  }
1032
1033
0
if (result != 0)
1034
0
  {
1035
0
  *bufflenptr = pattern - pattern_start;
1036
0
  return result;
1037
0
  }
1038
1039
0
*bufflenptr = out.output_size - 1;
1040
0
return 0;
1041
0
}
1042
1043
1044
/*************************************************
1045
*                Convert pattern                 *
1046
*************************************************/
1047
1048
/* This is the external-facing function for converting other forms of pattern
1049
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1050
used to return an offset in the original pattern.
1051
1052
Arguments:
1053
  pattern     the input pattern
1054
  plength     length of input, or PCRE2_ZERO_TERMINATED
1055
  options     options bits
1056
  buffptr     pointer to pointer to output buffer
1057
  bufflenptr  pointer to length of output buffer
1058
  ccontext    convert context or NULL
1059
1060
Returns:      0 for success, else an error code (+ve or -ve)
1061
*/
1062
1063
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
1064
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
1065
  PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1066
  pcre2_convert_context *ccontext)
1067
0
{
1068
0
int i, rc;
1069
0
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1070
0
PCRE2_UCHAR *use_buffer = dummy_buffer;
1071
0
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1072
0
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1073
0
uint32_t pattype = options & TYPE_OPTIONS;
1074
1075
0
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
1076
1077
0
if ((options & ~ALL_OPTIONS) != 0 ||        /* Undefined bit set */
1078
0
    (pattype & (~pattype+1)) != pattype ||  /* More than one type set */
1079
0
    pattype == 0)                           /* No type set */
1080
0
  {
1081
0
  *bufflenptr = 0;                          /* Error offset */
1082
0
  return PCRE2_ERROR_BADOPTION;
1083
0
  }
1084
1085
0
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1086
0
if (ccontext == NULL) ccontext =
1087
0
  (pcre2_convert_context *)(&PRIV(default_convert_context));
1088
1089
/* Check UTF if required. */
1090
1091
#ifndef SUPPORT_UNICODE
1092
if (utf)
1093
  {
1094
  *bufflenptr = 0;  /* Error offset */
1095
  return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
1096
  }
1097
#else
1098
0
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1099
0
  {
1100
0
  PCRE2_SIZE erroroffset;
1101
0
  rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1102
0
  if (rc != 0)
1103
0
    {
1104
0
    *bufflenptr = erroroffset;
1105
0
    return rc;
1106
0
    }
1107
0
  }
1108
0
#endif
1109
1110
/* If buffptr is not NULL, and what it points to is not NULL, we are being
1111
provided with a buffer and a length, so set them as the buffer to use. */
1112
1113
0
if (buffptr != NULL && *buffptr != NULL)
1114
0
  {
1115
0
  use_buffer = *buffptr;
1116
0
  use_length = *bufflenptr;
1117
0
  }
1118
1119
/* Call an individual converter, either just once (if a buffer was provided or
1120
just the length is needed), or twice (if a memory allocation is required). */
1121
1122
0
for (i = 0; i < 2; i++)
1123
0
  {
1124
0
  PCRE2_UCHAR *allocated;
1125
0
  BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1126
1127
0
  switch(pattype)
1128
0
    {
1129
0
    case PCRE2_CONVERT_GLOB:
1130
0
    rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1131
0
      use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1132
0
    break;
1133
1134
0
    case PCRE2_CONVERT_POSIX_BASIC:
1135
0
    case PCRE2_CONVERT_POSIX_EXTENDED:
1136
0
    rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1137
0
      bufflenptr, dummyrun, ccontext);
1138
0
    break;
1139
1140
0
    default:
1141
0
    *bufflenptr = 0;  /* Error offset */
1142
0
    return PCRE2_ERROR_INTERNAL;
1143
0
    }
1144
1145
0
  if (rc != 0 ||           /* Error */
1146
0
      buffptr == NULL ||   /* Just the length is required */
1147
0
      *buffptr != NULL)    /* Buffer was provided or allocated */
1148
0
    return rc;
1149
1150
  /* Allocate memory for the buffer, with hidden space for an allocator at
1151
  the start. The next time round the loop runs the conversion for real. */
1152
1153
0
  allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1154
0
    (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1155
0
  if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
1156
0
  *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1157
1158
0
  use_buffer = *buffptr;
1159
0
  use_length = *bufflenptr + 1;
1160
0
  }
1161
1162
/* Control should never get here. */
1163
1164
0
return PCRE2_ERROR_INTERNAL;
1165
0
}
1166
1167
1168
/*************************************************
1169
*            Free converted pattern              *
1170
*************************************************/
1171
1172
/* This frees a converted pattern that was put in newly-allocated memory.
1173
1174
Argument:   the converted pattern
1175
Returns:    nothing
1176
*/
1177
1178
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
1179
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
1180
0
{
1181
0
if (converted != NULL)
1182
0
  {
1183
0
  pcre2_memctl *memctl =
1184
0
    (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
1185
0
  memctl->free(memctl, memctl->memory_data);
1186
0
  }
1187
0
}
1188
1189
/* End of pcre2_convert.c */