Coverage Report

Created: 2025-11-16 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/pcre2lib/pcre2_substitute.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2022 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
0
#define PTR_STACK_SIZE 20
49
50
#define SUBSTITUTE_OPTIONS \
51
0
  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
52
0
   PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
53
0
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
54
0
   PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
55
56
57
58
/*************************************************
59
*           Find end of substitute text          *
60
*************************************************/
61
62
/* In extended mode, we recognize ${name:+set text:unset text} and similar
63
constructions. This requires the identification of unescaped : and }
64
characters. This function scans for such. It must deal with nested ${
65
constructions. The pointer to the text is updated, either to the required end
66
character, or to where an error was detected.
67
68
Arguments:
69
  code      points to the compiled expression (for options)
70
  ptrptr    points to the pointer to the start of the text (updated)
71
  ptrend    end of the whole string
72
  last      TRUE if this is the last expected string (only } recognized)
73
74
Returns:    0 on success
75
            negative error code on failure
76
*/
77
78
static int
79
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
80
  BOOL last)
81
0
{
82
0
int rc = 0;
83
0
uint32_t nestlevel = 0;
84
0
BOOL literal = FALSE;
85
0
PCRE2_SPTR ptr = *ptrptr;
86
87
0
for (; ptr < ptrend; ptr++)
88
0
  {
89
0
  if (literal)
90
0
    {
91
0
    if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
92
0
      {
93
0
      literal = FALSE;
94
0
      ptr += 1;
95
0
      }
96
0
    }
97
98
0
  else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
99
0
    {
100
0
    if (nestlevel == 0) goto EXIT;
101
0
    nestlevel--;
102
0
    }
103
104
0
  else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
105
106
0
  else if (*ptr == CHAR_DOLLAR_SIGN)
107
0
    {
108
0
    if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
109
0
      {
110
0
      nestlevel++;
111
0
      ptr += 1;
112
0
      }
113
0
    }
114
115
0
  else if (*ptr == CHAR_BACKSLASH)
116
0
    {
117
0
    int erc;
118
0
    int errorcode;
119
0
    uint32_t ch;
120
121
0
    if (ptr < ptrend - 1) switch (ptr[1])
122
0
      {
123
0
      case CHAR_L:
124
0
      case CHAR_l:
125
0
      case CHAR_U:
126
0
      case CHAR_u:
127
0
      ptr += 1;
128
0
      continue;
129
0
      }
130
131
0
    ptr += 1;  /* Must point after \ */
132
0
    erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
133
0
      code->overall_options, code->extra_options, FALSE, NULL);
134
0
    ptr -= 1;  /* Back to last code unit of escape */
135
0
    if (errorcode != 0)
136
0
      {
137
0
      rc = errorcode;
138
0
      goto EXIT;
139
0
      }
140
141
0
    switch(erc)
142
0
      {
143
0
      case 0:      /* Data character */
144
0
      case ESC_E:  /* Isolated \E is ignored */
145
0
      break;
146
147
0
      case ESC_Q:
148
0
      literal = TRUE;
149
0
      break;
150
151
0
      default:
152
0
      rc = PCRE2_ERROR_BADREPESCAPE;
153
0
      goto EXIT;
154
0
      }
155
0
    }
156
0
  }
157
158
0
rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
159
160
0
EXIT:
161
0
*ptrptr = ptr;
162
0
return rc;
163
0
}
164
165
166
167
/*************************************************
168
*              Match and substitute              *
169
*************************************************/
170
171
/* This function applies a compiled re to a subject string and creates a new
172
string with substitutions. The first 7 arguments are the same as for
173
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
174
175
Arguments:
176
  code            points to the compiled expression
177
  subject         points to the subject string
178
  length          length of subject string (may contain binary zeros)
179
  start_offset    where to start in the subject string
180
  options         option bits
181
  match_data      points to a match_data block, or is NULL
182
  mcontext        points to a PCRE2 match context
183
  replacement     points to the replacement string
184
  rlength         length of replacement string
185
  buffer          where to put the substituted string
186
  blength         points to length of buffer; updated to length of string
187
188
Returns:          >= 0 number of substitutions made
189
                  < 0 an error code
190
                  PCRE2_ERROR_BADREPLACEMENT means invalid use of $
191
*/
192
193
/* This macro checks for space in the buffer before copying into it. On
194
overflow, either give an error immediately, or keep on, accumulating the
195
length. */
196
197
#define CHECKMEMCPY(from,length) \
198
0
  { \
199
0
  if (!overflowed && lengthleft < length) \
200
0
    { \
201
0
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
202
0
    overflowed = TRUE; \
203
0
    extra_needed = length - lengthleft; \
204
0
    } \
205
0
  else if (overflowed) \
206
0
    { \
207
0
    extra_needed += length; \
208
0
    }  \
209
0
  else \
210
0
    {  \
211
0
    memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
212
0
    buff_offset += length; \
213
0
    lengthleft -= length; \
214
0
    } \
215
0
  }
216
217
/* Here's the function */
218
219
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
220
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
221
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
222
  pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
223
  PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
224
0
{
225
0
int rc;
226
0
int subs;
227
0
int forcecase = 0;
228
0
int forcecasereset = 0;
229
0
uint32_t ovector_count;
230
0
uint32_t goptions = 0;
231
0
uint32_t suboptions;
232
0
pcre2_match_data *internal_match_data = NULL;
233
0
BOOL escaped_literal = FALSE;
234
0
BOOL overflowed = FALSE;
235
0
BOOL use_existing_match;
236
0
BOOL replacement_only;
237
0
#ifdef SUPPORT_UNICODE
238
0
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
239
0
BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
240
0
#endif
241
0
PCRE2_UCHAR temp[6];
242
0
PCRE2_SPTR ptr;
243
0
PCRE2_SPTR repend;
244
0
PCRE2_SIZE extra_needed = 0;
245
0
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
246
0
PCRE2_SIZE *ovector;
247
0
PCRE2_SIZE ovecsave[3];
248
0
pcre2_substitute_callout_block scb;
249
250
/* General initialization */
251
252
0
buff_offset = 0;
253
0
lengthleft = buff_length = *blength;
254
0
*blength = PCRE2_UNSET;
255
0
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
256
257
/* Partial matching is not valid. This must come after setting *blength to
258
PCRE2_UNSET, so as not to imply an offset in the replacement. */
259
260
0
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
261
0
  return PCRE2_ERROR_BADOPTION;
262
263
/* Validate length and find the end of the replacement. A NULL replacement of
264
zero length is interpreted as an empty string. */
265
266
0
if (replacement == NULL)
267
0
  {
268
0
  if (rlength != 0) return PCRE2_ERROR_NULL;
269
0
  replacement = (PCRE2_SPTR)"";
270
0
  }
271
272
0
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
273
0
repend = replacement + rlength;
274
275
/* Check for using a match that has already happened. Note that the subject
276
pointer in the match data may be NULL after a no-match. */
277
278
0
use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
279
0
replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
280
281
/* If starting from an existing match, there must be an externally provided
282
match data block. We create an internal match_data block in two cases: (a) an
283
external one is not supplied (and we are not starting from an existing match);
284
(b) an existing match is to be used for the first substitution. In the latter
285
case, we copy the existing match into the internal block, except for any cached
286
heap frame size and pointer. This ensures that no changes are made to the
287
external match data block. */
288
289
0
if (match_data == NULL)
290
0
  {
291
0
  pcre2_general_context *gcontext;
292
0
  if (use_existing_match) return PCRE2_ERROR_NULL;
293
0
  gcontext = (mcontext == NULL)?
294
0
    (pcre2_general_context *)code :
295
0
    (pcre2_general_context *)mcontext;
296
0
  match_data = internal_match_data =
297
0
    pcre2_match_data_create_from_pattern(code, gcontext);
298
0
  if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
299
0
  }
300
301
0
else if (use_existing_match)
302
0
  {
303
0
  pcre2_general_context *gcontext = (mcontext == NULL)?
304
0
    (pcre2_general_context *)code :
305
0
    (pcre2_general_context *)mcontext;
306
0
  int pairs = (code->top_bracket + 1 < match_data->oveccount)?
307
0
    code->top_bracket + 1 : match_data->oveccount;
308
0
  internal_match_data = pcre2_match_data_create(match_data->oveccount,
309
0
    gcontext);
310
0
  if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
311
0
  memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
312
0
    + 2*pairs*sizeof(PCRE2_SIZE));
313
0
  internal_match_data->heapframes = NULL;
314
0
  internal_match_data->heapframes_size = 0;
315
0
  match_data = internal_match_data;
316
0
  }
317
318
/* Remember ovector details */
319
320
0
ovector = pcre2_get_ovector_pointer(match_data);
321
0
ovector_count = pcre2_get_ovector_count(match_data);
322
323
/* Fixed things in the callout block */
324
325
0
scb.version = 0;
326
0
scb.input = subject;
327
0
scb.output = (PCRE2_SPTR)buffer;
328
0
scb.ovector = ovector;
329
330
/* A NULL subject of zero length is treated as an empty string. */
331
332
0
if (subject == NULL)
333
0
  {
334
0
  if (length != 0) return PCRE2_ERROR_NULL;
335
0
  subject = (PCRE2_SPTR)"";
336
0
  }
337
338
/* Find length of zero-terminated subject */
339
340
0
if (length == PCRE2_ZERO_TERMINATED)
341
0
  length = subject? PRIV(strlen)(subject) : 0;
342
343
/* Check UTF replacement string if necessary. */
344
345
0
#ifdef SUPPORT_UNICODE
346
0
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
347
0
  {
348
0
  rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
349
0
  if (rc != 0)
350
0
    {
351
0
    match_data->leftchar = 0;
352
0
    goto EXIT;
353
0
    }
354
0
  }
355
0
#endif  /* SUPPORT_UNICODE */
356
357
/* Save the substitute options and remove them from the match options. */
358
359
0
suboptions = options & SUBSTITUTE_OPTIONS;
360
0
options &= ~SUBSTITUTE_OPTIONS;
361
362
/* Error if the start match offset is greater than the length of the subject. */
363
364
0
if (start_offset > length)
365
0
  {
366
0
  match_data->leftchar = 0;
367
0
  rc = PCRE2_ERROR_BADOFFSET;
368
0
  goto EXIT;
369
0
  }
370
371
/* Copy up to the start offset, unless only the replacement is required. */
372
373
0
if (!replacement_only) CHECKMEMCPY(subject, start_offset);
374
375
/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
376
match is taken from the match_data that was passed in. */
377
378
0
subs = 0;
379
0
do
380
0
  {
381
0
  PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
382
0
  uint32_t ptrstackptr = 0;
383
384
0
  if (use_existing_match)
385
0
    {
386
0
    rc = match_data->rc;
387
0
    use_existing_match = FALSE;
388
0
    }
389
0
  else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
390
0
    match_data, mcontext);
391
392
0
#ifdef SUPPORT_UNICODE
393
0
  if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
394
0
#endif
395
396
  /* Any error other than no match returns the error code. No match when not
397
  doing the special after-empty-match global rematch, or when at the end of the
398
  subject, breaks the global loop. Otherwise, advance the starting point by one
399
  character, copying it to the output, and try again. */
400
401
0
  if (rc < 0)
402
0
    {
403
0
    PCRE2_SIZE save_start;
404
405
0
    if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
406
0
    if (goptions == 0 || start_offset >= length) break;
407
408
    /* Advance by one code point. Then, if CRLF is a valid newline sequence and
409
    we have advanced into the middle of it, advance one more code point. In
410
    other words, do not start in the middle of CRLF, even if CR and LF on their
411
    own are valid newlines. */
412
413
0
    save_start = start_offset++;
414
0
    if (subject[start_offset-1] == CHAR_CR &&
415
0
        code->newline_convention != PCRE2_NEWLINE_CR &&
416
0
        code->newline_convention != PCRE2_NEWLINE_LF &&
417
0
        start_offset < length &&
418
0
        subject[start_offset] == CHAR_LF)
419
0
      start_offset++;
420
421
    /* Otherwise, in UTF mode, advance past any secondary code points. */
422
423
0
    else if ((code->overall_options & PCRE2_UTF) != 0)
424
0
      {
425
0
#if PCRE2_CODE_UNIT_WIDTH == 8
426
0
      while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
427
0
        start_offset++;
428
#elif PCRE2_CODE_UNIT_WIDTH == 16
429
      while (start_offset < length &&
430
            (subject[start_offset] & 0xfc00) == 0xdc00)
431
        start_offset++;
432
#endif
433
0
      }
434
435
    /* Copy what we have advanced past (unless not required), reset the special
436
    global options, and continue to the next match. */
437
438
0
    fraglength = start_offset - save_start;
439
0
    if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
440
0
    goptions = 0;
441
0
    continue;
442
0
    }
443
444
  /* Handle a successful match. Matches that use \K to end before they start
445
  or start before the current point in the subject are not supported. */
446
447
0
  if (ovector[1] < ovector[0] || ovector[0] < start_offset)
448
0
    {
449
0
    rc = PCRE2_ERROR_BADSUBSPATTERN;
450
0
    goto EXIT;
451
0
    }
452
453
  /* Check for the same match as previous. This is legitimate after matching an
454
  empty string that starts after the initial match offset. We have tried again
455
  at the match point in case the pattern is one like /(?<=\G.)/ which can never
456
  match at its starting point, so running the match achieves the bumpalong. If
457
  we do get the same (null) match at the original match point, it isn't such a
458
  pattern, so we now do the empty string magic. In all other cases, a repeat
459
  match should never occur. */
460
461
0
  if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
462
0
    {
463
0
    if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
464
0
      {
465
0
      goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
466
0
      ovecsave[2] = start_offset;
467
0
      continue;    /* Back to the top of the loop */
468
0
      }
469
0
    rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
470
0
    goto EXIT;
471
0
    }
472
473
  /* Count substitutions with a paranoid check for integer overflow; surely no
474
  real call to this function would ever hit this! */
475
476
0
  if (subs == INT_MAX)
477
0
    {
478
0
    rc = PCRE2_ERROR_TOOMANYREPLACE;
479
0
    goto EXIT;
480
0
    }
481
0
  subs++;
482
483
  /* Copy the text leading up to the match (unless not required), and remember
484
  where the insert begins and how many ovector pairs are set. */
485
486
0
  if (rc == 0) rc = ovector_count;
487
0
  fraglength = ovector[0] - start_offset;
488
0
  if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
489
0
  scb.output_offsets[0] = buff_offset;
490
0
  scb.oveccount = rc;
491
492
  /* Process the replacement string. If the entire replacement is literal, just
493
  copy it with length check. */
494
495
0
  ptr = replacement;
496
0
  if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
497
0
    {
498
0
    CHECKMEMCPY(ptr, rlength);
499
0
    }
500
501
  /* Within a non-literal replacement, which must be scanned character by
502
  character, local literal mode can be set by \Q, but only in extended mode
503
  when backslashes are being interpreted. In extended mode we must handle
504
  nested substrings that are to be reprocessed. */
505
506
0
  else for (;;)
507
0
    {
508
0
    uint32_t ch;
509
0
    unsigned int chlen;
510
511
    /* If at the end of a nested substring, pop the stack. */
512
513
0
    if (ptr >= repend)
514
0
      {
515
0
      if (ptrstackptr == 0) break;       /* End of replacement string */
516
0
      repend = ptrstack[--ptrstackptr];
517
0
      ptr = ptrstack[--ptrstackptr];
518
0
      continue;
519
0
      }
520
521
    /* Handle the next character */
522
523
0
    if (escaped_literal)
524
0
      {
525
0
      if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
526
0
        {
527
0
        escaped_literal = FALSE;
528
0
        ptr += 2;
529
0
        continue;
530
0
        }
531
0
      goto LOADLITERAL;
532
0
      }
533
534
    /* Not in literal mode. */
535
536
0
    if (*ptr == CHAR_DOLLAR_SIGN)
537
0
      {
538
0
      int group, n;
539
0
      uint32_t special = 0;
540
0
      BOOL inparens;
541
0
      BOOL star;
542
0
      PCRE2_SIZE sublength;
543
0
      PCRE2_SPTR text1_start = NULL;
544
0
      PCRE2_SPTR text1_end = NULL;
545
0
      PCRE2_SPTR text2_start = NULL;
546
0
      PCRE2_SPTR text2_end = NULL;
547
0
      PCRE2_UCHAR next;
548
0
      PCRE2_UCHAR name[33];
549
550
0
      if (++ptr >= repend) goto BAD;
551
0
      if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
552
553
0
      group = -1;
554
0
      n = 0;
555
0
      inparens = FALSE;
556
0
      star = FALSE;
557
558
0
      if (next == CHAR_LEFT_CURLY_BRACKET)
559
0
        {
560
0
        if (++ptr >= repend) goto BAD;
561
0
        next = *ptr;
562
0
        inparens = TRUE;
563
0
        }
564
565
0
      if (next == CHAR_ASTERISK)
566
0
        {
567
0
        if (++ptr >= repend) goto BAD;
568
0
        next = *ptr;
569
0
        star = TRUE;
570
0
        }
571
572
0
      if (!star && next >= CHAR_0 && next <= CHAR_9)
573
0
        {
574
0
        group = next - CHAR_0;
575
0
        while (++ptr < repend)
576
0
          {
577
0
          next = *ptr;
578
0
          if (next < CHAR_0 || next > CHAR_9) break;
579
0
          group = group * 10 + next - CHAR_0;
580
581
          /* A check for a number greater than the highest captured group
582
          is sufficient here; no need for a separate overflow check. If unknown
583
          groups are to be treated as unset, just skip over any remaining
584
          digits and carry on. */
585
586
0
          if (group > code->top_bracket)
587
0
            {
588
0
            if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
589
0
              {
590
0
              while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
591
0
              break;
592
0
              }
593
0
            else
594
0
              {
595
0
              rc = PCRE2_ERROR_NOSUBSTRING;
596
0
              goto PTREXIT;
597
0
              }
598
0
            }
599
0
          }
600
0
        }
601
0
      else
602
0
        {
603
0
        const uint8_t *ctypes = code->tables + ctypes_offset;
604
0
        while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
605
0
          {
606
0
          name[n++] = next;
607
0
          if (n > 32) goto BAD;
608
0
          if (++ptr >= repend) break;
609
0
          next = *ptr;
610
0
          }
611
0
        if (n == 0) goto BAD;
612
0
        name[n] = 0;
613
0
        }
614
615
      /* In extended mode we recognize ${name:+set text:unset text} and
616
      ${name:-default text}. */
617
618
0
      if (inparens)
619
0
        {
620
0
        if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
621
0
             !star && ptr < repend - 2 && next == CHAR_COLON)
622
0
          {
623
0
          special = *(++ptr);
624
0
          if (special != CHAR_PLUS && special != CHAR_MINUS)
625
0
            {
626
0
            rc = PCRE2_ERROR_BADSUBSTITUTION;
627
0
            goto PTREXIT;
628
0
            }
629
630
0
          text1_start = ++ptr;
631
0
          rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
632
0
          if (rc != 0) goto PTREXIT;
633
0
          text1_end = ptr;
634
635
0
          if (special == CHAR_PLUS && *ptr == CHAR_COLON)
636
0
            {
637
0
            text2_start = ++ptr;
638
0
            rc = find_text_end(code, &ptr, repend, TRUE);
639
0
            if (rc != 0) goto PTREXIT;
640
0
            text2_end = ptr;
641
0
            }
642
0
          }
643
644
0
        else
645
0
          {
646
0
          if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
647
0
            {
648
0
            rc = PCRE2_ERROR_REPMISSINGBRACE;
649
0
            goto PTREXIT;
650
0
            }
651
0
          }
652
653
0
        ptr++;
654
0
        }
655
656
      /* Have found a syntactically correct group number or name, or *name.
657
      Only *MARK is currently recognized. */
658
659
0
      if (star)
660
0
        {
661
0
        if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
662
0
          {
663
0
          PCRE2_SPTR mark = pcre2_get_mark(match_data);
664
0
          if (mark != NULL)
665
0
            {
666
0
            PCRE2_SPTR mark_start = mark;
667
0
            while (*mark != 0) mark++;
668
0
            fraglength = mark - mark_start;
669
0
            CHECKMEMCPY(mark_start, fraglength);
670
0
            }
671
0
          }
672
0
        else goto BAD;
673
0
        }
674
675
      /* Substitute the contents of a group. We don't use substring_copy
676
      functions any more, in order to support case forcing. */
677
678
0
      else
679
0
        {
680
0
        PCRE2_SPTR subptr, subptrend;
681
682
        /* Find a number for a named group. In case there are duplicate names,
683
        search for the first one that is set. If the name is not found when
684
        PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
685
        non-existent group. */
686
687
0
        if (group < 0)
688
0
          {
689
0
          PCRE2_SPTR first, last, entry;
690
0
          rc = pcre2_substring_nametable_scan(code, name, &first, &last);
691
0
          if (rc == PCRE2_ERROR_NOSUBSTRING &&
692
0
              (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
693
0
            {
694
0
            group = code->top_bracket + 1;
695
0
            }
696
0
          else
697
0
            {
698
0
            if (rc < 0) goto PTREXIT;
699
0
            for (entry = first; entry <= last; entry += rc)
700
0
              {
701
0
              uint32_t ng = GET2(entry, 0);
702
0
              if (ng < ovector_count)
703
0
                {
704
0
                if (group < 0) group = ng;          /* First in ovector */
705
0
                if (ovector[ng*2] != PCRE2_UNSET)
706
0
                  {
707
0
                  group = ng;                       /* First that is set */
708
0
                  break;
709
0
                  }
710
0
                }
711
0
              }
712
713
            /* If group is still negative, it means we did not find a group
714
            that is in the ovector. Just set the first group. */
715
716
0
            if (group < 0) group = GET2(first, 0);
717
0
            }
718
0
          }
719
720
        /* We now have a group that is identified by number. Find the length of
721
        the captured string. If a group in a non-special substitution is unset
722
        when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
723
724
0
        rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
725
0
        if (rc < 0)
726
0
          {
727
0
          if (rc == PCRE2_ERROR_NOSUBSTRING &&
728
0
              (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
729
0
            {
730
0
            rc = PCRE2_ERROR_UNSET;
731
0
            }
732
0
          if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
733
0
          if (special == 0)                           /* Plain substitution */
734
0
            {
735
0
            if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
736
0
            goto PTREXIT;                             /* Else error */
737
0
            }
738
0
          }
739
740
        /* If special is '+' we have a 'set' and possibly an 'unset' text,
741
        both of which are reprocessed when used. If special is '-' we have a
742
        default text for when the group is unset; it must be reprocessed. */
743
744
0
        if (special != 0)
745
0
          {
746
0
          if (special == CHAR_MINUS)
747
0
            {
748
0
            if (rc == 0) goto LITERAL_SUBSTITUTE;
749
0
            text2_start = text1_start;
750
0
            text2_end = text1_end;
751
0
            }
752
753
0
          if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
754
0
          ptrstack[ptrstackptr++] = ptr;
755
0
          ptrstack[ptrstackptr++] = repend;
756
757
0
          if (rc == 0)
758
0
            {
759
0
            ptr = text1_start;
760
0
            repend = text1_end;
761
0
            }
762
0
          else
763
0
            {
764
0
            ptr = text2_start;
765
0
            repend = text2_end;
766
0
            }
767
0
          continue;
768
0
          }
769
770
        /* Otherwise we have a literal substitution of a group's contents. */
771
772
0
        LITERAL_SUBSTITUTE:
773
0
        subptr = subject + ovector[group*2];
774
0
        subptrend = subject + ovector[group*2 + 1];
775
776
        /* Substitute a literal string, possibly forcing alphabetic case. */
777
778
0
        while (subptr < subptrend)
779
0
          {
780
0
          GETCHARINCTEST(ch, subptr);
781
0
          if (forcecase != 0)
782
0
            {
783
0
#ifdef SUPPORT_UNICODE
784
0
            if (utf || ucp)
785
0
              {
786
0
              uint32_t type = UCD_CHARTYPE(ch);
787
0
              if (PRIV(ucp_gentype)[type] == ucp_L &&
788
0
                  type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
789
0
                ch = UCD_OTHERCASE(ch);
790
0
              }
791
0
            else
792
0
#endif
793
0
              {
794
0
              if (((code->tables + cbits_offset +
795
0
                  ((forcecase > 0)? cbit_upper:cbit_lower)
796
0
                  )[ch/8] & (1u << (ch%8))) == 0)
797
0
                ch = (code->tables + fcc_offset)[ch];
798
0
              }
799
0
            forcecase = forcecasereset;
800
0
            }
801
802
0
#ifdef SUPPORT_UNICODE
803
0
          if (utf) chlen = PRIV(ord2utf)(ch, temp); else
804
0
#endif
805
0
            {
806
0
            temp[0] = ch;
807
0
            chlen = 1;
808
0
            }
809
0
          CHECKMEMCPY(temp, chlen);
810
0
          }
811
0
        }
812
0
      }
813
814
    /* Handle an escape sequence in extended mode. We can use check_escape()
815
    to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
816
    the case-forcing escapes are not supported in pcre2_compile() so must be
817
    recognized here. */
818
819
0
    else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
820
0
              *ptr == CHAR_BACKSLASH)
821
0
      {
822
0
      int errorcode;
823
824
0
      if (ptr < repend - 1) switch (ptr[1])
825
0
        {
826
0
        case CHAR_L:
827
0
        forcecase = forcecasereset = -1;
828
0
        ptr += 2;
829
0
        continue;
830
831
0
        case CHAR_l:
832
0
        forcecase = -1;
833
0
        forcecasereset = 0;
834
0
        ptr += 2;
835
0
        continue;
836
837
0
        case CHAR_U:
838
0
        forcecase = forcecasereset = 1;
839
0
        ptr += 2;
840
0
        continue;
841
842
0
        case CHAR_u:
843
0
        forcecase = 1;
844
0
        forcecasereset = 0;
845
0
        ptr += 2;
846
0
        continue;
847
848
0
        default:
849
0
        break;
850
0
        }
851
852
0
      ptr++;  /* Point after \ */
853
0
      rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
854
0
        code->overall_options, code->extra_options, FALSE, NULL);
855
0
      if (errorcode != 0) goto BADESCAPE;
856
857
0
      switch(rc)
858
0
        {
859
0
        case ESC_E:
860
0
        forcecase = forcecasereset = 0;
861
0
        continue;
862
863
0
        case ESC_Q:
864
0
        escaped_literal = TRUE;
865
0
        continue;
866
867
0
        case 0:      /* Data character */
868
0
        goto LITERAL;
869
870
0
        default:
871
0
        goto BADESCAPE;
872
0
        }
873
0
      }
874
875
    /* Handle a literal code unit */
876
877
0
    else
878
0
      {
879
0
      LOADLITERAL:
880
0
      GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
881
882
0
      LITERAL:
883
0
      if (forcecase != 0)
884
0
        {
885
0
#ifdef SUPPORT_UNICODE
886
0
        if (utf || ucp)
887
0
          {
888
0
          uint32_t type = UCD_CHARTYPE(ch);
889
0
          if (PRIV(ucp_gentype)[type] == ucp_L &&
890
0
              type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
891
0
            ch = UCD_OTHERCASE(ch);
892
0
          }
893
0
        else
894
0
#endif
895
0
          {
896
0
          if (((code->tables + cbits_offset +
897
0
              ((forcecase > 0)? cbit_upper:cbit_lower)
898
0
              )[ch/8] & (1u << (ch%8))) == 0)
899
0
            ch = (code->tables + fcc_offset)[ch];
900
0
          }
901
0
        forcecase = forcecasereset;
902
0
        }
903
904
0
#ifdef SUPPORT_UNICODE
905
0
      if (utf) chlen = PRIV(ord2utf)(ch, temp); else
906
0
#endif
907
0
        {
908
0
        temp[0] = ch;
909
0
        chlen = 1;
910
0
        }
911
0
      CHECKMEMCPY(temp, chlen);
912
0
      } /* End handling a literal code unit */
913
0
    }   /* End of loop for scanning the replacement. */
914
915
  /* The replacement has been copied to the output, or its size has been
916
  remembered. Do the callout if there is one and we have done an actual
917
  replacement. */
918
919
0
  if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
920
0
    {
921
0
    scb.subscount = subs;
922
0
    scb.output_offsets[1] = buff_offset;
923
0
    rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
924
925
    /* A non-zero return means cancel this substitution. Instead, copy the
926
    matched string fragment. */
927
928
0
    if (rc != 0)
929
0
      {
930
0
      PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
931
0
      PCRE2_SIZE oldlength = ovector[1] - ovector[0];
932
933
0
      buff_offset -= newlength;
934
0
      lengthleft += newlength;
935
0
      if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
936
937
      /* A negative return means do not do any more. */
938
939
0
      if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
940
0
      }
941
0
    }
942
943
  /* Save the details of this match. See above for how this data is used. If we
944
  matched an empty string, do the magic for global matches. Update the start
945
  offset to point to the rest of the subject string. If we re-used an existing
946
  match for the first match, switch to the internal match data block. */
947
948
0
  ovecsave[0] = ovector[0];
949
0
  ovecsave[1] = ovector[1];
950
0
  ovecsave[2] = start_offset;
951
952
0
  goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
953
0
    PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
954
0
  start_offset = ovector[1];
955
0
  } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
956
957
/* Copy the rest of the subject unless not required, and terminate the output
958
with a binary zero. */
959
960
0
if (!replacement_only)
961
0
  {
962
0
  fraglength = length - start_offset;
963
0
  CHECKMEMCPY(subject + start_offset, fraglength);
964
0
  }
965
966
0
temp[0] = 0;
967
0
CHECKMEMCPY(temp, 1);
968
969
/* If overflowed is set it means that PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
970
and matching has carried on after a full buffer, in order to compute the length
971
needed. Otherwise, an overflow generates an immediate error return. */
972
973
0
if (overflowed)
974
0
  {
975
0
  rc = PCRE2_ERROR_NOMEMORY;
976
0
  *blength = buff_length + extra_needed;
977
0
  }
978
979
/* After a successful execution, return the number of substitutions and set the
980
length of buffer used, excluding the trailing zero. */
981
982
0
else
983
0
  {
984
0
  rc = subs;
985
0
  *blength = buff_offset - 1;
986
0
  }
987
988
0
EXIT:
989
0
if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
990
0
  else match_data->rc = rc;
991
0
return rc;
992
993
0
NOROOM:
994
0
rc = PCRE2_ERROR_NOMEMORY;
995
0
goto EXIT;
996
997
0
BAD:
998
0
rc = PCRE2_ERROR_BADREPLACEMENT;
999
0
goto PTREXIT;
1000
1001
0
BADESCAPE:
1002
0
rc = PCRE2_ERROR_BADREPESCAPE;
1003
1004
0
PTREXIT:
1005
0
*blength = (PCRE2_SIZE)(ptr - replacement);
1006
0
goto EXIT;
1007
0
}
1008
1009
/* End of pcre2_substitute.c */