Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/pcre/pcre2lib/pcre2_find_bracket.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
/* This module contains a single function that scans through a compiled pattern
43
until it finds a capturing bracket with the given number, or, if the number is
44
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
45
function is called from pcre2_compile.c and also from pcre2_study.c when
46
finding the minimum matching length. */
47
48
49
#ifdef HAVE_CONFIG_H
50
#include "config.h"
51
#endif
52
53
#include "pcre2_internal.h"
54
55
56
/*************************************************
57
*    Scan compiled regex for specific bracket    *
58
*************************************************/
59
60
/*
61
Arguments:
62
  code        points to start of expression
63
  utf         TRUE in UTF mode
64
  number      the required bracket number or negative to find a lookbehind
65
66
Returns:      pointer to the opcode for the bracket, or NULL if not found
67
*/
68
69
PCRE2_SPTR
70
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
71
264
{
72
264
for (;;)
73
142k
  {
74
142k
  PCRE2_UCHAR c = *code;
75
76
142k
  if (c == OP_END) return NULL;
77
78
  /* XCLASS is used for classes that cannot be represented just by a bit map.
79
  This includes negated single high-valued characters. ECLASS is used for
80
  classes that use set operations internally. CALLOUT_STR is used for
81
  callouts with string arguments. In each case the length in the table is
82
  zero; the actual length is stored in the compiled code. */
83
84
142k
  if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
85
142k
  else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
86
87
  /* Handle lookbehind */
88
89
142k
  else if (c == OP_REVERSE || c == OP_VREVERSE)
90
2
    {
91
2
    if (number < 0) return code;
92
2
    code += PRIV(OP_lengths)[c];
93
2
    }
94
95
  /* Handle capturing bracket */
96
97
142k
  else if (c == OP_CBRA || c == OP_SCBRA ||
98
142k
           c == OP_CBRAPOS || c == OP_SCBRAPOS)
99
1.41k
    {
100
1.41k
    int n = (int)GET2(code, 1+LINK_SIZE);
101
1.41k
    if (n == number) return code;
102
1.14k
    code += PRIV(OP_lengths)[c];
103
1.14k
    }
104
105
  /* Otherwise, we can get the item's length from the table, except that for
106
  repeated character types, we have to test for \p and \P, which have an extra
107
  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
108
  must add in its length. */
109
110
140k
  else
111
140k
    {
112
140k
    switch(c)
113
140k
      {
114
6
      case OP_TYPESTAR:
115
6
      case OP_TYPEMINSTAR:
116
109
      case OP_TYPEPLUS:
117
126
      case OP_TYPEMINPLUS:
118
491
      case OP_TYPEQUERY:
119
491
      case OP_TYPEMINQUERY:
120
491
      case OP_TYPEPOSSTAR:
121
542
      case OP_TYPEPOSPLUS:
122
824
      case OP_TYPEPOSQUERY:
123
824
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
124
824
      break;
125
126
0
      case OP_TYPEUPTO:
127
0
      case OP_TYPEMINUPTO:
128
0
      case OP_TYPEEXACT:
129
0
      case OP_TYPEPOSUPTO:
130
0
      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
131
0
        code += 2;
132
0
      break;
133
134
0
      case OP_MARK:
135
0
      case OP_COMMIT_ARG:
136
0
      case OP_PRUNE_ARG:
137
0
      case OP_SKIP_ARG:
138
0
      case OP_THEN_ARG:
139
0
      code += code[1];
140
0
      break;
141
140k
      }
142
143
    /* Add in the fixed length from the table */
144
145
140k
    code += PRIV(OP_lengths)[c];
146
147
  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
148
  followed by a multi-byte character. The length in the table is a minimum, so
149
  we have to arrange to skip the extra bytes. */
150
151
140k
#ifdef MAYBE_UTF_MULTI
152
140k
    if (utf) switch(c)
153
0
      {
154
0
      case OP_CHAR:
155
0
      case OP_CHARI:
156
0
      case OP_NOT:
157
0
      case OP_NOTI:
158
0
      case OP_EXACT:
159
0
      case OP_EXACTI:
160
0
      case OP_NOTEXACT:
161
0
      case OP_NOTEXACTI:
162
0
      case OP_UPTO:
163
0
      case OP_UPTOI:
164
0
      case OP_NOTUPTO:
165
0
      case OP_NOTUPTOI:
166
0
      case OP_MINUPTO:
167
0
      case OP_MINUPTOI:
168
0
      case OP_NOTMINUPTO:
169
0
      case OP_NOTMINUPTOI:
170
0
      case OP_POSUPTO:
171
0
      case OP_POSUPTOI:
172
0
      case OP_NOTPOSUPTO:
173
0
      case OP_NOTPOSUPTOI:
174
0
      case OP_STAR:
175
0
      case OP_STARI:
176
0
      case OP_NOTSTAR:
177
0
      case OP_NOTSTARI:
178
0
      case OP_MINSTAR:
179
0
      case OP_MINSTARI:
180
0
      case OP_NOTMINSTAR:
181
0
      case OP_NOTMINSTARI:
182
0
      case OP_POSSTAR:
183
0
      case OP_POSSTARI:
184
0
      case OP_NOTPOSSTAR:
185
0
      case OP_NOTPOSSTARI:
186
0
      case OP_PLUS:
187
0
      case OP_PLUSI:
188
0
      case OP_NOTPLUS:
189
0
      case OP_NOTPLUSI:
190
0
      case OP_MINPLUS:
191
0
      case OP_MINPLUSI:
192
0
      case OP_NOTMINPLUS:
193
0
      case OP_NOTMINPLUSI:
194
0
      case OP_POSPLUS:
195
0
      case OP_POSPLUSI:
196
0
      case OP_NOTPOSPLUS:
197
0
      case OP_NOTPOSPLUSI:
198
0
      case OP_QUERY:
199
0
      case OP_QUERYI:
200
0
      case OP_NOTQUERY:
201
0
      case OP_NOTQUERYI:
202
0
      case OP_MINQUERY:
203
0
      case OP_MINQUERYI:
204
0
      case OP_NOTMINQUERY:
205
0
      case OP_NOTMINQUERYI:
206
0
      case OP_POSQUERY:
207
0
      case OP_POSQUERYI:
208
0
      case OP_NOTPOSQUERY:
209
0
      case OP_NOTPOSQUERYI:
210
0
      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
211
0
      break;
212
0
      }
213
#else
214
    (void)(utf);  /* Keep compiler happy by referencing function argument */
215
#endif  /* MAYBE_UTF_MULTI */
216
140k
    }
217
142k
  }
218
264
}
219
220
/* End of pcre2_find_bracket.c */