Coverage Report

Created: 2025-07-23 06:33

/src/php-src/ext/pcre/pcre2lib/pcre2_find_bracket.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
/* This module contains a single function that scans through a compiled pattern
43
until it finds a capturing bracket with the given number, or, if the number is
44
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
45
function is called from pcre2_compile.c and also from pcre2_study.c when
46
finding the minimum matching length. */
47
48
49
#ifdef HAVE_CONFIG_H
50
#include "config.h"
51
#endif
52
53
#include "pcre2_internal.h"
54
55
56
/*************************************************
57
*    Scan compiled regex for specific bracket    *
58
*************************************************/
59
60
/*
61
Arguments:
62
  code        points to start of expression
63
  utf         TRUE in UTF mode
64
  number      the required bracket number or negative to find a lookbehind
65
66
Returns:      pointer to the opcode for the bracket, or NULL if not found
67
*/
68
69
PCRE2_SPTR
70
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
71
437
{
72
437
for (;;)
73
915k
  {
74
915k
  PCRE2_UCHAR c = *code;
75
76
915k
  if (c == OP_END) return NULL;
77
78
  /* XCLASS is used for classes that cannot be represented just by a bit map.
79
  This includes negated single high-valued characters. ECLASS is used for
80
  classes that use set operations internally. CALLOUT_STR is used for
81
  callouts with string arguments. In each case the length in the table is
82
  zero; the actual length is stored in the compiled code. */
83
84
915k
  if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
85
915k
  else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
86
87
  /* Handle lookbehind */
88
89
915k
  else if (c == OP_REVERSE || c == OP_VREVERSE)
90
0
    {
91
0
    if (number < 0) return code;
92
0
    code += PRIV(OP_lengths)[c];
93
0
    }
94
95
  /* Handle capturing bracket */
96
97
915k
  else if (c == OP_CBRA || c == OP_SCBRA ||
98
915k
           c == OP_CBRAPOS || c == OP_SCBRAPOS)
99
2.05k
    {
100
2.05k
    int n = (int)GET2(code, 1+LINK_SIZE);
101
2.05k
    if (n == number) return code;
102
1.61k
    code += PRIV(OP_lengths)[c];
103
1.61k
    }
104
105
  /* Otherwise, we can get the item's length from the table, except that for
106
  repeated character types, we have to test for \p and \P, which have an extra
107
  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
108
  must add in its length. */
109
110
913k
  else
111
913k
    {
112
913k
    switch(c)
113
913k
      {
114
5
      case OP_TYPESTAR:
115
5
      case OP_TYPEMINSTAR:
116
114
      case OP_TYPEPLUS:
117
134
      case OP_TYPEMINPLUS:
118
558
      case OP_TYPEQUERY:
119
558
      case OP_TYPEMINQUERY:
120
558
      case OP_TYPEPOSSTAR:
121
559
      case OP_TYPEPOSPLUS:
122
990
      case OP_TYPEPOSQUERY:
123
990
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
124
990
      break;
125
126
0
      case OP_TYPEUPTO:
127
0
      case OP_TYPEMINUPTO:
128
0
      case OP_TYPEEXACT:
129
0
      case OP_TYPEPOSUPTO:
130
0
      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
131
0
        code += 2;
132
0
      break;
133
134
0
      case OP_MARK:
135
0
      case OP_COMMIT_ARG:
136
0
      case OP_PRUNE_ARG:
137
0
      case OP_SKIP_ARG:
138
0
      case OP_THEN_ARG:
139
0
      code += code[1];
140
0
      break;
141
913k
      }
142
143
    /* Add in the fixed length from the table */
144
145
913k
    code += PRIV(OP_lengths)[c];
146
147
  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
148
  followed by a multi-byte character. The length in the table is a minimum, so
149
  we have to arrange to skip the extra bytes. */
150
151
913k
#ifdef MAYBE_UTF_MULTI
152
913k
    if (utf) switch(c)
153
0
      {
154
0
      case OP_CHAR:
155
0
      case OP_CHARI:
156
0
      case OP_NOT:
157
0
      case OP_NOTI:
158
0
      case OP_EXACT:
159
0
      case OP_EXACTI:
160
0
      case OP_NOTEXACT:
161
0
      case OP_NOTEXACTI:
162
0
      case OP_UPTO:
163
0
      case OP_UPTOI:
164
0
      case OP_NOTUPTO:
165
0
      case OP_NOTUPTOI:
166
0
      case OP_MINUPTO:
167
0
      case OP_MINUPTOI:
168
0
      case OP_NOTMINUPTO:
169
0
      case OP_NOTMINUPTOI:
170
0
      case OP_POSUPTO:
171
0
      case OP_POSUPTOI:
172
0
      case OP_NOTPOSUPTO:
173
0
      case OP_NOTPOSUPTOI:
174
0
      case OP_STAR:
175
0
      case OP_STARI:
176
0
      case OP_NOTSTAR:
177
0
      case OP_NOTSTARI:
178
0
      case OP_MINSTAR:
179
0
      case OP_MINSTARI:
180
0
      case OP_NOTMINSTAR:
181
0
      case OP_NOTMINSTARI:
182
0
      case OP_POSSTAR:
183
0
      case OP_POSSTARI:
184
0
      case OP_NOTPOSSTAR:
185
0
      case OP_NOTPOSSTARI:
186
0
      case OP_PLUS:
187
0
      case OP_PLUSI:
188
0
      case OP_NOTPLUS:
189
0
      case OP_NOTPLUSI:
190
0
      case OP_MINPLUS:
191
0
      case OP_MINPLUSI:
192
0
      case OP_NOTMINPLUS:
193
0
      case OP_NOTMINPLUSI:
194
0
      case OP_POSPLUS:
195
0
      case OP_POSPLUSI:
196
0
      case OP_NOTPOSPLUS:
197
0
      case OP_NOTPOSPLUSI:
198
0
      case OP_QUERY:
199
0
      case OP_QUERYI:
200
0
      case OP_NOTQUERY:
201
0
      case OP_NOTQUERYI:
202
0
      case OP_MINQUERY:
203
0
      case OP_MINQUERYI:
204
0
      case OP_NOTMINQUERY:
205
0
      case OP_NOTMINQUERYI:
206
0
      case OP_POSQUERY:
207
0
      case OP_POSQUERYI:
208
0
      case OP_NOTPOSQUERY:
209
0
      case OP_NOTPOSQUERYI:
210
0
      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
211
0
      break;
212
0
      }
213
#else
214
    (void)(utf);  /* Keep compiler happy by referencing function argument */
215
#endif  /* MAYBE_UTF_MULTI */
216
913k
    }
217
915k
  }
218
437
}
219
220
/* End of pcre2_find_bracket.c */