/src/php-src/ext/pcre/pcre2lib/pcre2_find_bracket.c
Line | Count | Source (jump to first uncovered line) |
1 | | /************************************************* |
2 | | * Perl-Compatible Regular Expressions * |
3 | | *************************************************/ |
4 | | |
5 | | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | | and semantics are as close as possible to those of the Perl 5 language. |
7 | | |
8 | | Written by Philip Hazel |
9 | | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | | New API code Copyright (c) 2016-2024 University of Cambridge |
11 | | |
12 | | ----------------------------------------------------------------------------- |
13 | | Redistribution and use in source and binary forms, with or without |
14 | | modification, are permitted provided that the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the University of Cambridge nor the names of its |
24 | | contributors may be used to endorse or promote products derived from |
25 | | this software without specific prior written permission. |
26 | | |
27 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | | POSSIBILITY OF SUCH DAMAGE. |
38 | | ----------------------------------------------------------------------------- |
39 | | */ |
40 | | |
41 | | |
42 | | /* This module contains a single function that scans through a compiled pattern |
43 | | until it finds a capturing bracket with the given number, or, if the number is |
44 | | negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The |
45 | | function is called from pcre2_compile.c and also from pcre2_study.c when |
46 | | finding the minimum matching length. */ |
47 | | |
48 | | |
49 | | #ifdef HAVE_CONFIG_H |
50 | | #include "config.h" |
51 | | #endif |
52 | | |
53 | | #include "pcre2_internal.h" |
54 | | |
55 | | |
56 | | /************************************************* |
57 | | * Scan compiled regex for specific bracket * |
58 | | *************************************************/ |
59 | | |
60 | | /* Walks the compiled code item by item from code until a match or OP_END. |
61 | | Arguments: |
62 | | code points to start of expression (the scan begins at this opcode) |
63 | | utf TRUE in UTF mode (only consulted when MAYBE_UTF_MULTI is defined) |
64 | | number the required bracket number or negative to find a lookbehind |
65 | | (a negative value selects the first OP_REVERSE or OP_VREVERSE item) |
66 | | Returns: pointer to the opcode for the bracket or lookbehind, or NULL if OP_END is reached first |
67 | | */ |
68 | | |
69 | | PCRE2_SPTR |
70 | | PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number) |
71 | 264 | { |
72 | 264 | for (;;) |
73 | 142k | { |
74 | 142k | PCRE2_UCHAR c = *code; |
75 | | |
76 | 142k | if (c == OP_END) return NULL; |
77 | | |
78 | | /* XCLASS is used for classes that cannot be represented just by a bit map. |
79 | | This includes negated single high-valued characters. ECLASS is used for |
80 | | classes that use set operations internally. CALLOUT_STR is used for |
81 | | callouts with string arguments. In each case the length in the table is |
82 | | zero; the actual length is stored in the compiled code and is read with GET(). */ |
83 | | |
84 | 142k | if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1); |
85 | 142k | else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); |
86 | | |
87 | | /* Handle lookbehind: return it when number is negative, otherwise step over it */ |
88 | | |
89 | 142k | else if (c == OP_REVERSE || c == OP_VREVERSE) |
90 | 2 | { |
91 | 2 | if (number < 0) return code; |
92 | 2 | code += PRIV(OP_lengths)[c]; |
93 | 2 | } |
94 | | |
95 | | /* Handle capturing bracket: its number is read from offset 1+LINK_SIZE */ |
96 | | |
97 | 142k | else if (c == OP_CBRA || c == OP_SCBRA || |
98 | 142k | c == OP_CBRAPOS || c == OP_SCBRAPOS) |
99 | 1.41k | { |
100 | 1.41k | int n = (int)GET2(code, 1+LINK_SIZE); |
101 | 1.41k | if (n == number) return code; |
102 | 1.14k | code += PRIV(OP_lengths)[c]; |
103 | 1.14k | } |
104 | | |
105 | | /* Otherwise, we can get the item's length from the table, except that for |
106 | | repeated character types, we have to test for \p and \P, which have an extra |
107 | | two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we |
108 | | must add in its length. Opcodes not listed in the switch need no adjustment. */ |
109 | | |
110 | 140k | else |
111 | 140k | { |
112 | 140k | switch(c) |
113 | 140k | { |
114 | 6 | case OP_TYPESTAR: |
115 | 6 | case OP_TYPEMINSTAR: |
116 | 109 | case OP_TYPEPLUS: |
117 | 126 | case OP_TYPEMINPLUS: |
118 | 491 | case OP_TYPEQUERY: |
119 | 491 | case OP_TYPEMINQUERY: |
120 | 491 | case OP_TYPEPOSSTAR: |
121 | 542 | case OP_TYPEPOSPLUS: |
122 | 824 | case OP_TYPEPOSQUERY: |
123 | 824 | if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; |
124 | 824 | break; |
125 | | |
126 | 0 | case OP_TYPEUPTO: |
127 | 0 | case OP_TYPEMINUPTO: |
128 | 0 | case OP_TYPEEXACT: |
129 | 0 | case OP_TYPEPOSUPTO: |
130 | 0 | if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) |
131 | 0 | code += 2; |
132 | 0 | break; |
133 | | |
134 | 0 | case OP_MARK: |
135 | 0 | case OP_COMMIT_ARG: |
136 | 0 | case OP_PRUNE_ARG: |
137 | 0 | case OP_SKIP_ARG: |
138 | 0 | case OP_THEN_ARG: |
139 | 0 | code += code[1]; |
140 | 0 | break; |
141 | 140k | } |
142 | | |
143 | | /* Add in the fixed length from the table, on top of any adjustment made above */ |
144 | | |
145 | 140k | code += PRIV(OP_lengths)[c]; |
146 | | |
147 | | /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be |
148 | | followed by a multi-byte character. The length in the table is a minimum, so |
149 | | we have to arrange to skip the extra bytes. The test uses code[-1], the last code unit covered by the length just added. */ |
150 | | |
151 | 140k | #ifdef MAYBE_UTF_MULTI |
152 | 140k | if (utf) switch(c) |
153 | 0 | { |
154 | 0 | case OP_CHAR: |
155 | 0 | case OP_CHARI: |
156 | 0 | case OP_NOT: |
157 | 0 | case OP_NOTI: |
158 | 0 | case OP_EXACT: |
159 | 0 | case OP_EXACTI: |
160 | 0 | case OP_NOTEXACT: |
161 | 0 | case OP_NOTEXACTI: |
162 | 0 | case OP_UPTO: |
163 | 0 | case OP_UPTOI: |
164 | 0 | case OP_NOTUPTO: |
165 | 0 | case OP_NOTUPTOI: |
166 | 0 | case OP_MINUPTO: |
167 | 0 | case OP_MINUPTOI: |
168 | 0 | case OP_NOTMINUPTO: |
169 | 0 | case OP_NOTMINUPTOI: |
170 | 0 | case OP_POSUPTO: |
171 | 0 | case OP_POSUPTOI: |
172 | 0 | case OP_NOTPOSUPTO: |
173 | 0 | case OP_NOTPOSUPTOI: |
174 | 0 | case OP_STAR: |
175 | 0 | case OP_STARI: |
176 | 0 | case OP_NOTSTAR: |
177 | 0 | case OP_NOTSTARI: |
178 | 0 | case OP_MINSTAR: |
179 | 0 | case OP_MINSTARI: |
180 | 0 | case OP_NOTMINSTAR: |
181 | 0 | case OP_NOTMINSTARI: |
182 | 0 | case OP_POSSTAR: |
183 | 0 | case OP_POSSTARI: |
184 | 0 | case OP_NOTPOSSTAR: |
185 | 0 | case OP_NOTPOSSTARI: |
186 | 0 | case OP_PLUS: |
187 | 0 | case OP_PLUSI: |
188 | 0 | case OP_NOTPLUS: |
189 | 0 | case OP_NOTPLUSI: |
190 | 0 | case OP_MINPLUS: |
191 | 0 | case OP_MINPLUSI: |
192 | 0 | case OP_NOTMINPLUS: |
193 | 0 | case OP_NOTMINPLUSI: |
194 | 0 | case OP_POSPLUS: |
195 | 0 | case OP_POSPLUSI: |
196 | 0 | case OP_NOTPOSPLUS: |
197 | 0 | case OP_NOTPOSPLUSI: |
198 | 0 | case OP_QUERY: |
199 | 0 | case OP_QUERYI: |
200 | 0 | case OP_NOTQUERY: |
201 | 0 | case OP_NOTQUERYI: |
202 | 0 | case OP_MINQUERY: |
203 | 0 | case OP_MINQUERYI: |
204 | 0 | case OP_NOTMINQUERY: |
205 | 0 | case OP_NOTMINQUERYI: |
206 | 0 | case OP_POSQUERY: |
207 | 0 | case OP_POSQUERYI: |
208 | 0 | case OP_NOTPOSQUERY: |
209 | 0 | case OP_NOTPOSQUERYI: |
210 | 0 | if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); |
211 | 0 | break; |
212 | 0 | } |
213 | | #else |
214 | | (void)(utf); /* Keep compiler happy by referencing function argument */ |
215 | | #endif /* MAYBE_UTF_MULTI */ |
216 | 140k | } |
217 | 142k | } |
218 | 264 | } |
219 | | |
220 | | /* End of pcre2_find_bracket.c */ |
219 | | |
220 | | /* End of pcre2_find_bracket.c */ |