/src/Python-3.8.3/Modules/sre_lib.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Secret Labs' Regular Expression Engine |
3 | | * |
4 | | * regular expression matching engine |
5 | | * |
6 | | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
7 | | * |
8 | | * See the _sre.c file for information on usage and redistribution. |
9 | | */ |
10 | | |
11 | | /* String matching engine */ |
12 | | |
13 | | /* This file is included three times, with different character settings */ |
14 | | |
15 | | LOCAL(int) |
16 | | SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) |
17 | 3 | { |
18 | | /* check if pointer is at given position */ |
19 | | |
20 | 3 | Py_ssize_t thisp, thatp; |
21 | | |
22 | 3 | switch (at) { |
23 | | |
24 | 3 | case SRE_AT_BEGINNING: |
25 | 3 | case SRE_AT_BEGINNING_STRING: |
26 | 3 | return ((void*) ptr == state->beginning); |
27 | | |
28 | 0 | case SRE_AT_BEGINNING_LINE: |
29 | 0 | return ((void*) ptr == state->beginning || |
30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); |
31 | | |
32 | 0 | case SRE_AT_END: |
33 | 0 | return (((SRE_CHAR *)state->end - ptr == 1 && |
34 | 0 | SRE_IS_LINEBREAK((int) ptr[0])) || |
35 | 0 | ((void*) ptr == state->end)); |
36 | | |
37 | 0 | case SRE_AT_END_LINE: |
38 | 0 | return ((void*) ptr == state->end || |
39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); |
40 | | |
41 | 0 | case SRE_AT_END_STRING: |
42 | 0 | return ((void*) ptr == state->end); |
43 | | |
44 | 0 | case SRE_AT_BOUNDARY: |
45 | 0 | if (state->beginning == state->end) |
46 | 0 | return 0; |
47 | 0 | thatp = ((void*) ptr > state->beginning) ? |
48 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; |
49 | 0 | thisp = ((void*) ptr < state->end) ? |
50 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; |
51 | 0 | return thisp != thatp; |
52 | | |
53 | 0 | case SRE_AT_NON_BOUNDARY: |
54 | 0 | if (state->beginning == state->end) |
55 | 0 | return 0; |
56 | 0 | thatp = ((void*) ptr > state->beginning) ? |
57 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; |
58 | 0 | thisp = ((void*) ptr < state->end) ? |
59 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; |
60 | 0 | return thisp == thatp; |
61 | | |
62 | 0 | case SRE_AT_LOC_BOUNDARY: |
63 | 0 | if (state->beginning == state->end) |
64 | 0 | return 0; |
65 | 0 | thatp = ((void*) ptr > state->beginning) ? |
66 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
67 | 0 | thisp = ((void*) ptr < state->end) ? |
68 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
69 | 0 | return thisp != thatp; |
70 | | |
71 | 0 | case SRE_AT_LOC_NON_BOUNDARY: |
72 | 0 | if (state->beginning == state->end) |
73 | 0 | return 0; |
74 | 0 | thatp = ((void*) ptr > state->beginning) ? |
75 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
76 | 0 | thisp = ((void*) ptr < state->end) ? |
77 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
78 | 0 | return thisp == thatp; |
79 | | |
80 | 0 | case SRE_AT_UNI_BOUNDARY: |
81 | 0 | if (state->beginning == state->end) |
82 | 0 | return 0; |
83 | 0 | thatp = ((void*) ptr > state->beginning) ? |
84 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
85 | 0 | thisp = ((void*) ptr < state->end) ? |
86 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
87 | 0 | return thisp != thatp; |
88 | | |
89 | 0 | case SRE_AT_UNI_NON_BOUNDARY: |
90 | 0 | if (state->beginning == state->end) |
91 | 0 | return 0; |
92 | 0 | thatp = ((void*) ptr > state->beginning) ? |
93 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
94 | 0 | thisp = ((void*) ptr < state->end) ? |
95 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
96 | 0 | return thisp == thatp; |
97 | | |
98 | 3 | } |
99 | | |
100 | 0 | return 0; |
101 | 3 | } Line | Count | Source | 17 | 3 | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 3 | Py_ssize_t thisp, thatp; | 21 | | | 22 | 3 | switch (at) { | 23 | | | 24 | 3 | case SRE_AT_BEGINNING: | 25 | 3 | case SRE_AT_BEGINNING_STRING: | 26 | 3 | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 0 | case SRE_AT_END: | 33 | 0 | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 0 | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 0 | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 0 | case SRE_AT_END_STRING: | 42 | 0 | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | if (state->beginning == state->end) | 46 | 0 | return 0; | 47 | 0 | thatp = ((void*) ptr > state->beginning) ? | 48 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 49 | 0 | thisp = ((void*) ptr < state->end) ? | 50 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 51 | 0 | return thisp != thatp; | 52 | | | 53 | 0 | case SRE_AT_NON_BOUNDARY: | 54 | 0 | if (state->beginning == state->end) | 55 | 0 | return 0; | 56 | 0 | thatp = ((void*) ptr > state->beginning) ? | 57 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 58 | 0 | thisp = ((void*) ptr < state->end) ? | 59 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 60 | 0 | return thisp == thatp; | 61 | | | 62 | 0 | case SRE_AT_LOC_BOUNDARY: | 63 | 0 | if (state->beginning == state->end) | 64 | 0 | return 0; | 65 | 0 | thatp = ((void*) ptr > state->beginning) ? | 66 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 67 | 0 | thisp = ((void*) ptr < state->end) ? 
| 68 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 69 | 0 | return thisp != thatp; | 70 | | | 71 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 72 | 0 | if (state->beginning == state->end) | 73 | 0 | return 0; | 74 | 0 | thatp = ((void*) ptr > state->beginning) ? | 75 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 76 | 0 | thisp = ((void*) ptr < state->end) ? | 77 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 78 | 0 | return thisp == thatp; | 79 | | | 80 | 0 | case SRE_AT_UNI_BOUNDARY: | 81 | 0 | if (state->beginning == state->end) | 82 | 0 | return 0; | 83 | 0 | thatp = ((void*) ptr > state->beginning) ? | 84 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 85 | 0 | thisp = ((void*) ptr < state->end) ? | 86 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 87 | 0 | return thisp != thatp; | 88 | | | 89 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 90 | 0 | if (state->beginning == state->end) | 91 | 0 | return 0; | 92 | 0 | thatp = ((void*) ptr > state->beginning) ? | 93 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 94 | 0 | thisp = ((void*) ptr < state->end) ? | 95 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 96 | 0 | return thisp == thatp; | 97 | | | 98 | 3 | } | 99 | | | 100 | 0 | return 0; | 101 | 3 | } |
Unexecuted instantiation: _sre.c:sre_ucs2_at Unexecuted instantiation: _sre.c:sre_ucs4_at |
102 | | |
103 | | LOCAL(int) |
104 | | SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) |
105 | 29 | { |
106 | | /* check if character is a member of the given set */ |
107 | | |
108 | 29 | int ok = 1; |
109 | | |
110 | 38 | for (;;) { |
111 | 38 | switch (*set++) { |
112 | | |
113 | 9 | case SRE_OP_FAILURE: |
114 | 9 | return !ok; |
115 | | |
116 | 0 | case SRE_OP_LITERAL: |
117 | | /* <LITERAL> <code> */ |
118 | 0 | if (ch == set[0]) |
119 | 0 | return ok; |
120 | 0 | set++; |
121 | 0 | break; |
122 | | |
123 | 20 | case SRE_OP_CATEGORY: |
124 | | /* <CATEGORY> <code> */ |
125 | 20 | if (sre_category(set[0], (int) ch)) |
126 | 16 | return ok; |
127 | 4 | set++; |
128 | 4 | break; |
129 | | |
130 | 7 | case SRE_OP_CHARSET: |
131 | | /* <CHARSET> <bitmap> */ |
132 | 7 | if (ch < 256 && |
133 | 7 | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) |
134 | 2 | return ok; |
135 | 5 | set += 256/SRE_CODE_BITS; |
136 | 5 | break; |
137 | | |
138 | 0 | case SRE_OP_RANGE: |
139 | | /* <RANGE> <lower> <upper> */ |
140 | 0 | if (set[0] <= ch && ch <= set[1]) |
141 | 0 | return ok; |
142 | 0 | set += 2; |
143 | 0 | break; |
144 | | |
145 | 0 | case SRE_OP_RANGE_UNI_IGNORE: |
146 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ |
147 | 0 | { |
148 | 0 | SRE_CODE uch; |
149 | | /* ch is already lower cased */ |
150 | 0 | if (set[0] <= ch && ch <= set[1]) |
151 | 0 | return ok; |
152 | 0 | uch = sre_upper_unicode(ch); |
153 | 0 | if (set[0] <= uch && uch <= set[1]) |
154 | 0 | return ok; |
155 | 0 | set += 2; |
156 | 0 | break; |
157 | 0 | } |
158 | | |
159 | 0 | case SRE_OP_NEGATE: |
160 | 0 | ok = !ok; |
161 | 0 | break; |
162 | | |
163 | 2 | case SRE_OP_BIGCHARSET: |
164 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ |
165 | 2 | { |
166 | 2 | Py_ssize_t count, block; |
167 | 2 | count = *(set++); |
168 | | |
169 | 2 | if (ch < 0x10000u) |
170 | 2 | block = ((unsigned char*)set)[ch >> 8]; |
171 | 0 | else |
172 | 0 | block = -1; |
173 | 2 | set += 256/sizeof(SRE_CODE); |
174 | 2 | if (block >=0 && |
175 | 2 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & |
176 | 2 | (1u << (ch & (SRE_CODE_BITS-1))))) |
177 | 2 | return ok; |
178 | 0 | set += count * (256/SRE_CODE_BITS); |
179 | 0 | break; |
180 | 2 | } |
181 | | |
182 | 0 | default: |
183 | | /* internal error -- there's not much we can do about it |
184 | | here, so let's just pretend it didn't match... */ |
185 | 0 | return 0; |
186 | 38 | } |
187 | 38 | } |
188 | 29 | } Line | Count | Source | 105 | 29 | { | 106 | | /* check if character is a member of the given set */ | 107 | | | 108 | 29 | int ok = 1; | 109 | | | 110 | 38 | for (;;) { | 111 | 38 | switch (*set++) { | 112 | | | 113 | 9 | case SRE_OP_FAILURE: | 114 | 9 | return !ok; | 115 | | | 116 | 0 | case SRE_OP_LITERAL: | 117 | | /* <LITERAL> <code> */ | 118 | 0 | if (ch == set[0]) | 119 | 0 | return ok; | 120 | 0 | set++; | 121 | 0 | break; | 122 | | | 123 | 20 | case SRE_OP_CATEGORY: | 124 | | /* <CATEGORY> <code> */ | 125 | 20 | if (sre_category(set[0], (int) ch)) | 126 | 16 | return ok; | 127 | 4 | set++; | 128 | 4 | break; | 129 | | | 130 | 7 | case SRE_OP_CHARSET: | 131 | | /* <CHARSET> <bitmap> */ | 132 | 7 | if (ch < 256 && | 133 | 7 | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 134 | 2 | return ok; | 135 | 5 | set += 256/SRE_CODE_BITS; | 136 | 5 | break; | 137 | | | 138 | 0 | case SRE_OP_RANGE: | 139 | | /* <RANGE> <lower> <upper> */ | 140 | 0 | if (set[0] <= ch && ch <= set[1]) | 141 | 0 | return ok; | 142 | 0 | set += 2; | 143 | 0 | break; | 144 | | | 145 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 146 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 147 | 0 | { | 148 | 0 | SRE_CODE uch; | 149 | | /* ch is already lower cased */ | 150 | 0 | if (set[0] <= ch && ch <= set[1]) | 151 | 0 | return ok; | 152 | 0 | uch = sre_upper_unicode(ch); | 153 | 0 | if (set[0] <= uch && uch <= set[1]) | 154 | 0 | return ok; | 155 | 0 | set += 2; | 156 | 0 | break; | 157 | 0 | } | 158 | | | 159 | 0 | case SRE_OP_NEGATE: | 160 | 0 | ok = !ok; | 161 | 0 | break; | 162 | | | 163 | 2 | case SRE_OP_BIGCHARSET: | 164 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 165 | 2 | { | 166 | 2 | Py_ssize_t count, block; | 167 | 2 | count = *(set++); | 168 | | | 169 | 2 | if (ch < 0x10000u) | 170 | 2 | block = ((unsigned char*)set)[ch >> 8]; | 171 | 0 | else | 172 | 0 | block = -1; | 173 | 2 | set += 256/sizeof(SRE_CODE); | 174 | 2 | if (block >=0 && | 175 | 
2 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 176 | 2 | (1u << (ch & (SRE_CODE_BITS-1))))) | 177 | 2 | return ok; | 178 | 0 | set += count * (256/SRE_CODE_BITS); | 179 | 0 | break; | 180 | 2 | } | 181 | | | 182 | 0 | default: | 183 | | /* internal error -- there's not much we can do about it | 184 | | here, so let's just pretend it didn't match... */ | 185 | 0 | return 0; | 186 | 38 | } | 187 | 38 | } | 188 | 29 | } |
Unexecuted instantiation: _sre.c:sre_ucs2_charset Unexecuted instantiation: _sre.c:sre_ucs4_charset |
189 | | |
190 | | LOCAL(int) |
191 | | SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) |
192 | 0 | { |
193 | 0 | SRE_CODE lo, up; |
194 | 0 | lo = sre_lower_locale(ch); |
195 | 0 | if (SRE(charset)(state, set, lo)) |
196 | 0 | return 1; |
197 | | |
198 | 0 | up = sre_upper_locale(ch); |
199 | 0 | return up != lo && SRE(charset)(state, set, up); |
200 | 0 | } Unexecuted instantiation: _sre.c:sre_ucs1_charset_loc_ignore Unexecuted instantiation: _sre.c:sre_ucs2_charset_loc_ignore Unexecuted instantiation: _sre.c:sre_ucs4_charset_loc_ignore |
201 | | |
202 | | LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel); |
203 | | |
204 | | LOCAL(Py_ssize_t) |
205 | | SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) |
206 | 122 | { |
207 | 122 | SRE_CODE chr; |
208 | 122 | SRE_CHAR c; |
209 | 122 | SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; |
210 | 122 | SRE_CHAR* end = (SRE_CHAR *)state->end; |
211 | 122 | Py_ssize_t i; |
212 | | |
213 | | /* adjust end */ |
214 | 122 | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) |
215 | 111 | end = ptr + maxcount; |
216 | | |
217 | 122 | switch (pattern[0]) { |
218 | | |
219 | 9 | case SRE_OP_IN: |
220 | | /* repeated set */ |
221 | 9 | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); |
222 | 25 | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) |
223 | 16 | ptr++; |
224 | 9 | break; |
225 | | |
226 | 113 | case SRE_OP_ANY: |
227 | | /* repeated dot wildcard. */ |
228 | 113 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); |
229 | 224 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) |
230 | 111 | ptr++; |
231 | 113 | break; |
232 | | |
233 | 0 | case SRE_OP_ANY_ALL: |
234 | | /* repeated dot wildcard. skip to the end of the target |
235 | | string, and backtrack from there */ |
236 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); |
237 | 0 | ptr = end; |
238 | 0 | break; |
239 | | |
240 | 0 | case SRE_OP_LITERAL: |
241 | | /* repeated literal */ |
242 | 0 | chr = pattern[1]; |
243 | 0 | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); |
244 | 0 | c = (SRE_CHAR) chr; |
245 | | #if SIZEOF_SRE_CHAR < 4 |
246 | 0 | if ((SRE_CODE) c != chr) |
247 | 0 | ; /* literal can't match: doesn't fit in char width */ |
248 | 0 | else |
249 | 0 | #endif |
250 | 0 | while (ptr < end && *ptr == c) |
251 | 0 | ptr++; |
252 | 0 | break; |
253 | | |
254 | 0 | case SRE_OP_LITERAL_IGNORE: |
255 | | /* repeated literal */ |
256 | 0 | chr = pattern[1]; |
257 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
258 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) |
259 | 0 | ptr++; |
260 | 0 | break; |
261 | | |
262 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: |
263 | | /* repeated literal */ |
264 | 0 | chr = pattern[1]; |
265 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); |
266 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) |
267 | 0 | ptr++; |
268 | 0 | break; |
269 | | |
270 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: |
271 | | /* repeated literal */ |
272 | 0 | chr = pattern[1]; |
273 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
274 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) |
275 | 0 | ptr++; |
276 | 0 | break; |
277 | | |
278 | 0 | case SRE_OP_NOT_LITERAL: |
279 | | /* repeated non-literal */ |
280 | 0 | chr = pattern[1]; |
281 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); |
282 | 0 | c = (SRE_CHAR) chr; |
283 | | #if SIZEOF_SRE_CHAR < 4 |
284 | 0 | if ((SRE_CODE) c != chr) |
285 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ |
286 | 0 | else |
287 | 0 | #endif |
288 | 0 | while (ptr < end && *ptr != c) |
289 | 0 | ptr++; |
290 | 0 | break; |
291 | | |
292 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: |
293 | | /* repeated non-literal */ |
294 | 0 | chr = pattern[1]; |
295 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
296 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) |
297 | 0 | ptr++; |
298 | 0 | break; |
299 | | |
300 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: |
301 | | /* repeated non-literal */ |
302 | 0 | chr = pattern[1]; |
303 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); |
304 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) |
305 | 0 | ptr++; |
306 | 0 | break; |
307 | | |
308 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
309 | | /* repeated non-literal */ |
310 | 0 | chr = pattern[1]; |
311 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
312 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) |
313 | 0 | ptr++; |
314 | 0 | break; |
315 | | |
316 | 0 | default: |
317 | | /* repeated single character pattern */ |
318 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); |
319 | 0 | while ((SRE_CHAR*) state->ptr < end) { |
320 | 0 | i = SRE(match)(state, pattern, 0); |
321 | 0 | if (i < 0) |
322 | 0 | return i; |
323 | 0 | if (!i) |
324 | 0 | break; |
325 | 0 | } |
326 | 0 | TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, |
327 | 0 | (SRE_CHAR*) state->ptr - ptr)); |
328 | 0 | return (SRE_CHAR*) state->ptr - ptr; |
329 | 122 | } |
330 | | |
331 | 122 | TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, |
332 | 122 | ptr - (SRE_CHAR*) state->ptr)); |
333 | 122 | return ptr - (SRE_CHAR*) state->ptr; |
334 | 122 | } Line | Count | Source | 206 | 122 | { | 207 | 122 | SRE_CODE chr; | 208 | 122 | SRE_CHAR c; | 209 | 122 | SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; | 210 | 122 | SRE_CHAR* end = (SRE_CHAR *)state->end; | 211 | 122 | Py_ssize_t i; | 212 | | | 213 | | /* adjust end */ | 214 | 122 | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 215 | 111 | end = ptr + maxcount; | 216 | | | 217 | 122 | switch (pattern[0]) { | 218 | | | 219 | 9 | case SRE_OP_IN: | 220 | | /* repeated set */ | 221 | 9 | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 222 | 25 | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 223 | 16 | ptr++; | 224 | 9 | break; | 225 | | | 226 | 113 | case SRE_OP_ANY: | 227 | | /* repeated dot wildcard. */ | 228 | 113 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 229 | 224 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 230 | 111 | ptr++; | 231 | 113 | break; | 232 | | | 233 | 0 | case SRE_OP_ANY_ALL: | 234 | | /* repeated dot wildcard. skip to the end of the target | 235 | | string, and backtrack from there */ | 236 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 237 | 0 | ptr = end; | 238 | 0 | break; | 239 | | | 240 | 0 | case SRE_OP_LITERAL: | 241 | | /* repeated literal */ | 242 | 0 | chr = pattern[1]; | 243 | 0 | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 244 | 0 | c = (SRE_CHAR) chr; | 245 | 0 | #if SIZEOF_SRE_CHAR < 4 | 246 | 0 | if ((SRE_CODE) c != chr) | 247 | 0 | ; /* literal can't match: doesn't fit in char width */ | 248 | 0 | else | 249 | 0 | #endif | 250 | 0 | while (ptr < end && *ptr == c) | 251 | 0 | ptr++; | 252 | 0 | break; | 253 | | | 254 | 0 | case SRE_OP_LITERAL_IGNORE: | 255 | | /* repeated literal */ | 256 | 0 | chr = pattern[1]; | 257 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 258 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 259 | 0 | ptr++; | 260 | 0 | break; | 261 | | | 262 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 263 | | /* 
repeated literal */ | 264 | 0 | chr = pattern[1]; | 265 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 266 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 267 | 0 | ptr++; | 268 | 0 | break; | 269 | | | 270 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 271 | | /* repeated literal */ | 272 | 0 | chr = pattern[1]; | 273 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 274 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 275 | 0 | ptr++; | 276 | 0 | break; | 277 | | | 278 | 0 | case SRE_OP_NOT_LITERAL: | 279 | | /* repeated non-literal */ | 280 | 0 | chr = pattern[1]; | 281 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 282 | 0 | c = (SRE_CHAR) chr; | 283 | 0 | #if SIZEOF_SRE_CHAR < 4 | 284 | 0 | if ((SRE_CODE) c != chr) | 285 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ | 286 | 0 | else | 287 | 0 | #endif | 288 | 0 | while (ptr < end && *ptr != c) | 289 | 0 | ptr++; | 290 | 0 | break; | 291 | | | 292 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 293 | | /* repeated non-literal */ | 294 | 0 | chr = pattern[1]; | 295 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 296 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 297 | 0 | ptr++; | 298 | 0 | break; | 299 | | | 300 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 301 | | /* repeated non-literal */ | 302 | 0 | chr = pattern[1]; | 303 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 304 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 305 | 0 | ptr++; | 306 | 0 | break; | 307 | | | 308 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 309 | | /* repeated non-literal */ | 310 | 0 | chr = pattern[1]; | 311 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 312 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 313 | 0 | ptr++; | 314 | 0 | break; | 315 | | | 316 | 0 | default: | 
317 | | /* repeated single character pattern */ | 318 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 319 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 320 | 0 | i = SRE(match)(state, pattern, 0); | 321 | 0 | if (i < 0) | 322 | 0 | return i; | 323 | 0 | if (!i) | 324 | 0 | break; | 325 | 0 | } | 326 | 0 | TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, | 327 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 328 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 329 | 122 | } | 330 | | | 331 | 122 | TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, | 332 | 122 | ptr - (SRE_CHAR*) state->ptr)); | 333 | 122 | return ptr - (SRE_CHAR*) state->ptr; | 334 | 122 | } |
Unexecuted instantiation: _sre.c:sre_ucs2_count Unexecuted instantiation: _sre.c:sre_ucs4_count |
335 | | |
336 | | #if 0 /* not used in this release */ |
337 | | LOCAL(int) |
338 | | SRE(info)(SRE_STATE* state, SRE_CODE* pattern) |
339 | | { |
340 | | /* check if an SRE_OP_INFO block matches at the current position. |
341 | | returns the number of SRE_CODE objects to skip if successful, 0 |
342 | | if no match */ |
343 | | |
344 | | SRE_CHAR* end = (SRE_CHAR*) state->end; |
345 | | SRE_CHAR* ptr = (SRE_CHAR*) state->ptr; |
346 | | Py_ssize_t i; |
347 | | |
348 | | /* check minimal length */ |
349 | | if (pattern[3] && end - ptr < pattern[3]) |
350 | | return 0; |
351 | | |
352 | | /* check known prefix */ |
353 | | if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { |
354 | | /* <length> <skip> <prefix data> <overlap data> */ |
355 | | for (i = 0; i < pattern[5]; i++) |
356 | | if ((SRE_CODE) ptr[i] != pattern[7 + i]) |
357 | | return 0; |
358 | | return pattern[0] + 2 * pattern[6]; |
359 | | } |
360 | | return pattern[0]; |
361 | | } |
362 | | #endif |
363 | | |
364 | | /* The macros below should be used to protect recursive SRE(match)() |
365 | | * calls that *failed* and do *not* return immediately (IOW, those |
366 | | * that will backtrack). Explaining: |
367 | | * |
368 | | * - Recursive SRE(match)() returned true: that's usually a success |
369 | | * (besides atypical cases like ASSERT_NOT), therefore there's no |
370 | | * reason to restore lastmark; |
371 | | * |
372 | | * - Recursive SRE(match)() returned false but the current SRE(match)() |
373 | | * is returning to the caller: If the current SRE(match)() is the |
374 | | * top function of the recursion, returning false will be a matching |
375 | | * failure, and it doesn't matter where lastmark is pointing to. |
376 | | * If it's *not* the top function, it will be a recursive SRE(match)() |
377 | | * failure by itself, and the calling SRE(match)() will have to deal |
378 | | * with the failure by the same rules explained here (it will restore |
379 | | * lastmark by itself if necessary); |
380 | | * |
381 | | * - Recursive SRE(match)() returned false, and will continue the |
382 | | * outside 'for' loop: must be protected when breaking, since the next |
383 | | * OP could potentially depend on lastmark; |
384 | | * |
385 | | * - Recursive SRE(match)() returned false, and will be called again |
386 | | * inside a local for/while loop: must be protected between each |
387 | | * loop iteration, since the recursive SRE(match)() could do anything, |
388 | | * and could potentially depend on lastmark. |
389 | | * |
390 | | * For more information, check the discussion at SF patch #712900. |
391 | | */ |
/* Copy state->lastmark and state->lastindex into the current match
   context.  Expects `state` and `ctx` to be in scope at the point of
   use. */
#define LASTMARK_SAVE() \
    do { \
        ctx->lastmark = state->lastmark; \
        ctx->lastindex = state->lastindex; \
    } while (0)

/* Inverse of LASTMARK_SAVE(): write the values held in the current
   match context back into `state`. */
#define LASTMARK_RESTORE() \
    do { \
        state->lastmark = ctx->lastmark; \
        state->lastindex = ctx->lastindex; \
    } while (0)
402 | | |
/* Early-exit helpers.
 *
 * RETURN_ERROR leaves the enclosing function at once with the given
 * value.  RETURN_FAILURE / RETURN_SUCCESS store 0 / 1 in the local
 * variable `ret` and jump to the label `exit`; both must exist in the
 * function that uses these macros.
 *
 * The argument `i` is parenthesized in every expansion so that compound
 * expressions (for instance a conditional expression) are tested as a
 * whole.  It is still evaluated more than once, so pass an expression
 * without side effects.
 */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

/* negative value: propagate it to the caller immediately */
#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
/* negative: propagate; positive: finish with success; zero: carry on */
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
/* negative: propagate; zero: finish with failure; positive: carry on */
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
413 | | |
/* Helpers for the byte stack kept in state->data_stack.
 *
 * state->data_stack_base is the offset of the first free byte and
 * state->data_stack_size the current capacity.  The macros that may
 * grow the stack (ALLOC, PUSH) call data_stack_grow(); a negative
 * result from it is returned from the *enclosing function*.  After a
 * grow they look `ctx` up again from `ctx_pos`, because `ctx` points
 * into the stack buffer.  They therefore expect `ctx`, `ctx_pos` and
 * (for ALLOC) `alloc_pos` to be in scope.
 *
 * Value parameters are parenthesized in the expansions so that compound
 * argument expressions such as `a + b` keep their meaning.  They may
 * still be evaluated more than once.  Note that ALLOC and PUSH declare
 * a block-local `j`, so an argument expression must not refer to a
 * caller variable of that name.
 */

/* Reserve sizeof(type) bytes on top of the stack; `ptr` is set to the
   reserved area and `alloc_pos` to its offset. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = (state)->data_stack_base; \
    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
    if (sizeof(type) > (state)->data_stack_size - alloc_pos) { \
        int j = data_stack_grow((state), sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE(match_context), ctx, ctx_pos); \
    } \
    ptr = (type*)((state)->data_stack+alloc_pos); \
    (state)->data_stack_base += sizeof(type); \
} while (0)

/* Set `ptr` to the object of the given type stored at offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), (pos))); \
    ptr = (type*)((state)->data_stack+(pos)); \
} while (0)

/* Copy `size` bytes from `data` onto the top of the stack. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           (data), (state)->data_stack_base, (size))); \
    if ((size) > (state)->data_stack_size - (state)->data_stack_base) { \
        int j = data_stack_grow((state), (size)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE(match_context), ctx, ctx_pos); \
    } \
    memcpy((state)->data_stack+(state)->data_stack_base, (data), (size)); \
    (state)->data_stack_base += (size); \
} while (0)

/* Copy the topmost `size` bytes of the stack into `data`; the bytes
   are removed from the stack only when `discard` is non-zero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           (data), (state)->data_stack_base-(size), (size))); \
    memcpy((data), (state)->data_stack+(state)->data_stack_base-(size), (size)); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)

/* Drop the topmost `size` bytes without copying them anywhere. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           (state)->data_stack_base-(size), (size))); \
    (state)->data_stack_base -= (size); \
} while(0)

/* Shorthands that operate on the `state` variable in scope; the size
   of a transfer is taken from the pointed-to object. */
#define DATA_PUSH(x) \
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
    DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
479 | | |
/* Save / restore the group marks held in state->mark on the data stack.
 *
 * `lastmark` is the highest valid index into state->mark; nothing is
 * transferred unless it is positive, otherwise (lastmark+1) pointers
 * are copied.  The parameter is parenthesized in every expansion so
 * that compound argument expressions are compared and incremented as a
 * whole.  These macros expect `state` (and, for MARK_PUSH, the local
 * `i`) to be in scope.
 */
#define MARK_PUSH(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            i = (lastmark); /* ctx->lastmark may change if reallocated */ \
            DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
        } \
    } while (0)
/* restore the marks and remove them from the stack */
#define MARK_POP(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 1); \
        } \
    } while (0)
/* restore the marks but leave the saved copy on the stack */
#define MARK_POP_KEEP(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 0); \
        } \
    } while (0)
/* remove the saved copy without restoring anything */
#define MARK_POP_DISCARD(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP_DISCARD(state, ((lastmark)+1)*sizeof(void*)); \
        } \
    } while (0)
497 | | |
/* Jump codes.  DO_JUMPX() stores one of these in the `jump` field of
   the match context it creates; JUMP_NONE is what the outermost context
   of SRE(match)() receives.  They are kept as plain object-like macros:
   this file is included several times, and identical macro
   redefinitions are harmless whereas a repeated enum would not be. */
#define JUMP_NONE            0
#define JUMP_MAX_UNTIL_1     1
#define JUMP_MAX_UNTIL_2     2
#define JUMP_MAX_UNTIL_3     3
#define JUMP_MIN_UNTIL_1     4
#define JUMP_MIN_UNTIL_2     5
#define JUMP_MIN_UNTIL_3     6
#define JUMP_REPEAT          7
#define JUMP_REPEAT_ONE_1    8
#define JUMP_REPEAT_ONE_2    9
#define JUMP_MIN_REPEAT_ONE  10
#define JUMP_BRANCH          11
#define JUMP_ASSERT          12
#define JUMP_ASSERT_NOT      13
512 | | |
/* Start matching `nextpattern` in a fresh match context without a C
   level recursive call: a new context is allocated on the data stack,
   linked to the current one through last_ctx_pos, made current, and
   control jumps to the `entrance` label.  `jumplabel` is placed right
   after the jump; `jumpvalue` is recorded in the new context
   (NOTE(review): presumably so the code that unwinds a context can
   come back to this label -- that code is outside this excerpt).

   The expansion is a statement sequence that ends in `while (0)`, so
   the invocation must be followed by a semicolon and must not be used
   as the unbraced body of an if/else. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
    DATA_ALLOC(SRE(match_context), nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = jumpvalue; \
    nextctx->pattern = nextpattern; \
    nextctx->toplevel = toplevel_; \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    while (0) /* gcc doesn't like labels at end of scopes */

/* as DO_JUMPX, inheriting the toplevel flag of the current context */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)

/* as DO_JUMPX, with the toplevel flag cleared */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
530 | | |
531 | | typedef struct { |
532 | | Py_ssize_t last_ctx_pos; |
533 | | Py_ssize_t jump; |
534 | | SRE_CHAR* ptr; |
535 | | SRE_CODE* pattern; |
536 | | Py_ssize_t count; |
537 | | Py_ssize_t lastmark; |
538 | | Py_ssize_t lastindex; |
539 | | union { |
540 | | SRE_CODE chr; |
541 | | SRE_REPEAT* rep; |
542 | | } u; |
543 | | int toplevel; |
544 | | } SRE(match_context); |
545 | | |
546 | | /* check if string matches the given pattern. returns <0 for |
547 | | error, 0 for failure, and 1 for success. |
 | | |
 | | matching starts at the position found in state->ptr; SRE_OP_SUCCESS |
 | | stores the position where the match ended back into state->ptr. |
 | | when toplevel is nonzero, SRE_OP_SUCCESS additionally enforces |
 | | state->match_all and state->must_advance. |
 | | |
 | | the function does not call itself for sub-patterns: the DO_JUMP* |
 | | macros switch ctx to a new context and goto entrance, and the code |
 | | at the exit label switches back to the previous context and resumes |
 | | at the label selected by the finished context's jump field. */ |
548 | | LOCAL(Py_ssize_t) |
549 | | SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel) |
550 | 6 | { |
551 | 6 | SRE_CHAR* end = (SRE_CHAR *)state->end; |
552 | 6 | Py_ssize_t alloc_pos, ctx_pos = -1; |
553 | 6 | Py_ssize_t i, ret = 0; |
554 | 6 | Py_ssize_t jump; |
555 | 6 | unsigned int sigcount=0; |
556 | | |
557 | 6 | SRE(match_context)* ctx; |
558 | 6 | SRE(match_context)* nextctx; |
559 | | |
560 | 6 | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); |
561 | | |
562 | 6 | DATA_ALLOC(SRE(match_context), ctx); /* allocate the outermost context */ |
563 | 6 | ctx->last_ctx_pos = -1; /* -1 makes the exit code return from the function instead of resuming a parent */ |
564 | 6 | ctx->jump = JUMP_NONE; |
565 | 6 | ctx->pattern = pattern; |
566 | 6 | ctx->toplevel = toplevel; |
567 | 6 | ctx_pos = alloc_pos; |
568 | | |
569 | 141 | entrance: |
570 | | |
571 | 141 | ctx->ptr = (SRE_CHAR *)state->ptr; /* every context starts at the position left in state->ptr */ |
572 | | |
573 | 141 | if (ctx->pattern[0] == SRE_OP_INFO) { |
574 | | /* optimization info block */ |
575 | | /* <INFO> <1=skip> <2=flags> <3=min> ... */ |
576 | 4 | if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { |
577 | 0 | TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, " |
578 | 0 | "need %" PY_FORMAT_SIZE_T "d)\n", |
579 | 0 | end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); |
580 | 0 | RETURN_FAILURE; |
581 | 0 | } |
582 | 4 | ctx->pattern += ctx->pattern[1] + 1; |
583 | 4 | } |
584 | | |
585 | 157 | for (;;) { |
586 | 157 | ++sigcount; /* pending signals are polled once every 0x1000 iterations */ |
587 | 157 | if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) |
588 | 0 | RETURN_ERROR(SRE_ERROR_INTERRUPTED); |
589 | | |
590 | 157 | switch (*ctx->pattern++) { |
591 | | |
592 | 4 | case SRE_OP_MARK: |
593 | | /* set mark */ |
594 | | /* <MARK> <gid> */ |
595 | 4 | TRACE(("|%p|%p|MARK %d\n", ctx->pattern, |
596 | 4 | ctx->ptr, ctx->pattern[0])); |
597 | 4 | i = ctx->pattern[0]; |
598 | 4 | if (i & 1) |
599 | 0 | state->lastindex = i/2 + 1; |
600 | 4 | if (i > state->lastmark) { |
601 | | /* state->lastmark is the highest valid index in the |
602 | | state->mark array. If it is increased by more than 1, |
603 | | the intervening marks must be set to NULL to signal |
604 | | that these marks have not been encountered. */ |
605 | 4 | Py_ssize_t j = state->lastmark + 1; |
606 | 8 | while (j < i) |
607 | 4 | state->mark[j++] = NULL; |
608 | 4 | state->lastmark = i; |
609 | 4 | } |
610 | 4 | state->mark[i] = ctx->ptr; |
611 | 4 | ctx->pattern++; |
612 | 4 | break; |
613 | | |
614 | 120 | case SRE_OP_LITERAL: |
615 | | /* match literal string */ |
616 | | /* <LITERAL> <code> */ |
617 | 120 | TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, |
618 | 120 | ctx->ptr, *ctx->pattern)); |
619 | 120 | if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) |
620 | 115 | RETURN_FAILURE; |
621 | 5 | ctx->pattern++; |
622 | 5 | ctx->ptr++; |
623 | 5 | break; |
624 | | |
625 | 0 | case SRE_OP_NOT_LITERAL: |
626 | | /* match anything that is not literal character */ |
627 | | /* <NOT_LITERAL> <code> */ |
628 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, |
629 | 0 | ctx->ptr, *ctx->pattern)); |
630 | 0 | if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) |
631 | 0 | RETURN_FAILURE; |
632 | 0 | ctx->pattern++; |
633 | 0 | ctx->ptr++; |
634 | 0 | break; |
635 | | |
636 | 4 | case SRE_OP_SUCCESS: |
637 | | /* end of pattern */ |
638 | 4 | TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); |
639 | 4 | if (ctx->toplevel && |
640 | 4 | ((state->match_all && ctx->ptr != state->end) || |
641 | 2 | (state->must_advance && ctx->ptr == state->start))) |
642 | 0 | { |
643 | 0 | RETURN_FAILURE; |
644 | 0 | } |
645 | 4 | state->ptr = ctx->ptr; |
646 | 4 | RETURN_SUCCESS; |
647 | | |
648 | 3 | case SRE_OP_AT: |
649 | | /* match at given position */ |
650 | | /* <AT> <code> */ |
651 | 3 | TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); |
652 | 3 | if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) |
653 | 0 | RETURN_FAILURE; |
654 | 3 | ctx->pattern++; |
655 | 3 | break; |
656 | | |
657 | 0 | case SRE_OP_CATEGORY: |
658 | | /* match at given category */ |
659 | | /* <CATEGORY> <code> */ |
660 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, |
661 | 0 | ctx->ptr, *ctx->pattern)); |
662 | 0 | if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) |
663 | 0 | RETURN_FAILURE; |
664 | 0 | ctx->pattern++; |
665 | 0 | ctx->ptr++; |
666 | 0 | break; |
667 | | |
668 | 0 | case SRE_OP_ANY: |
669 | | /* match anything (except a newline) */ |
670 | | /* <ANY> */ |
671 | 0 | TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); |
672 | 0 | if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) |
673 | 0 | RETURN_FAILURE; |
674 | 0 | ctx->ptr++; |
675 | 0 | break; |
676 | | |
677 | 0 | case SRE_OP_ANY_ALL: |
678 | | /* match anything */ |
679 | | /* <ANY_ALL> */ |
680 | 0 | TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); |
681 | 0 | if (ctx->ptr >= end) |
682 | 0 | RETURN_FAILURE; |
683 | 0 | ctx->ptr++; |
684 | 0 | break; |
685 | | |
686 | 1 | case SRE_OP_IN: |
687 | | /* match set member (or non_member) */ |
688 | | /* <IN> <skip> <set> */ |
689 | 1 | TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); |
690 | 1 | if (ctx->ptr >= end || |
691 | 1 | !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) |
692 | 0 | RETURN_FAILURE; |
693 | 1 | ctx->pattern += ctx->pattern[0]; |
694 | 1 | ctx->ptr++; |
695 | 1 | break; |
696 | | |
697 | 0 | case SRE_OP_LITERAL_IGNORE: |
698 | 0 | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", |
699 | 0 | ctx->pattern, ctx->ptr, ctx->pattern[0])); |
700 | 0 | if (ctx->ptr >= end || |
701 | 0 | sre_lower_ascii(*ctx->ptr) != *ctx->pattern) |
702 | 0 | RETURN_FAILURE; |
703 | 0 | ctx->pattern++; |
704 | 0 | ctx->ptr++; |
705 | 0 | break; |
706 | | |
707 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: |
708 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", |
709 | 0 | ctx->pattern, ctx->ptr, ctx->pattern[0])); |
710 | 0 | if (ctx->ptr >= end || |
711 | 0 | sre_lower_unicode(*ctx->ptr) != *ctx->pattern) |
712 | 0 | RETURN_FAILURE; |
713 | 0 | ctx->pattern++; |
714 | 0 | ctx->ptr++; |
715 | 0 | break; |
716 | | |
717 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: |
718 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", |
719 | 0 | ctx->pattern, ctx->ptr, ctx->pattern[0])); |
720 | 0 | if (ctx->ptr >= end |
721 | 0 | || !char_loc_ignore(*ctx->pattern, *ctx->ptr)) |
722 | 0 | RETURN_FAILURE; |
723 | 0 | ctx->pattern++; |
724 | 0 | ctx->ptr++; |
725 | 0 | break; |
726 | | |
727 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: |
728 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", |
729 | 0 | ctx->pattern, ctx->ptr, *ctx->pattern)); |
730 | 0 | if (ctx->ptr >= end || |
731 | 0 | sre_lower_ascii(*ctx->ptr) == *ctx->pattern) |
732 | 0 | RETURN_FAILURE; |
733 | 0 | ctx->pattern++; |
734 | 0 | ctx->ptr++; |
735 | 0 | break; |
736 | | |
737 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: |
738 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", |
739 | 0 | ctx->pattern, ctx->ptr, *ctx->pattern)); |
740 | 0 | if (ctx->ptr >= end || |
741 | 0 | sre_lower_unicode(*ctx->ptr) == *ctx->pattern) |
742 | 0 | RETURN_FAILURE; |
743 | 0 | ctx->pattern++; |
744 | 0 | ctx->ptr++; |
745 | 0 | break; |
746 | | |
747 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
748 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", |
749 | 0 | ctx->pattern, ctx->ptr, *ctx->pattern)); |
750 | 0 | if (ctx->ptr >= end |
751 | 0 | || char_loc_ignore(*ctx->pattern, *ctx->ptr)) |
752 | 0 | RETURN_FAILURE; |
753 | 0 | ctx->pattern++; |
754 | 0 | ctx->ptr++; |
755 | 0 | break; |
756 | | |
757 | 0 | case SRE_OP_IN_IGNORE: |
758 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); |
759 | 0 | if (ctx->ptr >= end |
760 | 0 | || !SRE(charset)(state, ctx->pattern+1, |
761 | 0 | (SRE_CODE)sre_lower_ascii(*ctx->ptr))) |
762 | 0 | RETURN_FAILURE; |
763 | 0 | ctx->pattern += ctx->pattern[0]; |
764 | 0 | ctx->ptr++; |
765 | 0 | break; |
766 | | |
767 | 2 | case SRE_OP_IN_UNI_IGNORE: |
768 | 2 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr)); |
769 | 2 | if (ctx->ptr >= end |
770 | 2 | || !SRE(charset)(state, ctx->pattern+1, |
771 | 2 | (SRE_CODE)sre_lower_unicode(*ctx->ptr))) |
772 | 0 | RETURN_FAILURE; |
773 | 2 | ctx->pattern += ctx->pattern[0]; |
774 | 2 | ctx->ptr++; |
775 | 2 | break; |
776 | | |
777 | 0 | case SRE_OP_IN_LOC_IGNORE: |
778 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr)); |
779 | 0 | if (ctx->ptr >= end |
780 | 0 | || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr)) |
781 | 0 | RETURN_FAILURE; |
782 | 0 | ctx->pattern += ctx->pattern[0]; |
783 | 0 | ctx->ptr++; |
784 | 0 | break; |
785 | | |
786 | 1 | case SRE_OP_JUMP: |
787 | 1 | case SRE_OP_INFO: |
788 | | /* jump forward */ |
789 | | /* <JUMP> <offset> */ |
790 | 1 | TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, |
791 | 1 | ctx->ptr, ctx->pattern[0])); |
792 | 1 | ctx->pattern += ctx->pattern[0]; |
793 | 1 | break; |
794 | | |
795 | 3 | case SRE_OP_BRANCH: |
796 | | /* alternation */ |
797 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ |
798 | 3 | TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); |
799 | 3 | LASTMARK_SAVE(); |
800 | 3 | ctx->u.rep = state->repeat; |
801 | 3 | if (ctx->u.rep) |
802 | 2 | MARK_PUSH(ctx->lastmark); |
803 | 7 | for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { |
804 | 5 | if (ctx->pattern[1] == SRE_OP_LITERAL && |
805 | 5 | (ctx->ptr >= end || |
806 | 2 | (SRE_CODE) *ctx->ptr != ctx->pattern[2])) |
807 | 2 | continue; /* quick reject: alternative starts with a literal that cannot match here */ |
808 | 3 | if (ctx->pattern[1] == SRE_OP_IN && |
809 | 3 | (ctx->ptr >= end || |
810 | 1 | !SRE(charset)(state, ctx->pattern + 3, |
811 | 1 | (SRE_CODE) *ctx->ptr))) |
812 | 0 | continue; /* quick reject: alternative starts with a set that cannot match here */ |
813 | 3 | state->ptr = ctx->ptr; |
814 | 3 | DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); |
815 | 3 | if (ret) { |
816 | 1 | if (ctx->u.rep) |
817 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
818 | 1 | RETURN_ON_ERROR(ret); |
819 | 1 | RETURN_SUCCESS; |
820 | 1 | } |
821 | 2 | if (ctx->u.rep) |
822 | 2 | MARK_POP_KEEP(ctx->lastmark); |
823 | 2 | LASTMARK_RESTORE(); |
824 | 2 | } |
825 | 2 | if (ctx->u.rep) |
826 | 2 | MARK_POP_DISCARD(ctx->lastmark); |
827 | 2 | RETURN_FAILURE; |
828 | | |
829 | 9 | case SRE_OP_REPEAT_ONE: |
830 | | /* match repeated sequence (maximizing regexp) */ |
831 | | |
832 | | /* this operator only works if the repeated item is |
833 | | exactly one character wide, and we're not already |
834 | | collecting backtracking points. for other cases, |
835 | | use the MAX_REPEAT operator */ |
836 | | |
837 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
838 | | |
839 | 9 | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, |
840 | 9 | ctx->pattern[1], ctx->pattern[2])); |
841 | | |
842 | 9 | if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) |
843 | 0 | RETURN_FAILURE; /* cannot match */ |
844 | | |
845 | 9 | state->ptr = ctx->ptr; |
846 | | |
847 | 9 | ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); |
848 | 9 | RETURN_ON_ERROR(ret); |
849 | 9 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
850 | 9 | ctx->count = ret; |
851 | 9 | ctx->ptr += ctx->count; |
852 | | |
853 | | /* when we arrive here, count contains the number of |
854 | | matches, and ctx->ptr points to the tail of the target |
855 | | string. check if the rest of the pattern matches, |
856 | | and backtrack if not. */ |
857 | | |
858 | 9 | if (ctx->count < (Py_ssize_t) ctx->pattern[1]) |
859 | 2 | RETURN_FAILURE; |
860 | | |
861 | 7 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && |
862 | 7 | ctx->ptr == state->end && |
863 | 7 | !(ctx->toplevel && state->must_advance && ctx->ptr == state->start)) |
864 | 0 | { |
865 | | /* tail is empty. we're finished */ |
866 | 0 | state->ptr = ctx->ptr; |
867 | 0 | RETURN_SUCCESS; |
868 | 0 | } |
869 | | |
870 | 7 | LASTMARK_SAVE(); |
871 | | |
872 | 7 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { |
873 | | /* tail starts with a literal. skip positions where |
874 | | the rest of the pattern cannot possibly match */ |
875 | 4 | ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; |
876 | 6 | for (;;) { |
877 | 6 | while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && |
878 | 6 | (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { |
879 | 0 | ctx->ptr--; |
880 | 0 | ctx->count--; |
881 | 0 | } |
882 | 6 | if (ctx->count < (Py_ssize_t) ctx->pattern[1]) |
883 | 2 | break; |
884 | 4 | state->ptr = ctx->ptr; |
885 | 4 | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, |
886 | 0 | ctx->pattern+ctx->pattern[0]); |
887 | 4 | if (ret) { |
888 | 2 | RETURN_ON_ERROR(ret); |
889 | 2 | RETURN_SUCCESS; |
890 | 2 | } |
891 | | |
892 | 2 | LASTMARK_RESTORE(); |
893 | | |
894 | 2 | ctx->ptr--; |
895 | 2 | ctx->count--; |
896 | 2 | } |
897 | | |
898 | 4 | } else { |
899 | | /* general case */ |
900 | 3 | while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { |
901 | 3 | state->ptr = ctx->ptr; |
902 | 3 | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, |
903 | 0 | ctx->pattern+ctx->pattern[0]); |
904 | 3 | if (ret) { |
905 | 3 | RETURN_ON_ERROR(ret); |
906 | 3 | RETURN_SUCCESS; |
907 | 3 | } |
908 | 0 | ctx->ptr--; |
909 | 0 | ctx->count--; |
910 | 0 | LASTMARK_RESTORE(); |
911 | 0 | } |
912 | 3 | } |
913 | 2 | RETURN_FAILURE; |
914 | | |
915 | 2 | case SRE_OP_MIN_REPEAT_ONE: |
916 | | /* match repeated sequence (minimizing regexp) */ |
917 | | |
918 | | /* this operator only works if the repeated item is |
919 | | exactly one character wide, and we're not already |
920 | | collecting backtracking points. for other cases, |
921 | | use the MIN_REPEAT operator */ |
922 | | |
923 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
924 | | |
925 | 2 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, |
926 | 2 | ctx->pattern[1], ctx->pattern[2])); |
927 | | |
928 | 2 | if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) |
929 | 0 | RETURN_FAILURE; /* cannot match */ |
930 | | |
931 | 2 | state->ptr = ctx->ptr; |
932 | | |
933 | 2 | if (ctx->pattern[1] == 0) |
934 | 2 | ctx->count = 0; |
935 | 0 | else { |
936 | | /* count using pattern min as the maximum */ |
937 | 0 | ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); |
938 | 0 | RETURN_ON_ERROR(ret); |
939 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
940 | 0 | if (ret < (Py_ssize_t) ctx->pattern[1]) |
941 | | /* didn't match minimum number of times */ |
942 | 0 | RETURN_FAILURE; |
943 | | /* advance past minimum matches of repeat */ |
944 | 0 | ctx->count = ret; |
945 | 0 | ctx->ptr += ctx->count; |
946 | 0 | } |
947 | | |
948 | 2 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && |
949 | 2 | !(ctx->toplevel && |
950 | 0 | ((state->match_all && ctx->ptr != state->end) || |
951 | 0 | (state->must_advance && ctx->ptr == state->start)))) |
952 | 0 | { |
953 | | /* tail is empty. we're finished */ |
954 | 0 | state->ptr = ctx->ptr; |
955 | 0 | RETURN_SUCCESS; |
956 | 

957 | 2 | } else { |
958 | | /* general case */ |
959 | 2 | LASTMARK_SAVE(); |
960 | 113 | while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT |
961 | 113 | || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { |
962 | 113 | state->ptr = ctx->ptr; |
963 | 113 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, |
964 | 0 | ctx->pattern+ctx->pattern[0]); |
965 | 113 | if (ret) { |
966 | 0 | RETURN_ON_ERROR(ret); |
967 | 0 | RETURN_SUCCESS; |
968 | 0 | } |
969 | 113 | state->ptr = ctx->ptr; |
970 | 113 | ret = SRE(count)(state, ctx->pattern+3, 1); |
971 | 113 | RETURN_ON_ERROR(ret); |
972 | 113 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
973 | 113 | if (ret == 0) |
974 | 2 | break; |
975 | 111 | assert(ret == 1); |
976 | 111 | ctx->ptr++; |
977 | 111 | ctx->count++; |
978 | 111 | LASTMARK_RESTORE(); |
979 | 111 | } |
980 | 2 | } |
981 | 2 | RETURN_FAILURE; |
982 | | |
983 | 4 | case SRE_OP_REPEAT: |
984 | | /* create repeat context. all the hard work is done |
985 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ |
986 | | /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ |
987 | 4 | TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, |
988 | 4 | ctx->pattern[1], ctx->pattern[2])); |
989 | | |
990 | | /* install new repeat context */ |
991 | 4 | ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); |
992 | 4 | if (!ctx->u.rep) { |
993 | 0 | PyErr_NoMemory(); /* NOTE(review): the exception is set, but the next line reports an ordinary match failure rather than an error code -- confirm that callers check for a pending exception */ |
994 | 0 | RETURN_FAILURE; |
995 | 0 | } |
996 | 4 | ctx->u.rep->count = -1; |
997 | 4 | ctx->u.rep->pattern = ctx->pattern; |
998 | 4 | ctx->u.rep->prev = state->repeat; |
999 | 4 | ctx->u.rep->last_ptr = NULL; |
1000 | 4 | state->repeat = ctx->u.rep; |
1001 | | |
1002 | 4 | state->ptr = ctx->ptr; |
1003 | 4 | DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); |
1004 | 4 | state->repeat = ctx->u.rep->prev; /* uninstall the repeat context installed above ... */ |
1005 | 4 | PyObject_FREE(ctx->u.rep); /* ... and release it, whatever the result was */ |
1006 | | |
1007 | 4 | if (ret) { |
1008 | 4 | RETURN_ON_ERROR(ret); |
1009 | 4 | RETURN_SUCCESS; |
1010 | 4 | } |
1011 | 0 | RETURN_FAILURE; |
1012 | | |
1013 | 4 | case SRE_OP_MAX_UNTIL: |
1014 | | /* maximizing repeat */ |
1015 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ |
1016 | | |
1017 | | /* FIXME: we probably need to deal with zero-width |
1018 | | matches in here... */ |
1019 | | |
1020 | 4 | ctx->u.rep = state->repeat; |
1021 | 4 | if (!ctx->u.rep) |
1022 | 0 | RETURN_ERROR(SRE_ERROR_STATE); |
1023 | | |
1024 | 4 | state->ptr = ctx->ptr; |
1025 | | |
1026 | 4 | ctx->count = ctx->u.rep->count+1; |
1027 | | |
1028 | 4 | TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, |
1029 | 4 | ctx->ptr, ctx->count)); |
1030 | | |
1031 | 4 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { |
1032 | | /* not enough matches */ |
1033 | 0 | ctx->u.rep->count = ctx->count; |
1034 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, |
1035 | 0 | ctx->u.rep->pattern+3); |
1036 | 0 | if (ret) { |
1037 | 0 | RETURN_ON_ERROR(ret); |
1038 | 0 | RETURN_SUCCESS; |
1039 | 0 | } |
1040 | 0 | ctx->u.rep->count = ctx->count-1; |
1041 | 0 | state->ptr = ctx->ptr; |
1042 | 0 | RETURN_FAILURE; |
1043 | 0 | } |
1044 | | |
1045 | 4 | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || |
1046 | 4 | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && |
1047 | 4 | state->ptr != ctx->u.rep->last_ptr) { |
1048 | | /* we may have enough matches, but if we can |
1049 | | match another item, do so */ |
1050 | 4 | ctx->u.rep->count = ctx->count; |
1051 | 4 | LASTMARK_SAVE(); |
1052 | 4 | MARK_PUSH(ctx->lastmark); |
1053 | | /* zero-width match protection */ |
1054 | 4 | DATA_PUSH(&ctx->u.rep->last_ptr); |
1055 | 4 | ctx->u.rep->last_ptr = state->ptr; |
1056 | 4 | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, |
1057 | 0 | ctx->u.rep->pattern+3); |
1058 | 4 | DATA_POP(&ctx->u.rep->last_ptr); |
1059 | 4 | if (ret) { |
1060 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1061 | 0 | RETURN_ON_ERROR(ret); |
1062 | 0 | RETURN_SUCCESS; |
1063 | 0 | } |
1064 | 4 | MARK_POP(ctx->lastmark); |
1065 | 4 | LASTMARK_RESTORE(); |
1066 | 4 | ctx->u.rep->count = ctx->count-1; |
1067 | 4 | state->ptr = ctx->ptr; |
1068 | 4 | } |
1069 | | |
1070 | | /* cannot match more repeated items here. make sure the |
1071 | | tail matches */ |
1072 | 4 | state->repeat = ctx->u.rep->prev; |
1073 | 4 | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); |
1074 | 4 | RETURN_ON_SUCCESS(ret); |
1075 | 0 | state->repeat = ctx->u.rep; |
1076 | 0 | state->ptr = ctx->ptr; |
1077 | 0 | RETURN_FAILURE; |
1078 | | |
1079 | 0 | case SRE_OP_MIN_UNTIL: |
1080 | | /* minimizing repeat */ |
1081 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ |
1082 | 

1083 | 0 | ctx->u.rep = state->repeat; |
1084 | 0 | if (!ctx->u.rep) |
1085 | 0 | RETURN_ERROR(SRE_ERROR_STATE); |
1086 | | |
1087 | 0 | state->ptr = ctx->ptr; |
1088 | 

1089 | 0 | ctx->count = ctx->u.rep->count+1; |
1090 | 

1091 | 0 | TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, |
1092 | 0 | ctx->ptr, ctx->count, ctx->u.rep->pattern)); |
1093 | 

1094 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { |
1095 | | /* not enough matches */ |
1096 | 0 | ctx->u.rep->count = ctx->count; |
1097 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, |
1098 | 0 | ctx->u.rep->pattern+3); |
1099 | 0 | if (ret) { |
1100 | 0 | RETURN_ON_ERROR(ret); |
1101 | 0 | RETURN_SUCCESS; |
1102 | 0 | } |
1103 | 0 | ctx->u.rep->count = ctx->count-1; |
1104 | 0 | state->ptr = ctx->ptr; |
1105 | 0 | RETURN_FAILURE; |
1106 | 0 | } |
1107 | | |
1108 | 0 | LASTMARK_SAVE(); |
1109 | | |
1110 | | /* see if the tail matches */ |
1111 | 0 | state->repeat = ctx->u.rep->prev; |
1112 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); |
1113 | 0 | if (ret) { |
1114 | 0 | RETURN_ON_ERROR(ret); |
1115 | 0 | RETURN_SUCCESS; |
1116 | 0 | } |
1117 | | |
1118 | 0 | state->repeat = ctx->u.rep; |
1119 | 0 | state->ptr = ctx->ptr; |
1120 | 

1121 | 0 | LASTMARK_RESTORE(); |
1122 | 

1123 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] |
1124 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || |
1125 | 0 | state->ptr == ctx->u.rep->last_ptr) |
1126 | 0 | RETURN_FAILURE; |
1127 | | |
1128 | 0 | ctx->u.rep->count = ctx->count; |
1129 | | /* zero-width match protection */ |
1130 | 0 | DATA_PUSH(&ctx->u.rep->last_ptr); |
1131 | 0 | ctx->u.rep->last_ptr = state->ptr; |
1132 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, |
1133 | 0 | ctx->u.rep->pattern+3); |
1134 | 0 | DATA_POP(&ctx->u.rep->last_ptr); |
1135 | 0 | if (ret) { |
1136 | 0 | RETURN_ON_ERROR(ret); |
1137 | 0 | RETURN_SUCCESS; |
1138 | 0 | } |
1139 | 0 | ctx->u.rep->count = ctx->count-1; |
1140 | 0 | state->ptr = ctx->ptr; |
1141 | 0 | RETURN_FAILURE; |
1142 | | |
1143 | 0 | case SRE_OP_GROUPREF: |
1144 | | /* match backreference */ |
1145 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, |
1146 | 0 | ctx->ptr, ctx->pattern[0])); |
1147 | 0 | i = ctx->pattern[0]; |
1148 | 0 | { |
1149 | 0 | Py_ssize_t groupref = i+i; /* index of the group's start mark; its end mark is groupref+1 */ |
1150 | 0 | if (groupref >= state->lastmark) { /* the end mark index lies above lastmark, so the group's marks are not valid */ |
1151 | 0 | RETURN_FAILURE; |
1152 | 0 | } else { |
1153 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1154 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1155 | 0 | if (!p || !e || e < p) |
1156 | 0 | RETURN_FAILURE; |
1157 | 0 | while (p < e) { |
1158 | 0 | if (ctx->ptr >= end || *ctx->ptr != *p) |
1159 | 0 | RETURN_FAILURE; |
1160 | 0 | p++; |
1161 | 0 | ctx->ptr++; |
1162 | 0 | } |
1163 | 0 | } |
1164 | 0 | } |
1165 | 0 | ctx->pattern++; |
1166 | 0 | break; |
1167 | | |
1168 | 0 | case SRE_OP_GROUPREF_IGNORE: |
1169 | | /* match backreference */ |
1170 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, |
1171 | 0 | ctx->ptr, ctx->pattern[0])); |
1172 | 0 | i = ctx->pattern[0]; |
1173 | 0 | { |
1174 | 0 | Py_ssize_t groupref = i+i; |
1175 | 0 | if (groupref >= state->lastmark) { |
1176 | 0 | RETURN_FAILURE; |
1177 | 0 | } else { |
1178 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1179 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1180 | 0 | if (!p || !e || e < p) |
1181 | 0 | RETURN_FAILURE; |
1182 | 0 | while (p < e) { |
1183 | 0 | if (ctx->ptr >= end || |
1184 | 0 | sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p)) |
1185 | 0 | RETURN_FAILURE; |
1186 | 0 | p++; |
1187 | 0 | ctx->ptr++; |
1188 | 0 | } |
1189 | 0 | } |
1190 | 0 | } |
1191 | 0 | ctx->pattern++; |
1192 | 0 | break; |
1193 | | |
1194 | 0 | case SRE_OP_GROUPREF_UNI_IGNORE: |
1195 | | /* match backreference */ |
1196 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern, |
1197 | 0 | ctx->ptr, ctx->pattern[0])); |
1198 | 0 | i = ctx->pattern[0]; |
1199 | 0 | { |
1200 | 0 | Py_ssize_t groupref = i+i; |
1201 | 0 | if (groupref >= state->lastmark) { |
1202 | 0 | RETURN_FAILURE; |
1203 | 0 | } else { |
1204 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1205 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1206 | 0 | if (!p || !e || e < p) |
1207 | 0 | RETURN_FAILURE; |
1208 | 0 | while (p < e) { |
1209 | 0 | if (ctx->ptr >= end || |
1210 | 0 | sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p)) |
1211 | 0 | RETURN_FAILURE; |
1212 | 0 | p++; |
1213 | 0 | ctx->ptr++; |
1214 | 0 | } |
1215 | 0 | } |
1216 | 0 | } |
1217 | 0 | ctx->pattern++; |
1218 | 0 | break; |
1219 | | |
1220 | 0 | case SRE_OP_GROUPREF_LOC_IGNORE: |
1221 | | /* match backreference */ |
1222 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern, |
1223 | 0 | ctx->ptr, ctx->pattern[0])); |
1224 | 0 | i = ctx->pattern[0]; |
1225 | 0 | { |
1226 | 0 | Py_ssize_t groupref = i+i; |
1227 | 0 | if (groupref >= state->lastmark) { |
1228 | 0 | RETURN_FAILURE; |
1229 | 0 | } else { |
1230 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1231 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1232 | 0 | if (!p || !e || e < p) |
1233 | 0 | RETURN_FAILURE; |
1234 | 0 | while (p < e) { |
1235 | 0 | if (ctx->ptr >= end || |
1236 | 0 | sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p)) |
1237 | 0 | RETURN_FAILURE; |
1238 | 0 | p++; |
1239 | 0 | ctx->ptr++; |
1240 | 0 | } |
1241 | 0 | } |
1242 | 0 | } |
1243 | 0 | ctx->pattern++; |
1244 | 0 | break; |
1245 | | |
1246 | 0 | case SRE_OP_GROUPREF_EXISTS: |
1247 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, |
1248 | 0 | ctx->ptr, ctx->pattern[0])); |
1249 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ |
1250 | 0 | i = ctx->pattern[0]; |
1251 | 0 | { |
1252 | 0 | Py_ssize_t groupref = i+i; |
1253 | 0 | if (groupref >= state->lastmark) { |
1254 | 0 | ctx->pattern += ctx->pattern[1]; |
1255 | 0 | break; |
1256 | 0 | } else { |
1257 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1258 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1259 | 0 | if (!p || !e || e < p) { |
1260 | 0 | ctx->pattern += ctx->pattern[1]; |
1261 | 0 | break; |
1262 | 0 | } |
1263 | 0 | } |
1264 | 0 | } |
1265 | 0 | ctx->pattern += 2; |
1266 | 0 | break; |
1267 | | |
1268 | 0 | case SRE_OP_ASSERT: |
1269 | | /* assert subpattern */ |
1270 | | /* <ASSERT> <skip> <back> <pattern> */ |
1271 | 0 | TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, |
1272 | 0 | ctx->ptr, ctx->pattern[1])); |
1273 | 0 | if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) |
1274 | 0 | RETURN_FAILURE; /* fewer than <back> characters precede ptr */ |
1275 | 0 | state->ptr = ctx->ptr - ctx->pattern[1]; /* the subpattern is tried <back> characters before ptr */ |
1276 | 0 | DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); |
1277 | 0 | RETURN_ON_FAILURE(ret); |
1278 | 0 | ctx->pattern += ctx->pattern[0]; |
1279 | 0 | break; |
1280 | | |
1281 | 0 | case SRE_OP_ASSERT_NOT: |
1282 | | /* assert not subpattern */ |
1283 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ |
1284 | 0 | TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, |
1285 | 0 | ctx->ptr, ctx->pattern[1])); |
1286 | 0 | if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { |
1287 | 0 | state->ptr = ctx->ptr - ctx->pattern[1]; |
1288 | 0 | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); |
1289 | 0 | if (ret) { |
1290 | 0 | RETURN_ON_ERROR(ret); |
1291 | 0 | RETURN_FAILURE; |
1292 | 0 | } |
1293 | 0 | } |
1294 | 0 | ctx->pattern += ctx->pattern[0]; |
1295 | 0 | break; |
1296 | | |
1297 | 0 | case SRE_OP_FAILURE: |
1298 | | /* immediate failure */ |
1299 | 0 | TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); |
1300 | 0 | RETURN_FAILURE; |
1301 | | |
1302 | 0 | default: |
1303 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, |
1304 | 0 | ctx->pattern[-1])); |
1305 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); |
1306 | 157 | } |
1307 | 157 | } |
1308 | | |
1309 | 141 | exit: |
1310 | 141 | ctx_pos = ctx->last_ctx_pos; /* position of the context to go back to */ |
1311 | 141 | jump = ctx->jump; |
1312 | 141 | DATA_POP_DISCARD(ctx); |
1313 | 141 | if (ctx_pos == -1) |
1314 | 6 | return ret; /* the outermost context has finished */ |
1315 | 135 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); /* switch back to the previous context */ |
1316 | | |
1317 | 135 | switch (jump) { /* resume at the label given to the DO_JUMP* that started the finished context */ |
1318 | 4 | case JUMP_MAX_UNTIL_2: |
1319 | 4 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); |
1320 | 4 | goto jump_max_until_2; |
1321 | 4 | case JUMP_MAX_UNTIL_3: |
1322 | 4 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); |
1323 | 4 | goto jump_max_until_3; |
1324 | 0 | case JUMP_MIN_UNTIL_2: |
1325 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); |
1326 | 0 | goto jump_min_until_2; |
1327 | 0 | case JUMP_MIN_UNTIL_3: |
1328 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); |
1329 | 0 | goto jump_min_until_3; |
1330 | 3 | case JUMP_BRANCH: |
1331 | 3 | TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); |
1332 | 3 | goto jump_branch; |
1333 | 0 | case JUMP_MAX_UNTIL_1: |
1334 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); |
1335 | 0 | goto jump_max_until_1; |
1336 | 0 | case JUMP_MIN_UNTIL_1: |
1337 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); |
1338 | 0 | goto jump_min_until_1; |
1339 | 4 | case JUMP_REPEAT: |
1340 | 4 | TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); |
1341 | 4 | goto jump_repeat; |
1342 | 4 | case JUMP_REPEAT_ONE_1: |
1343 | 4 | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); |
1344 | 4 | goto jump_repeat_one_1; |
1345 | 3 | case JUMP_REPEAT_ONE_2: |
1346 | 3 | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); |
1347 | 3 | goto jump_repeat_one_2; |
1348 | 113 | case JUMP_MIN_REPEAT_ONE: |
1349 | 113 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); |
1350 | 113 | goto jump_min_repeat_one; |
1351 | 0 | case JUMP_ASSERT: |
1352 | 0 | TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); |
1353 | 0 | goto jump_assert; |
1354 | 0 | case JUMP_ASSERT_NOT: |
1355 | 0 | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); |
1356 | 0 | goto jump_assert_not; |
1357 | 0 | case JUMP_NONE: |
1358 | 0 | TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, |
1359 | 0 | ctx->ptr, ret)); |
1360 | 0 | break; |
1361 | 135 | } |
1362 | | |
1363 | 0 | return ret; /* should never get here */ |
1364 | 135 | } Line | Count | Source | 550 | 6 | { | 551 | 6 | SRE_CHAR* end = (SRE_CHAR *)state->end; | 552 | 6 | Py_ssize_t alloc_pos, ctx_pos = -1; | 553 | 6 | Py_ssize_t i, ret = 0; | 554 | 6 | Py_ssize_t jump; | 555 | 6 | unsigned int sigcount=0; | 556 | | | 557 | 6 | SRE(match_context)* ctx; | 558 | 6 | SRE(match_context)* nextctx; | 559 | | | 560 | 6 | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 561 | | | 562 | 6 | DATA_ALLOC(SRE(match_context), ctx); | 563 | 6 | ctx->last_ctx_pos = -1; | 564 | 6 | ctx->jump = JUMP_NONE; | 565 | 6 | ctx->pattern = pattern; | 566 | 6 | ctx->toplevel = toplevel; | 567 | 6 | ctx_pos = alloc_pos; | 568 | | | 569 | 141 | entrance: | 570 | | | 571 | 141 | ctx->ptr = (SRE_CHAR *)state->ptr; | 572 | | | 573 | 141 | if (ctx->pattern[0] == SRE_OP_INFO) { | 574 | | /* optimization info block */ | 575 | | /* <INFO> <1=skip> <2=flags> <3=min> ... */ | 576 | 4 | if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { | 577 | 0 | TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, " | 578 | 0 | "need %" PY_FORMAT_SIZE_T "d)\n", | 579 | 0 | end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); | 580 | 0 | RETURN_FAILURE; | 581 | 0 | } | 582 | 4 | ctx->pattern += ctx->pattern[1] + 1; | 583 | 4 | } | 584 | | | 585 | 157 | for (;;) { | 586 | 157 | ++sigcount; | 587 | 157 | if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) | 588 | 0 | RETURN_ERROR(SRE_ERROR_INTERRUPTED); | 589 | | | 590 | 157 | switch (*ctx->pattern++) { | 591 | | | 592 | 4 | case SRE_OP_MARK: | 593 | | /* set mark */ | 594 | | /* <MARK> <gid> */ | 595 | 4 | TRACE(("|%p|%p|MARK %d\n", ctx->pattern, | 596 | 4 | ctx->ptr, ctx->pattern[0])); | 597 | 4 | i = ctx->pattern[0]; | 598 | 4 | if (i & 1) | 599 | 0 | state->lastindex = i/2 + 1; | 600 | 4 | if (i > state->lastmark) { | 601 | | /* state->lastmark is the highest valid index in the | 602 | | state->mark array. 
If it is increased by more than 1, | 603 | | the intervening marks must be set to NULL to signal | 604 | | that these marks have not been encountered. */ | 605 | 4 | Py_ssize_t j = state->lastmark + 1; | 606 | 8 | while (j < i) | 607 | 4 | state->mark[j++] = NULL; | 608 | 4 | state->lastmark = i; | 609 | 4 | } | 610 | 4 | state->mark[i] = ctx->ptr; | 611 | 4 | ctx->pattern++; | 612 | 4 | break; | 613 | | | 614 | 120 | case SRE_OP_LITERAL: | 615 | | /* match literal string */ | 616 | | /* <LITERAL> <code> */ | 617 | 120 | TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, | 618 | 120 | ctx->ptr, *ctx->pattern)); | 619 | 120 | if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) | 620 | 115 | RETURN_FAILURE; | 621 | 5 | ctx->pattern++; | 622 | 5 | ctx->ptr++; | 623 | 5 | break; | 624 | | | 625 | 0 | case SRE_OP_NOT_LITERAL: | 626 | | /* match anything that is not literal character */ | 627 | | /* <NOT_LITERAL> <code> */ | 628 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, | 629 | 0 | ctx->ptr, *ctx->pattern)); | 630 | 0 | if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) | 631 | 0 | RETURN_FAILURE; | 632 | 0 | ctx->pattern++; | 633 | 0 | ctx->ptr++; | 634 | 0 | break; | 635 | | | 636 | 4 | case SRE_OP_SUCCESS: | 637 | | /* end of pattern */ | 638 | 4 | TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); | 639 | 4 | if (ctx->toplevel && | 640 | 4 | ((state->match_all && ctx->ptr != state->end) || | 641 | 2 | (state->must_advance && ctx->ptr == state->start))) | 642 | 0 | { | 643 | 0 | RETURN_FAILURE; | 644 | 0 | } | 645 | 4 | state->ptr = ctx->ptr; | 646 | 4 | RETURN_SUCCESS; | 647 | | | 648 | 3 | case SRE_OP_AT: | 649 | | /* match at given position */ | 650 | | /* <AT> <code> */ | 651 | 3 | TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); | 652 | 3 | if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) | 653 | 0 | RETURN_FAILURE; | 654 | 3 | ctx->pattern++; | 655 | 3 | break; | 656 | | | 657 | 0 | case SRE_OP_CATEGORY: | 658 | 
| /* match at given category */ | 659 | | /* <CATEGORY> <code> */ | 660 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, | 661 | 0 | ctx->ptr, *ctx->pattern)); | 662 | 0 | if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) | 663 | 0 | RETURN_FAILURE; | 664 | 0 | ctx->pattern++; | 665 | 0 | ctx->ptr++; | 666 | 0 | break; | 667 | | | 668 | 0 | case SRE_OP_ANY: | 669 | | /* match anything (except a newline) */ | 670 | | /* <ANY> */ | 671 | 0 | TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); | 672 | 0 | if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) | 673 | 0 | RETURN_FAILURE; | 674 | 0 | ctx->ptr++; | 675 | 0 | break; | 676 | | | 677 | 0 | case SRE_OP_ANY_ALL: | 678 | | /* match anything */ | 679 | | /* <ANY_ALL> */ | 680 | 0 | TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); | 681 | 0 | if (ctx->ptr >= end) | 682 | 0 | RETURN_FAILURE; | 683 | 0 | ctx->ptr++; | 684 | 0 | break; | 685 | | | 686 | 1 | case SRE_OP_IN: | 687 | | /* match set member (or non_member) */ | 688 | | /* <IN> <skip> <set> */ | 689 | 1 | TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); | 690 | 1 | if (ctx->ptr >= end || | 691 | 1 | !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) | 692 | 0 | RETURN_FAILURE; | 693 | 1 | ctx->pattern += ctx->pattern[0]; | 694 | 1 | ctx->ptr++; | 695 | 1 | break; | 696 | | | 697 | 0 | case SRE_OP_LITERAL_IGNORE: | 698 | 0 | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 699 | 0 | ctx->pattern, ctx->ptr, ctx->pattern[0])); | 700 | 0 | if (ctx->ptr >= end || | 701 | 0 | sre_lower_ascii(*ctx->ptr) != *ctx->pattern) | 702 | 0 | RETURN_FAILURE; | 703 | 0 | ctx->pattern++; | 704 | 0 | ctx->ptr++; | 705 | 0 | break; | 706 | | | 707 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 708 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 709 | 0 | ctx->pattern, ctx->ptr, ctx->pattern[0])); | 710 | 0 | if (ctx->ptr >= end || | 711 | 0 | sre_lower_unicode(*ctx->ptr) != *ctx->pattern) | 712 | 0 | RETURN_FAILURE; | 713 | 0 | ctx->pattern++; | 714 | 0 | ctx->ptr++; | 
715 | 0 | break; | 716 | | | 717 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 718 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 719 | 0 | ctx->pattern, ctx->ptr, ctx->pattern[0])); | 720 | 0 | if (ctx->ptr >= end | 721 | 0 | || !char_loc_ignore(*ctx->pattern, *ctx->ptr)) | 722 | 0 | RETURN_FAILURE; | 723 | 0 | ctx->pattern++; | 724 | 0 | ctx->ptr++; | 725 | 0 | break; | 726 | | | 727 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 728 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 729 | 0 | ctx->pattern, ctx->ptr, *ctx->pattern)); | 730 | 0 | if (ctx->ptr >= end || | 731 | 0 | sre_lower_ascii(*ctx->ptr) == *ctx->pattern) | 732 | 0 | RETURN_FAILURE; | 733 | 0 | ctx->pattern++; | 734 | 0 | ctx->ptr++; | 735 | 0 | break; | 736 | | | 737 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 738 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 739 | 0 | ctx->pattern, ctx->ptr, *ctx->pattern)); | 740 | 0 | if (ctx->ptr >= end || | 741 | 0 | sre_lower_unicode(*ctx->ptr) == *ctx->pattern) | 742 | 0 | RETURN_FAILURE; | 743 | 0 | ctx->pattern++; | 744 | 0 | ctx->ptr++; | 745 | 0 | break; | 746 | | | 747 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 748 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 749 | 0 | ctx->pattern, ctx->ptr, *ctx->pattern)); | 750 | 0 | if (ctx->ptr >= end | 751 | 0 | || char_loc_ignore(*ctx->pattern, *ctx->ptr)) | 752 | 0 | RETURN_FAILURE; | 753 | 0 | ctx->pattern++; | 754 | 0 | ctx->ptr++; | 755 | 0 | break; | 756 | | | 757 | 0 | case SRE_OP_IN_IGNORE: | 758 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); | 759 | 0 | if (ctx->ptr >= end | 760 | 0 | || !SRE(charset)(state, ctx->pattern+1, | 761 | 0 | (SRE_CODE)sre_lower_ascii(*ctx->ptr))) | 762 | 0 | RETURN_FAILURE; | 763 | 0 | ctx->pattern += ctx->pattern[0]; | 764 | 0 | ctx->ptr++; | 765 | 0 | break; | 766 | | | 767 | 2 | case SRE_OP_IN_UNI_IGNORE: | 768 | 2 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr)); | 769 | 2 | if (ctx->ptr >= end | 770 | 2 | || !SRE(charset)(state, 
ctx->pattern+1, | 771 | 2 | (SRE_CODE)sre_lower_unicode(*ctx->ptr))) | 772 | 0 | RETURN_FAILURE; | 773 | 2 | ctx->pattern += ctx->pattern[0]; | 774 | 2 | ctx->ptr++; | 775 | 2 | break; | 776 | | | 777 | 0 | case SRE_OP_IN_LOC_IGNORE: | 778 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr)); | 779 | 0 | if (ctx->ptr >= end | 780 | 0 | || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr)) | 781 | 0 | RETURN_FAILURE; | 782 | 0 | ctx->pattern += ctx->pattern[0]; | 783 | 0 | ctx->ptr++; | 784 | 0 | break; | 785 | | | 786 | 1 | case SRE_OP_JUMP: | 787 | 1 | case SRE_OP_INFO: | 788 | | /* jump forward */ | 789 | | /* <JUMP> <offset> */ | 790 | 1 | TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, | 791 | 1 | ctx->ptr, ctx->pattern[0])); | 792 | 1 | ctx->pattern += ctx->pattern[0]; | 793 | 1 | break; | 794 | | | 795 | 3 | case SRE_OP_BRANCH: | 796 | | /* alternation */ | 797 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 798 | 3 | TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); | 799 | 3 | LASTMARK_SAVE(); | 800 | 3 | ctx->u.rep = state->repeat; | 801 | 3 | if (ctx->u.rep) | 802 | 2 | MARK_PUSH(ctx->lastmark); | 803 | 7 | for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { | 804 | 5 | if (ctx->pattern[1] == SRE_OP_LITERAL && | 805 | 5 | (ctx->ptr >= end || | 806 | 2 | (SRE_CODE) *ctx->ptr != ctx->pattern[2])) | 807 | 2 | continue; | 808 | 3 | if (ctx->pattern[1] == SRE_OP_IN && | 809 | 3 | (ctx->ptr >= end || | 810 | 1 | !SRE(charset)(state, ctx->pattern + 3, | 811 | 1 | (SRE_CODE) *ctx->ptr))) | 812 | 0 | continue; | 813 | 3 | state->ptr = ctx->ptr; | 814 | 3 | DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); | 815 | 3 | if (ret) { | 816 | 1 | if (ctx->u.rep) | 817 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 818 | 1 | RETURN_ON_ERROR(ret); | 819 | 1 | RETURN_SUCCESS; | 820 | 1 | } | 821 | 2 | if (ctx->u.rep) | 822 | 2 | MARK_POP_KEEP(ctx->lastmark); | 823 | 2 | LASTMARK_RESTORE(); | 824 | 2 | } | 825 | 2 | if (ctx->u.rep) | 826 | 2 | 
MARK_POP_DISCARD(ctx->lastmark); | 827 | 2 | RETURN_FAILURE; | 828 | | | 829 | 9 | case SRE_OP_REPEAT_ONE: | 830 | | /* match repeated sequence (maximizing regexp) */ | 831 | | | 832 | | /* this operator only works if the repeated item is | 833 | | exactly one character wide, and we're not already | 834 | | collecting backtracking points. for other cases, | 835 | | use the MAX_REPEAT operator */ | 836 | | | 837 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 838 | | | 839 | 9 | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, | 840 | 9 | ctx->pattern[1], ctx->pattern[2])); | 841 | | | 842 | 9 | if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) | 843 | 0 | RETURN_FAILURE; /* cannot match */ | 844 | | | 845 | 9 | state->ptr = ctx->ptr; | 846 | | | 847 | 9 | ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); | 848 | 9 | RETURN_ON_ERROR(ret); | 849 | 9 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 850 | 9 | ctx->count = ret; | 851 | 9 | ctx->ptr += ctx->count; | 852 | | | 853 | | /* when we arrive here, count contains the number of | 854 | | matches, and ctx->ptr points to the tail of the target | 855 | | string. check if the rest of the pattern matches, | 856 | | and backtrack if not. */ | 857 | | | 858 | 9 | if (ctx->count < (Py_ssize_t) ctx->pattern[1]) | 859 | 2 | RETURN_FAILURE; | 860 | | | 861 | 7 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && | 862 | 7 | ctx->ptr == state->end && | 863 | 7 | !(ctx->toplevel && state->must_advance && ctx->ptr == state->start)) | 864 | 0 | { | 865 | | /* tail is empty. we're finished */ | 866 | 0 | state->ptr = ctx->ptr; | 867 | 0 | RETURN_SUCCESS; | 868 | 0 | } | 869 | | | 870 | 7 | LASTMARK_SAVE(); | 871 | | | 872 | 7 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { | 873 | | /* tail starts with a literal. 
skip positions where | 874 | | the rest of the pattern cannot possibly match */ | 875 | 4 | ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; | 876 | 6 | for (;;) { | 877 | 6 | while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && | 878 | 6 | (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { | 879 | 0 | ctx->ptr--; | 880 | 0 | ctx->count--; | 881 | 0 | } | 882 | 6 | if (ctx->count < (Py_ssize_t) ctx->pattern[1]) | 883 | 2 | break; | 884 | 4 | state->ptr = ctx->ptr; | 885 | 4 | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 886 | 0 | ctx->pattern+ctx->pattern[0]); | 887 | 4 | if (ret) { | 888 | 2 | RETURN_ON_ERROR(ret); | 889 | 2 | RETURN_SUCCESS; | 890 | 2 | } | 891 | | | 892 | 2 | LASTMARK_RESTORE(); | 893 | | | 894 | 2 | ctx->ptr--; | 895 | 2 | ctx->count--; | 896 | 2 | } | 897 | | | 898 | 4 | } else { | 899 | | /* general case */ | 900 | 3 | while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { | 901 | 3 | state->ptr = ctx->ptr; | 902 | 3 | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 903 | 0 | ctx->pattern+ctx->pattern[0]); | 904 | 3 | if (ret) { | 905 | 3 | RETURN_ON_ERROR(ret); | 906 | 3 | RETURN_SUCCESS; | 907 | 3 | } | 908 | 0 | ctx->ptr--; | 909 | 0 | ctx->count--; | 910 | 0 | LASTMARK_RESTORE(); | 911 | 0 | } | 912 | 3 | } | 913 | 2 | RETURN_FAILURE; | 914 | | | 915 | 2 | case SRE_OP_MIN_REPEAT_ONE: | 916 | | /* match repeated sequence (minimizing regexp) */ | 917 | | | 918 | | /* this operator only works if the repeated item is | 919 | | exactly one character wide, and we're not already | 920 | | collecting backtracking points. 
for other cases, | 921 | | use the MIN_REPEAT operator */ | 922 | | | 923 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 924 | | | 925 | 2 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, | 926 | 2 | ctx->pattern[1], ctx->pattern[2])); | 927 | | | 928 | 2 | if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) | 929 | 0 | RETURN_FAILURE; /* cannot match */ | 930 | | | 931 | 2 | state->ptr = ctx->ptr; | 932 | | | 933 | 2 | if (ctx->pattern[1] == 0) | 934 | 2 | ctx->count = 0; | 935 | 0 | else { | 936 | | /* count using pattern min as the maximum */ | 937 | 0 | ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); | 938 | 0 | RETURN_ON_ERROR(ret); | 939 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 940 | 0 | if (ret < (Py_ssize_t) ctx->pattern[1]) | 941 | | /* didn't match minimum number of times */ | 942 | 0 | RETURN_FAILURE; | 943 | | /* advance past minimum matches of repeat */ | 944 | 0 | ctx->count = ret; | 945 | 0 | ctx->ptr += ctx->count; | 946 | 0 | } | 947 | | | 948 | 2 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && | 949 | 2 | !(ctx->toplevel && | 950 | 0 | ((state->match_all && ctx->ptr != state->end) || | 951 | 0 | (state->must_advance && ctx->ptr == state->start)))) | 952 | 0 | { | 953 | | /* tail is empty. we're finished */ | 954 | 0 | state->ptr = ctx->ptr; | 955 | 0 | RETURN_SUCCESS; | 956 | |
| 957 | 2 | } else { | 958 | | /* general case */ | 959 | 2 | LASTMARK_SAVE(); | 960 | 113 | while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT | 961 | 113 | || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { | 962 | 113 | state->ptr = ctx->ptr; | 963 | 113 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 964 | 0 | ctx->pattern+ctx->pattern[0]); | 965 | 113 | if (ret) { | 966 | 0 | RETURN_ON_ERROR(ret); | 967 | 0 | RETURN_SUCCESS; | 968 | 0 | } | 969 | 113 | state->ptr = ctx->ptr; | 970 | 113 | ret = SRE(count)(state, ctx->pattern+3, 1); | 971 | 113 | RETURN_ON_ERROR(ret); | 972 | 113 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 973 | 113 | if (ret == 0) | 974 | 2 | break; | 975 | 111 | assert(ret == 1); | 976 | 111 | ctx->ptr++; | 977 | 111 | ctx->count++; | 978 | 111 | LASTMARK_RESTORE(); | 979 | 111 | } | 980 | 2 | } | 981 | 2 | RETURN_FAILURE; | 982 | | | 983 | 4 | case SRE_OP_REPEAT: | 984 | | /* create repeat context. all the hard work is done | 985 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 986 | | /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ | 987 | 4 | TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, | 988 | 4 | ctx->pattern[1], ctx->pattern[2])); | 989 | | | 990 | | /* install new repeat context */ | 991 | 4 | ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); | 992 | 4 | if (!ctx->u.rep) { | 993 | 0 | PyErr_NoMemory(); | 994 | 0 | RETURN_FAILURE; | 995 | 0 | } | 996 | 4 | ctx->u.rep->count = -1; | 997 | 4 | ctx->u.rep->pattern = ctx->pattern; | 998 | 4 | ctx->u.rep->prev = state->repeat; | 999 | 4 | ctx->u.rep->last_ptr = NULL; | 1000 | 4 | state->repeat = ctx->u.rep; | 1001 | | | 1002 | 4 | state->ptr = ctx->ptr; | 1003 | 4 | DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); | 1004 | 4 | state->repeat = ctx->u.rep->prev; | 1005 | 4 | PyObject_FREE(ctx->u.rep); | 1006 | | | 1007 | 4 | if (ret) { | 1008 | 4 | RETURN_ON_ERROR(ret); | 1009 | 4 | RETURN_SUCCESS; | 1010 | 4 | } | 1011 | 0 | 
RETURN_FAILURE; | 1012 | | | 1013 | 4 | case SRE_OP_MAX_UNTIL: | 1014 | | /* maximizing repeat */ | 1015 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1016 | | | 1017 | | /* FIXME: we probably need to deal with zero-width | 1018 | | matches in here... */ | 1019 | | | 1020 | 4 | ctx->u.rep = state->repeat; | 1021 | 4 | if (!ctx->u.rep) | 1022 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1023 | | | 1024 | 4 | state->ptr = ctx->ptr; | 1025 | | | 1026 | 4 | ctx->count = ctx->u.rep->count+1; | 1027 | | | 1028 | 4 | TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, | 1029 | 4 | ctx->ptr, ctx->count)); | 1030 | | | 1031 | 4 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1032 | | /* not enough matches */ | 1033 | 0 | ctx->u.rep->count = ctx->count; | 1034 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1035 | 0 | ctx->u.rep->pattern+3); | 1036 | 0 | if (ret) { | 1037 | 0 | RETURN_ON_ERROR(ret); | 1038 | 0 | RETURN_SUCCESS; | 1039 | 0 | } | 1040 | 0 | ctx->u.rep->count = ctx->count-1; | 1041 | 0 | state->ptr = ctx->ptr; | 1042 | 0 | RETURN_FAILURE; | 1043 | 0 | } | 1044 | | | 1045 | 4 | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1046 | 4 | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1047 | 4 | state->ptr != ctx->u.rep->last_ptr) { | 1048 | | /* we may have enough matches, but if we can | 1049 | | match another item, do so */ | 1050 | 4 | ctx->u.rep->count = ctx->count; | 1051 | 4 | LASTMARK_SAVE(); | 1052 | 4 | MARK_PUSH(ctx->lastmark); | 1053 | | /* zero-width match protection */ | 1054 | 4 | DATA_PUSH(&ctx->u.rep->last_ptr); | 1055 | 4 | ctx->u.rep->last_ptr = state->ptr; | 1056 | 4 | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1057 | 0 | ctx->u.rep->pattern+3); | 1058 | 4 | DATA_POP(&ctx->u.rep->last_ptr); | 1059 | 4 | if (ret) { | 1060 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1061 | 0 | RETURN_ON_ERROR(ret); | 1062 | 0 | RETURN_SUCCESS; | 1063 | 0 | } | 1064 | 4 | MARK_POP(ctx->lastmark); | 1065 | 4 | 
LASTMARK_RESTORE(); | 1066 | 4 | ctx->u.rep->count = ctx->count-1; | 1067 | 4 | state->ptr = ctx->ptr; | 1068 | 4 | } | 1069 | | | 1070 | | /* cannot match more repeated items here. make sure the | 1071 | | tail matches */ | 1072 | 4 | state->repeat = ctx->u.rep->prev; | 1073 | 4 | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); | 1074 | 4 | RETURN_ON_SUCCESS(ret); | 1075 | 0 | state->repeat = ctx->u.rep; | 1076 | 0 | state->ptr = ctx->ptr; | 1077 | 0 | RETURN_FAILURE; | 1078 | | | 1079 | 0 | case SRE_OP_MIN_UNTIL: | 1080 | | /* minimizing repeat */ | 1081 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1082 | |
| 1083 | 0 | ctx->u.rep = state->repeat; | 1084 | 0 | if (!ctx->u.rep) | 1085 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1086 | | | 1087 | 0 | state->ptr = ctx->ptr; | 1088 | |
| 1089 | 0 | ctx->count = ctx->u.rep->count+1; | 1090 | |
| 1091 | 0 | TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, | 1092 | 0 | ctx->ptr, ctx->count, ctx->u.rep->pattern)); | 1093 | |
| 1094 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1095 | | /* not enough matches */ | 1096 | 0 | ctx->u.rep->count = ctx->count; | 1097 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1098 | 0 | ctx->u.rep->pattern+3); | 1099 | 0 | if (ret) { | 1100 | 0 | RETURN_ON_ERROR(ret); | 1101 | 0 | RETURN_SUCCESS; | 1102 | 0 | } | 1103 | 0 | ctx->u.rep->count = ctx->count-1; | 1104 | 0 | state->ptr = ctx->ptr; | 1105 | 0 | RETURN_FAILURE; | 1106 | 0 | } | 1107 | | | 1108 | 0 | LASTMARK_SAVE(); | 1109 | | | 1110 | | /* see if the tail matches */ | 1111 | 0 | state->repeat = ctx->u.rep->prev; | 1112 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); | 1113 | 0 | if (ret) { | 1114 | 0 | RETURN_ON_ERROR(ret); | 1115 | 0 | RETURN_SUCCESS; | 1116 | 0 | } | 1117 | | | 1118 | 0 | state->repeat = ctx->u.rep; | 1119 | 0 | state->ptr = ctx->ptr; | 1120 | |
| 1121 | 0 | LASTMARK_RESTORE(); | 1122 | |
| 1123 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1124 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1125 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1126 | 0 | RETURN_FAILURE; | 1127 | | | 1128 | 0 | ctx->u.rep->count = ctx->count; | 1129 | | /* zero-width match protection */ | 1130 | 0 | DATA_PUSH(&ctx->u.rep->last_ptr); | 1131 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1132 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1133 | 0 | ctx->u.rep->pattern+3); | 1134 | 0 | DATA_POP(&ctx->u.rep->last_ptr); | 1135 | 0 | if (ret) { | 1136 | 0 | RETURN_ON_ERROR(ret); | 1137 | 0 | RETURN_SUCCESS; | 1138 | 0 | } | 1139 | 0 | ctx->u.rep->count = ctx->count-1; | 1140 | 0 | state->ptr = ctx->ptr; | 1141 | 0 | RETURN_FAILURE; | 1142 | | | 1143 | 0 | case SRE_OP_GROUPREF: | 1144 | | /* match backreference */ | 1145 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, | 1146 | 0 | ctx->ptr, ctx->pattern[0])); | 1147 | 0 | i = ctx->pattern[0]; | 1148 | 0 | { | 1149 | 0 | Py_ssize_t groupref = i+i; | 1150 | 0 | if (groupref >= state->lastmark) { | 1151 | 0 | RETURN_FAILURE; | 1152 | 0 | } else { | 1153 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1154 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1155 | 0 | if (!p || !e || e < p) | 1156 | 0 | RETURN_FAILURE; | 1157 | 0 | while (p < e) { | 1158 | 0 | if (ctx->ptr >= end || *ctx->ptr != *p) | 1159 | 0 | RETURN_FAILURE; | 1160 | 0 | p++; | 1161 | 0 | ctx->ptr++; | 1162 | 0 | } | 1163 | 0 | } | 1164 | 0 | } | 1165 | 0 | ctx->pattern++; | 1166 | 0 | break; | 1167 | | | 1168 | 0 | case SRE_OP_GROUPREF_IGNORE: | 1169 | | /* match backreference */ | 1170 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, | 1171 | 0 | ctx->ptr, ctx->pattern[0])); | 1172 | 0 | i = ctx->pattern[0]; | 1173 | 0 | { | 1174 | 0 | Py_ssize_t groupref = i+i; | 1175 | 0 | if (groupref >= state->lastmark) { | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } else { | 1178 | 0 | SRE_CHAR* p = (SRE_CHAR*) 
state->mark[groupref]; | 1179 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1180 | 0 | if (!p || !e || e < p) | 1181 | 0 | RETURN_FAILURE; | 1182 | 0 | while (p < e) { | 1183 | 0 | if (ctx->ptr >= end || | 1184 | 0 | sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p)) | 1185 | 0 | RETURN_FAILURE; | 1186 | 0 | p++; | 1187 | 0 | ctx->ptr++; | 1188 | 0 | } | 1189 | 0 | } | 1190 | 0 | } | 1191 | 0 | ctx->pattern++; | 1192 | 0 | break; | 1193 | | | 1194 | 0 | case SRE_OP_GROUPREF_UNI_IGNORE: | 1195 | | /* match backreference */ | 1196 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern, | 1197 | 0 | ctx->ptr, ctx->pattern[0])); | 1198 | 0 | i = ctx->pattern[0]; | 1199 | 0 | { | 1200 | 0 | Py_ssize_t groupref = i+i; | 1201 | 0 | if (groupref >= state->lastmark) { | 1202 | 0 | RETURN_FAILURE; | 1203 | 0 | } else { | 1204 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1205 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1206 | 0 | if (!p || !e || e < p) | 1207 | 0 | RETURN_FAILURE; | 1208 | 0 | while (p < e) { | 1209 | 0 | if (ctx->ptr >= end || | 1210 | 0 | sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p)) | 1211 | 0 | RETURN_FAILURE; | 1212 | 0 | p++; | 1213 | 0 | ctx->ptr++; | 1214 | 0 | } | 1215 | 0 | } | 1216 | 0 | } | 1217 | 0 | ctx->pattern++; | 1218 | 0 | break; | 1219 | | | 1220 | 0 | case SRE_OP_GROUPREF_LOC_IGNORE: | 1221 | | /* match backreference */ | 1222 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern, | 1223 | 0 | ctx->ptr, ctx->pattern[0])); | 1224 | 0 | i = ctx->pattern[0]; | 1225 | 0 | { | 1226 | 0 | Py_ssize_t groupref = i+i; | 1227 | 0 | if (groupref >= state->lastmark) { | 1228 | 0 | RETURN_FAILURE; | 1229 | 0 | } else { | 1230 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1231 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1232 | 0 | if (!p || !e || e < p) | 1233 | 0 | RETURN_FAILURE; | 1234 | 0 | while (p < e) { | 1235 | 0 | if (ctx->ptr >= end || | 1236 | 0 | 
sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p)) | 1237 | 0 | RETURN_FAILURE; | 1238 | 0 | p++; | 1239 | 0 | ctx->ptr++; | 1240 | 0 | } | 1241 | 0 | } | 1242 | 0 | } | 1243 | 0 | ctx->pattern++; | 1244 | 0 | break; | 1245 | | | 1246 | 0 | case SRE_OP_GROUPREF_EXISTS: | 1247 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, | 1248 | 0 | ctx->ptr, ctx->pattern[0])); | 1249 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ | 1250 | 0 | i = ctx->pattern[0]; | 1251 | 0 | { | 1252 | 0 | Py_ssize_t groupref = i+i; | 1253 | 0 | if (groupref >= state->lastmark) { | 1254 | 0 | ctx->pattern += ctx->pattern[1]; | 1255 | 0 | break; | 1256 | 0 | } else { | 1257 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1258 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1259 | 0 | if (!p || !e || e < p) { | 1260 | 0 | ctx->pattern += ctx->pattern[1]; | 1261 | 0 | break; | 1262 | 0 | } | 1263 | 0 | } | 1264 | 0 | } | 1265 | 0 | ctx->pattern += 2; | 1266 | 0 | break; | 1267 | | | 1268 | 0 | case SRE_OP_ASSERT: | 1269 | | /* assert subpattern */ | 1270 | | /* <ASSERT> <skip> <back> <pattern> */ | 1271 | 0 | TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, | 1272 | 0 | ctx->ptr, ctx->pattern[1])); | 1273 | 0 | if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) | 1274 | 0 | RETURN_FAILURE; | 1275 | 0 | state->ptr = ctx->ptr - ctx->pattern[1]; | 1276 | 0 | DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); | 1277 | 0 | RETURN_ON_FAILURE(ret); | 1278 | 0 | ctx->pattern += ctx->pattern[0]; | 1279 | 0 | break; | 1280 | | | 1281 | 0 | case SRE_OP_ASSERT_NOT: | 1282 | | /* assert not subpattern */ | 1283 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1284 | 0 | TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, | 1285 | 0 | ctx->ptr, ctx->pattern[1])); | 1286 | 0 | if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { | 1287 | 0 | state->ptr = ctx->ptr - ctx->pattern[1]; | 1288 | 0 | DO_JUMP0(JUMP_ASSERT_NOT, 
jump_assert_not, ctx->pattern+2); | 1289 | 0 | if (ret) { | 1290 | 0 | RETURN_ON_ERROR(ret); | 1291 | 0 | RETURN_FAILURE; | 1292 | 0 | } | 1293 | 0 | } | 1294 | 0 | ctx->pattern += ctx->pattern[0]; | 1295 | 0 | break; | 1296 | | | 1297 | 0 | case SRE_OP_FAILURE: | 1298 | | /* immediate failure */ | 1299 | 0 | TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); | 1300 | 0 | RETURN_FAILURE; | 1301 | | | 1302 | 0 | default: | 1303 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, | 1304 | 0 | ctx->pattern[-1])); | 1305 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1306 | 157 | } | 1307 | 157 | } | 1308 | | | 1309 | 141 | exit: | 1310 | 141 | ctx_pos = ctx->last_ctx_pos; | 1311 | 141 | jump = ctx->jump; | 1312 | 141 | DATA_POP_DISCARD(ctx); | 1313 | 141 | if (ctx_pos == -1) | 1314 | 6 | return ret; | 1315 | 135 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1316 | | | 1317 | 135 | switch (jump) { | 1318 | 4 | case JUMP_MAX_UNTIL_2: | 1319 | 4 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); | 1320 | 4 | goto jump_max_until_2; | 1321 | 4 | case JUMP_MAX_UNTIL_3: | 1322 | 4 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); | 1323 | 4 | goto jump_max_until_3; | 1324 | 0 | case JUMP_MIN_UNTIL_2: | 1325 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); | 1326 | 0 | goto jump_min_until_2; | 1327 | 0 | case JUMP_MIN_UNTIL_3: | 1328 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); | 1329 | 0 | goto jump_min_until_3; | 1330 | 3 | case JUMP_BRANCH: | 1331 | 3 | TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); | 1332 | 3 | goto jump_branch; | 1333 | 0 | case JUMP_MAX_UNTIL_1: | 1334 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); | 1335 | 0 | goto jump_max_until_1; | 1336 | 0 | case JUMP_MIN_UNTIL_1: | 1337 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); | 1338 | 0 | goto jump_min_until_1; | 1339 | 4 | case JUMP_REPEAT: | 1340 | 4 | 
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); | 1341 | 4 | goto jump_repeat; | 1342 | 4 | case JUMP_REPEAT_ONE_1: | 1343 | 4 | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); | 1344 | 4 | goto jump_repeat_one_1; | 1345 | 3 | case JUMP_REPEAT_ONE_2: | 1346 | 3 | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); | 1347 | 3 | goto jump_repeat_one_2; | 1348 | 113 | case JUMP_MIN_REPEAT_ONE: | 1349 | 113 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); | 1350 | 113 | goto jump_min_repeat_one; | 1351 | 0 | case JUMP_ASSERT: | 1352 | 0 | TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); | 1353 | 0 | goto jump_assert; | 1354 | 0 | case JUMP_ASSERT_NOT: | 1355 | 0 | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); | 1356 | 0 | goto jump_assert_not; | 1357 | 0 | case JUMP_NONE: | 1358 | 0 | TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, | 1359 | 0 | ctx->ptr, ret)); | 1360 | 0 | break; | 1361 | 135 | } | 1362 | | | 1363 | 0 | return ret; /* should never get here */ | 1364 | 135 | } |
Unexecuted instantiation: _sre.c:sre_ucs2_match Unexecuted instantiation: _sre.c:sre_ucs4_match |
1365 | | |
1366 | | /* Reset the capture-group bookkeeping: sets both state->lastmark and
     | |    state->lastindex to -1.  Needed between two SRE(match) calls made from
     | |    a loop.  Expands to a single statement and expects a variable named
     | |    `state` to be in scope at the point of use. */
1367 | | #define RESET_CAPTURE_GROUP() \
1368 | 0 | do { state->lastmark = state->lastindex = -1; } while (0)
1369 | | /* Search the subject for the first position at which `pattern` matches.
     | |  *
     | |  * Scanning starts at state->start and is bounded by state->end.  If the
     | |  * pattern opens with an INFO block, its minimum width, literal prefix or
     | |  * leading charset are used to skip positions that cannot match before
     | |  * SRE(match) is called.  For every candidate the function stores the
     | |  * candidate position in state->start and the resume position in
     | |  * state->ptr; state->must_advance is cleared on every path that retries.
     | |  *
     | |  * Returns 0 when no position matches, 1 when the prefix scan alone finds
     | |  * a pattern flagged SRE_INFO_LITERAL, and otherwise the first non-zero
     | |  * status returned by SRE(match). */
1370 | | LOCAL(Py_ssize_t)
1371 | | SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1372 | 2 | {
1373 | 2 | SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1374 | 2 | SRE_CHAR* end = (SRE_CHAR *)state->end;
1375 | 2 | Py_ssize_t status = 0;
1376 | 2 | Py_ssize_t prefix_len = 0;
1377 | 2 | Py_ssize_t prefix_skip = 0;
1378 | 2 | SRE_CODE* prefix = NULL;
1379 | 2 | SRE_CODE* charset = NULL;
1380 | 2 | SRE_CODE* overlap = NULL;
1381 | 2 | int flags = 0;
1382 | | /* nothing to scan when the start lies beyond the end */
1383 | 2 | if (ptr > end)
1384 | 0 | return 0;
1385 | | 
1386 | 2 | if (pattern[0] == SRE_OP_INFO) {
1387 | | /* optimization info block */
1388 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1389 | | 
1390 | 2 | flags = pattern[2];
1391 | | /* reject early: fewer characters remain than the pattern's minimum width */
1392 | 2 | if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1393 | 0 | TRACE(("reject (got %u chars, need %u)\n",
1394 | 0 | (unsigned int)(end - ptr), pattern[3]));
1395 | 0 | return 0;
1396 | 0 | }
1397 | 2 | if (pattern[3] > 1) {
1398 | | /* adjust end point (but make sure we leave at least one
1399 | | character in there, so literal search will work) */
1400 | 2 | end -= pattern[3] - 1;
1401 | 2 | if (end <= ptr)
1402 | 0 | end = ptr;
1403 | 2 | }
1404 | | 
1405 | 2 | if (flags & SRE_INFO_PREFIX) {
1406 | | /* pattern starts with a known prefix */
1407 | | /* <length> <skip> <prefix data> <overlap data> */
1408 | 2 | prefix_len = pattern[5];
1409 | 2 | prefix_skip = pattern[6];
1410 | 2 | prefix = pattern + 7;
1411 | 2 | overlap = prefix + prefix_len - 1; /* biased by -1: overlap[1] is the first word after the prefix data */
1412 | 2 | } else if (flags & SRE_INFO_CHARSET)
1413 | | /* pattern starts with a character from a known set */
1414 | | /* <charset> */
1415 | 0 | charset = pattern + 5;
1416 | | /* step over the INFO block: the opcode word plus pattern[1] further words */
1417 | 2 | pattern += 1 + pattern[1];
1418 | 2 | }
1419 | | 
1420 | 2 | TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1421 | 2 | prefix, prefix_len, prefix_skip));
1422 | 2 | TRACE(("charset = %p\n", charset));
1423 | | 
1424 | 2 | if (prefix_len == 1) {
1425 | | /* pattern starts with a literal character */
1426 | 0 | SRE_CHAR c = (SRE_CHAR) prefix[0];
1427 | | #if SIZEOF_SRE_CHAR < 4
1428 | 0 | if ((SRE_CODE) c != prefix[0])
1429 | 0 | return 0; /* literal can't match: doesn't fit in char width */
1430 | 0 | #endif
1431 | 0 | end = (SRE_CHAR *)state->end; /* scan up to the real end again, dropping the min-width adjustment made above */
1432 | 0 | state->must_advance = 0;
1433 | 0 | while (ptr < end) {
1434 | 0 | while (*ptr != c) {
1435 | 0 | if (++ptr >= end)
1436 | 0 | return 0;
1437 | 0 | }
1438 | 0 | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1439 | 0 | state->start = ptr;
1440 | 0 | state->ptr = ptr + prefix_skip;
1441 | 0 | if (flags & SRE_INFO_LITERAL)
1442 | 0 | return 1; /* we got all of it */
1443 | 0 | status = SRE(match)(state, pattern + 2*prefix_skip, 0); /* NOTE(review): 2*prefix_skip presumably steps over prefix_skip two-word <LITERAL> <code> items -- confirm against the pattern compiler */
1444 | 0 | if (status != 0)
1445 | 0 | return status;
1446 | 0 | ++ptr;
1447 | 0 | RESET_CAPTURE_GROUP();
1448 | 0 | }
1449 | 0 | return 0;
1450 | 0 | }
1451 | | 
1452 | 2 | if (prefix_len > 1) {
1453 | | /* pattern starts with a known prefix. use the overlap
1454 | | table to skip forward as fast as we possibly can */
1455 | 2 | Py_ssize_t i = 0;
1456 | | /* scan against the real end again; give up at once if the prefix cannot fit */
1457 | 2 | end = (SRE_CHAR *)state->end;
1458 | 2 | if (prefix_len > end - ptr)
1459 | 0 | return 0;
1460 | | #if SIZEOF_SRE_CHAR < 4
1461 | 6 | for (i = 0; i < prefix_len; i++)
1462 | 4 | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1463 | 0 | return 0; /* literal can't match: doesn't fit in char width */
1464 | 2 | #endif
1465 | 2 | while (ptr < end) {
1466 | 2 | SRE_CHAR c = (SRE_CHAR) prefix[0];
1467 | 2 | while (*ptr++ != c) {
1468 | 0 | if (ptr >= end)
1469 | 0 | return 0;
1470 | 0 | }
1471 | 2 | if (ptr >= end)
1472 | 0 | return 0;
1473 | | /* prefix[0] was found at ptr[-1]; i counts the prefix characters matched so far */
1474 | 2 | i = 1;
1475 | 2 | state->must_advance = 0;
1476 | 2 | do {
1477 | 2 | if (*ptr == (SRE_CHAR) prefix[i]) {
1478 | 2 | if (++i != prefix_len) {
1479 | 0 | if (++ptr >= end)
1480 | 0 | return 0;
1481 | 0 | continue;
1482 | 0 | }
1483 | | /* found a potential match: ptr is on the last prefix character */
1484 | 2 | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1485 | 2 | state->start = ptr - (prefix_len - 1);
1486 | 2 | state->ptr = ptr - (prefix_len - prefix_skip - 1);
1487 | 2 | if (flags & SRE_INFO_LITERAL)
1488 | 0 | return 1; /* we got all of it */
1489 | 2 | status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1490 | 2 | if (status != 0)
1491 | 2 | return status;
1492 | | /* close but no cigar -- try again */
1493 | 0 | if (++ptr >= end)
1494 | 0 | return 0;
1495 | 0 | RESET_CAPTURE_GROUP();
1496 | 0 | }
1497 | 0 | i = overlap[i]; /* reached on a character mismatch or after a failed match attempt: fall back through the overlap table */
1498 | 0 | } while (i != 0);
1499 | 2 | }
1500 | 0 | return 0;
1501 | 2 | }
1502 | | 
1503 | 0 | if (charset) {
1504 | | /* pattern starts with a character from a known set */
1505 | 0 | end = (SRE_CHAR *)state->end;
1506 | 0 | state->must_advance = 0;
1507 | 0 | for (;;) {
1508 | 0 | while (ptr < end && !SRE(charset)(state, charset, *ptr))
1509 | 0 | ptr++;
1510 | 0 | if (ptr >= end)
1511 | 0 | return 0;
1512 | 0 | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1513 | 0 | state->start = ptr;
1514 | 0 | state->ptr = ptr;
1515 | 0 | status = SRE(match)(state, pattern, 0);
1516 | 0 | if (status != 0)
1517 | 0 | break;
1518 | 0 | ptr++;
1519 | 0 | RESET_CAPTURE_GROUP();
1520 | 0 | }
1521 | 0 | } else {
1522 | | /* general case: try each position in turn up to `end`, which here is still the bound adjusted by the INFO block (if there was one); only the first attempt passes 1 as the last argument and runs before must_advance is cleared */
1523 | 0 | assert(ptr <= end);
1524 | 0 | TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1525 | 0 | state->start = state->ptr = ptr;
1526 | 0 | status = SRE(match)(state, pattern, 1);
1527 | 0 | state->must_advance = 0;
1528 | 0 | while (status == 0 && ptr < end) {
1529 | 0 | ptr++;
1530 | 0 | RESET_CAPTURE_GROUP();
1531 | 0 | TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1532 | 0 | state->start = state->ptr = ptr;
1533 | 0 | status = SRE(match)(state, pattern, 0);
1534 | 0 | }
1535 | 0 | }
1536 | | 
1537 | 0 | return status;
1538 | 0 | } Line | Count | Source | 1372 | 2 | { | 1373 | 2 | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1374 | 2 | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1375 | 2 | Py_ssize_t status = 0; | 1376 | 2 | Py_ssize_t prefix_len = 0; | 1377 | 2 | Py_ssize_t prefix_skip = 0; | 1378 | 2 | SRE_CODE* prefix = NULL; | 1379 | 2 | SRE_CODE* charset = NULL; | 1380 | 2 | SRE_CODE* overlap = NULL; | 1381 | 2 | int flags = 0; | 1382 | | | 1383 | 2 | if (ptr > end) | 1384 | 0 | return 0; | 1385 | | | 1386 | 2 | if (pattern[0] == SRE_OP_INFO) { | 1387 | | /* optimization info block */ | 1388 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1389 | | | 1390 | 2 | flags = pattern[2]; | 1391 | | | 1392 | 2 | if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { | 1393 | 0 | TRACE(("reject (got %u chars, need %u)\n", | 1394 | 0 | (unsigned int)(end - ptr), pattern[3])); | 1395 | 0 | return 0; | 1396 | 0 | } | 1397 | 2 | if (pattern[3] > 1) { | 1398 | | /* adjust end point (but make sure we leave at least one | 1399 | | character in there, so literal search will work) */ | 1400 | 2 | end -= pattern[3] - 1; | 1401 | 2 | if (end <= ptr) | 1402 | 0 | end = ptr; | 1403 | 2 | } | 1404 | | | 1405 | 2 | if (flags & SRE_INFO_PREFIX) { | 1406 | | /* pattern starts with a known prefix */ | 1407 | | /* <length> <skip> <prefix data> <overlap data> */ | 1408 | 2 | prefix_len = pattern[5]; | 1409 | 2 | prefix_skip = pattern[6]; | 1410 | 2 | prefix = pattern + 7; | 1411 | 2 | overlap = prefix + prefix_len - 1; | 1412 | 2 | } else if (flags & SRE_INFO_CHARSET) | 1413 | | /* pattern starts with a character from a known set */ | 1414 | | /* <charset> */ | 1415 | 0 | charset = pattern + 5; | 1416 | | | 1417 | 2 | pattern += 1 + pattern[1]; | 1418 | 2 | } | 1419 | | | 1420 | 2 | TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n", | 1421 | 2 | prefix, prefix_len, prefix_skip)); | 1422 | 2 | TRACE(("charset = %p\n", charset)); | 1423 | | | 1424 | 2 | if (prefix_len 
== 1) { | 1425 | | /* pattern starts with a literal character */ | 1426 | 0 | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1427 | 0 | #if SIZEOF_SRE_CHAR < 4 | 1428 | 0 | if ((SRE_CODE) c != prefix[0]) | 1429 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1430 | 0 | #endif | 1431 | 0 | end = (SRE_CHAR *)state->end; | 1432 | 0 | state->must_advance = 0; | 1433 | 0 | while (ptr < end) { | 1434 | 0 | while (*ptr != c) { | 1435 | 0 | if (++ptr >= end) | 1436 | 0 | return 0; | 1437 | 0 | } | 1438 | 0 | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1439 | 0 | state->start = ptr; | 1440 | 0 | state->ptr = ptr + prefix_skip; | 1441 | 0 | if (flags & SRE_INFO_LITERAL) | 1442 | 0 | return 1; /* we got all of it */ | 1443 | 0 | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1444 | 0 | if (status != 0) | 1445 | 0 | return status; | 1446 | 0 | ++ptr; | 1447 | 0 | RESET_CAPTURE_GROUP(); | 1448 | 0 | } | 1449 | 0 | return 0; | 1450 | 0 | } | 1451 | | | 1452 | 2 | if (prefix_len > 1) { | 1453 | | /* pattern starts with a known prefix. 
use the overlap | 1454 | | table to skip forward as fast as we possibly can */ | 1455 | 2 | Py_ssize_t i = 0; | 1456 | | | 1457 | 2 | end = (SRE_CHAR *)state->end; | 1458 | 2 | if (prefix_len > end - ptr) | 1459 | 0 | return 0; | 1460 | 2 | #if SIZEOF_SRE_CHAR < 4 | 1461 | 6 | for (i = 0; i < prefix_len; i++) | 1462 | 4 | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1463 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1464 | 2 | #endif | 1465 | 2 | while (ptr < end) { | 1466 | 2 | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1467 | 2 | while (*ptr++ != c) { | 1468 | 0 | if (ptr >= end) | 1469 | 0 | return 0; | 1470 | 0 | } | 1471 | 2 | if (ptr >= end) | 1472 | 0 | return 0; | 1473 | | | 1474 | 2 | i = 1; | 1475 | 2 | state->must_advance = 0; | 1476 | 2 | do { | 1477 | 2 | if (*ptr == (SRE_CHAR) prefix[i]) { | 1478 | 2 | if (++i != prefix_len) { | 1479 | 0 | if (++ptr >= end) | 1480 | 0 | return 0; | 1481 | 0 | continue; | 1482 | 0 | } | 1483 | | /* found a potential match */ | 1484 | 2 | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1485 | 2 | state->start = ptr - (prefix_len - 1); | 1486 | 2 | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1487 | 2 | if (flags & SRE_INFO_LITERAL) | 1488 | 0 | return 1; /* we got all of it */ | 1489 | 2 | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1490 | 2 | if (status != 0) | 1491 | 2 | return status; | 1492 | | /* close but no cigar -- try again */ | 1493 | 0 | if (++ptr >= end) | 1494 | 0 | return 0; | 1495 | 0 | RESET_CAPTURE_GROUP(); | 1496 | 0 | } | 1497 | 0 | i = overlap[i]; | 1498 | 0 | } while (i != 0); | 1499 | 2 | } | 1500 | 0 | return 0; | 1501 | 2 | } | 1502 | | | 1503 | 0 | if (charset) { | 1504 | | /* pattern starts with a character from a known set */ | 1505 | 0 | end = (SRE_CHAR *)state->end; | 1506 | 0 | state->must_advance = 0; | 1507 | 0 | for (;;) { | 1508 | 0 | while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1509 | 0 | ptr++; | 1510 | 0 | if (ptr >= end) 
| 1511 | 0 | return 0; | 1512 | 0 | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1513 | 0 | state->start = ptr; | 1514 | 0 | state->ptr = ptr; | 1515 | 0 | status = SRE(match)(state, pattern, 0); | 1516 | 0 | if (status != 0) | 1517 | 0 | break; | 1518 | 0 | ptr++; | 1519 | 0 | RESET_CAPTURE_GROUP(); | 1520 | 0 | } | 1521 | 0 | } else { | 1522 | | /* general case */ | 1523 | 0 | assert(ptr <= end); | 1524 | 0 | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1525 | 0 | state->start = state->ptr = ptr; | 1526 | 0 | status = SRE(match)(state, pattern, 1); | 1527 | 0 | state->must_advance = 0; | 1528 | 0 | while (status == 0 && ptr < end) { | 1529 | 0 | ptr++; | 1530 | 0 | RESET_CAPTURE_GROUP(); | 1531 | 0 | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1532 | 0 | state->start = state->ptr = ptr; | 1533 | 0 | status = SRE(match)(state, pattern, 0); | 1534 | 0 | } | 1535 | 0 | } | 1536 | | | 1537 | 0 | return status; | 1538 | 0 | } |
Unexecuted instantiation: _sre.c:sre_ucs2_search Unexecuted instantiation: _sre.c:sre_ucs4_search |
1539 | | |
1540 | | #undef SRE_CHAR |
1541 | | #undef SIZEOF_SRE_CHAR |
1542 | | #undef SRE |
1543 | | |
1544 | | /* vim:ts=4:sw=4:et |
1545 | | */ |