Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Modules/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the _sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17
3
{
18
    /* check if pointer is at given position */
19
20
3
    Py_ssize_t thisp, thatp;
21
22
3
    switch (at) {
23
24
3
    case SRE_AT_BEGINNING:
25
3
    case SRE_AT_BEGINNING_STRING:
26
3
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
0
    case SRE_AT_END:
33
0
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
0
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
0
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
0
    case SRE_AT_END_STRING:
42
0
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        if (state->beginning == state->end)
46
0
            return 0;
47
0
        thatp = ((void*) ptr > state->beginning) ?
48
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
49
0
        thisp = ((void*) ptr < state->end) ?
50
0
            SRE_IS_WORD((int) ptr[0]) : 0;
51
0
        return thisp != thatp;
52
53
0
    case SRE_AT_NON_BOUNDARY:
54
0
        if (state->beginning == state->end)
55
0
            return 0;
56
0
        thatp = ((void*) ptr > state->beginning) ?
57
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
58
0
        thisp = ((void*) ptr < state->end) ?
59
0
            SRE_IS_WORD((int) ptr[0]) : 0;
60
0
        return thisp == thatp;
61
62
0
    case SRE_AT_LOC_BOUNDARY:
63
0
        if (state->beginning == state->end)
64
0
            return 0;
65
0
        thatp = ((void*) ptr > state->beginning) ?
66
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67
0
        thisp = ((void*) ptr < state->end) ?
68
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69
0
        return thisp != thatp;
70
71
0
    case SRE_AT_LOC_NON_BOUNDARY:
72
0
        if (state->beginning == state->end)
73
0
            return 0;
74
0
        thatp = ((void*) ptr > state->beginning) ?
75
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76
0
        thisp = ((void*) ptr < state->end) ?
77
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78
0
        return thisp == thatp;
79
80
0
    case SRE_AT_UNI_BOUNDARY:
81
0
        if (state->beginning == state->end)
82
0
            return 0;
83
0
        thatp = ((void*) ptr > state->beginning) ?
84
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85
0
        thisp = ((void*) ptr < state->end) ?
86
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87
0
        return thisp != thatp;
88
89
0
    case SRE_AT_UNI_NON_BOUNDARY:
90
0
        if (state->beginning == state->end)
91
0
            return 0;
92
0
        thatp = ((void*) ptr > state->beginning) ?
93
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94
0
        thisp = ((void*) ptr < state->end) ?
95
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96
0
        return thisp == thatp;
97
98
3
    }
99
100
0
    return 0;
101
3
}
_sre.c:sre_ucs1_at
Line
Count
Source
17
3
{
18
    /* check if pointer is at given position */
19
20
3
    Py_ssize_t thisp, thatp;
21
22
3
    switch (at) {
23
24
3
    case SRE_AT_BEGINNING:
25
3
    case SRE_AT_BEGINNING_STRING:
26
3
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
0
    case SRE_AT_END:
33
0
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
0
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
0
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
0
    case SRE_AT_END_STRING:
42
0
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        if (state->beginning == state->end)
46
0
            return 0;
47
0
        thatp = ((void*) ptr > state->beginning) ?
48
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
49
0
        thisp = ((void*) ptr < state->end) ?
50
0
            SRE_IS_WORD((int) ptr[0]) : 0;
51
0
        return thisp != thatp;
52
53
0
    case SRE_AT_NON_BOUNDARY:
54
0
        if (state->beginning == state->end)
55
0
            return 0;
56
0
        thatp = ((void*) ptr > state->beginning) ?
57
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
58
0
        thisp = ((void*) ptr < state->end) ?
59
0
            SRE_IS_WORD((int) ptr[0]) : 0;
60
0
        return thisp == thatp;
61
62
0
    case SRE_AT_LOC_BOUNDARY:
63
0
        if (state->beginning == state->end)
64
0
            return 0;
65
0
        thatp = ((void*) ptr > state->beginning) ?
66
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67
0
        thisp = ((void*) ptr < state->end) ?
68
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69
0
        return thisp != thatp;
70
71
0
    case SRE_AT_LOC_NON_BOUNDARY:
72
0
        if (state->beginning == state->end)
73
0
            return 0;
74
0
        thatp = ((void*) ptr > state->beginning) ?
75
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76
0
        thisp = ((void*) ptr < state->end) ?
77
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78
0
        return thisp == thatp;
79
80
0
    case SRE_AT_UNI_BOUNDARY:
81
0
        if (state->beginning == state->end)
82
0
            return 0;
83
0
        thatp = ((void*) ptr > state->beginning) ?
84
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85
0
        thisp = ((void*) ptr < state->end) ?
86
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87
0
        return thisp != thatp;
88
89
0
    case SRE_AT_UNI_NON_BOUNDARY:
90
0
        if (state->beginning == state->end)
91
0
            return 0;
92
0
        thatp = ((void*) ptr > state->beginning) ?
93
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94
0
        thisp = ((void*) ptr < state->end) ?
95
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96
0
        return thisp == thatp;
97
98
3
    }
99
100
0
    return 0;
101
3
}
Unexecuted instantiation: _sre.c:sre_ucs2_at
Unexecuted instantiation: _sre.c:sre_ucs4_at
102
103
LOCAL(int)
104
SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
105
29
{
106
    /* check if character is a member of the given set */
107
108
29
    int ok = 1;
109
110
38
    for (;;) {
111
38
        switch (*set++) {
112
113
9
        case SRE_OP_FAILURE:
114
9
            return !ok;
115
116
0
        case SRE_OP_LITERAL:
117
            /* <LITERAL> <code> */
118
0
            if (ch == set[0])
119
0
                return ok;
120
0
            set++;
121
0
            break;
122
123
20
        case SRE_OP_CATEGORY:
124
            /* <CATEGORY> <code> */
125
20
            if (sre_category(set[0], (int) ch))
126
16
                return ok;
127
4
            set++;
128
4
            break;
129
130
7
        case SRE_OP_CHARSET:
131
            /* <CHARSET> <bitmap> */
132
7
            if (ch < 256 &&
133
7
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134
2
                return ok;
135
5
            set += 256/SRE_CODE_BITS;
136
5
            break;
137
138
0
        case SRE_OP_RANGE:
139
            /* <RANGE> <lower> <upper> */
140
0
            if (set[0] <= ch && ch <= set[1])
141
0
                return ok;
142
0
            set += 2;
143
0
            break;
144
145
0
        case SRE_OP_RANGE_UNI_IGNORE:
146
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
147
0
        {
148
0
            SRE_CODE uch;
149
            /* ch is already lower cased */
150
0
            if (set[0] <= ch && ch <= set[1])
151
0
                return ok;
152
0
            uch = sre_upper_unicode(ch);
153
0
            if (set[0] <= uch && uch <= set[1])
154
0
                return ok;
155
0
            set += 2;
156
0
            break;
157
0
        }
158
159
0
        case SRE_OP_NEGATE:
160
0
            ok = !ok;
161
0
            break;
162
163
2
        case SRE_OP_BIGCHARSET:
164
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165
2
        {
166
2
            Py_ssize_t count, block;
167
2
            count = *(set++);
168
169
2
            if (ch < 0x10000u)
170
2
                block = ((unsigned char*)set)[ch >> 8];
171
0
            else
172
0
                block = -1;
173
2
            set += 256/sizeof(SRE_CODE);
174
2
            if (block >=0 &&
175
2
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176
2
                    (1u << (ch & (SRE_CODE_BITS-1)))))
177
2
                return ok;
178
0
            set += count * (256/SRE_CODE_BITS);
179
0
            break;
180
2
        }
181
182
0
        default:
183
            /* internal error -- there's not much we can do about it
184
               here, so let's just pretend it didn't match... */
185
0
            return 0;
186
38
        }
187
38
    }
188
29
}
_sre.c:sre_ucs1_charset
Line
Count
Source
105
29
{
106
    /* check if character is a member of the given set */
107
108
29
    int ok = 1;
109
110
38
    for (;;) {
111
38
        switch (*set++) {
112
113
9
        case SRE_OP_FAILURE:
114
9
            return !ok;
115
116
0
        case SRE_OP_LITERAL:
117
            /* <LITERAL> <code> */
118
0
            if (ch == set[0])
119
0
                return ok;
120
0
            set++;
121
0
            break;
122
123
20
        case SRE_OP_CATEGORY:
124
            /* <CATEGORY> <code> */
125
20
            if (sre_category(set[0], (int) ch))
126
16
                return ok;
127
4
            set++;
128
4
            break;
129
130
7
        case SRE_OP_CHARSET:
131
            /* <CHARSET> <bitmap> */
132
7
            if (ch < 256 &&
133
7
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134
2
                return ok;
135
5
            set += 256/SRE_CODE_BITS;
136
5
            break;
137
138
0
        case SRE_OP_RANGE:
139
            /* <RANGE> <lower> <upper> */
140
0
            if (set[0] <= ch && ch <= set[1])
141
0
                return ok;
142
0
            set += 2;
143
0
            break;
144
145
0
        case SRE_OP_RANGE_UNI_IGNORE:
146
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
147
0
        {
148
0
            SRE_CODE uch;
149
            /* ch is already lower cased */
150
0
            if (set[0] <= ch && ch <= set[1])
151
0
                return ok;
152
0
            uch = sre_upper_unicode(ch);
153
0
            if (set[0] <= uch && uch <= set[1])
154
0
                return ok;
155
0
            set += 2;
156
0
            break;
157
0
        }
158
159
0
        case SRE_OP_NEGATE:
160
0
            ok = !ok;
161
0
            break;
162
163
2
        case SRE_OP_BIGCHARSET:
164
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165
2
        {
166
2
            Py_ssize_t count, block;
167
2
            count = *(set++);
168
169
2
            if (ch < 0x10000u)
170
2
                block = ((unsigned char*)set)[ch >> 8];
171
0
            else
172
0
                block = -1;
173
2
            set += 256/sizeof(SRE_CODE);
174
2
            if (block >=0 &&
175
2
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176
2
                    (1u << (ch & (SRE_CODE_BITS-1)))))
177
2
                return ok;
178
0
            set += count * (256/SRE_CODE_BITS);
179
0
            break;
180
2
        }
181
182
0
        default:
183
            /* internal error -- there's not much we can do about it
184
               here, so let's just pretend it didn't match... */
185
0
            return 0;
186
38
        }
187
38
    }
188
29
}
Unexecuted instantiation: _sre.c:sre_ucs2_charset
Unexecuted instantiation: _sre.c:sre_ucs4_charset
189
190
LOCAL(int)
191
SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
192
0
{
193
0
    SRE_CODE lo, up;
194
0
    lo = sre_lower_locale(ch);
195
0
    if (SRE(charset)(state, set, lo))
196
0
       return 1;
197
198
0
    up = sre_upper_locale(ch);
199
0
    return up != lo && SRE(charset)(state, set, up);
200
0
}
Unexecuted instantiation: _sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: _sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: _sre.c:sre_ucs4_charset_loc_ignore
201
202
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel);
203
204
LOCAL(Py_ssize_t)
205
SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
206
122
{
207
122
    SRE_CODE chr;
208
122
    SRE_CHAR c;
209
122
    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
210
122
    SRE_CHAR* end = (SRE_CHAR *)state->end;
211
122
    Py_ssize_t i;
212
213
    /* adjust end */
214
122
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
215
111
        end = ptr + maxcount;
216
217
122
    switch (pattern[0]) {
218
219
9
    case SRE_OP_IN:
220
        /* repeated set */
221
9
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
222
25
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
223
16
            ptr++;
224
9
        break;
225
226
113
    case SRE_OP_ANY:
227
        /* repeated dot wildcard. */
228
113
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
229
224
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
230
111
            ptr++;
231
113
        break;
232
233
0
    case SRE_OP_ANY_ALL:
234
        /* repeated dot wildcard.  skip to the end of the target
235
           string, and backtrack from there */
236
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
237
0
        ptr = end;
238
0
        break;
239
240
0
    case SRE_OP_LITERAL:
241
        /* repeated literal */
242
0
        chr = pattern[1];
243
0
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
244
0
        c = (SRE_CHAR) chr;
245
#if SIZEOF_SRE_CHAR < 4
246
0
        if ((SRE_CODE) c != chr)
247
0
            ; /* literal can't match: doesn't fit in char width */
248
0
        else
249
0
#endif
250
0
        while (ptr < end && *ptr == c)
251
0
            ptr++;
252
0
        break;
253
254
0
    case SRE_OP_LITERAL_IGNORE:
255
        /* repeated literal */
256
0
        chr = pattern[1];
257
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
258
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
259
0
            ptr++;
260
0
        break;
261
262
0
    case SRE_OP_LITERAL_UNI_IGNORE:
263
        /* repeated literal */
264
0
        chr = pattern[1];
265
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
266
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
267
0
            ptr++;
268
0
        break;
269
270
0
    case SRE_OP_LITERAL_LOC_IGNORE:
271
        /* repeated literal */
272
0
        chr = pattern[1];
273
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
274
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
275
0
            ptr++;
276
0
        break;
277
278
0
    case SRE_OP_NOT_LITERAL:
279
        /* repeated non-literal */
280
0
        chr = pattern[1];
281
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
282
0
        c = (SRE_CHAR) chr;
283
#if SIZEOF_SRE_CHAR < 4
284
0
        if ((SRE_CODE) c != chr)
285
0
            ptr = end; /* literal can't match: doesn't fit in char width */
286
0
        else
287
0
#endif
288
0
        while (ptr < end && *ptr != c)
289
0
            ptr++;
290
0
        break;
291
292
0
    case SRE_OP_NOT_LITERAL_IGNORE:
293
        /* repeated non-literal */
294
0
        chr = pattern[1];
295
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
296
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
297
0
            ptr++;
298
0
        break;
299
300
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
301
        /* repeated non-literal */
302
0
        chr = pattern[1];
303
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
304
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
305
0
            ptr++;
306
0
        break;
307
308
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
309
        /* repeated non-literal */
310
0
        chr = pattern[1];
311
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
312
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
313
0
            ptr++;
314
0
        break;
315
316
0
    default:
317
        /* repeated single character pattern */
318
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
319
0
        while ((SRE_CHAR*) state->ptr < end) {
320
0
            i = SRE(match)(state, pattern, 0);
321
0
            if (i < 0)
322
0
                return i;
323
0
            if (!i)
324
0
                break;
325
0
        }
326
0
        TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
327
0
               (SRE_CHAR*) state->ptr - ptr));
328
0
        return (SRE_CHAR*) state->ptr - ptr;
329
122
    }
330
331
122
    TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
332
122
           ptr - (SRE_CHAR*) state->ptr));
333
122
    return ptr - (SRE_CHAR*) state->ptr;
334
122
}
_sre.c:sre_ucs1_count
Line
Count
Source
206
122
{
207
122
    SRE_CODE chr;
208
122
    SRE_CHAR c;
209
122
    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
210
122
    SRE_CHAR* end = (SRE_CHAR *)state->end;
211
122
    Py_ssize_t i;
212
213
    /* adjust end */
214
122
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
215
111
        end = ptr + maxcount;
216
217
122
    switch (pattern[0]) {
218
219
9
    case SRE_OP_IN:
220
        /* repeated set */
221
9
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
222
25
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
223
16
            ptr++;
224
9
        break;
225
226
113
    case SRE_OP_ANY:
227
        /* repeated dot wildcard. */
228
113
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
229
224
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
230
111
            ptr++;
231
113
        break;
232
233
0
    case SRE_OP_ANY_ALL:
234
        /* repeated dot wildcard.  skip to the end of the target
235
           string, and backtrack from there */
236
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
237
0
        ptr = end;
238
0
        break;
239
240
0
    case SRE_OP_LITERAL:
241
        /* repeated literal */
242
0
        chr = pattern[1];
243
0
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
244
0
        c = (SRE_CHAR) chr;
245
0
#if SIZEOF_SRE_CHAR < 4
246
0
        if ((SRE_CODE) c != chr)
247
0
            ; /* literal can't match: doesn't fit in char width */
248
0
        else
249
0
#endif
250
0
        while (ptr < end && *ptr == c)
251
0
            ptr++;
252
0
        break;
253
254
0
    case SRE_OP_LITERAL_IGNORE:
255
        /* repeated literal */
256
0
        chr = pattern[1];
257
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
258
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
259
0
            ptr++;
260
0
        break;
261
262
0
    case SRE_OP_LITERAL_UNI_IGNORE:
263
        /* repeated literal */
264
0
        chr = pattern[1];
265
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
266
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
267
0
            ptr++;
268
0
        break;
269
270
0
    case SRE_OP_LITERAL_LOC_IGNORE:
271
        /* repeated literal */
272
0
        chr = pattern[1];
273
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
274
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
275
0
            ptr++;
276
0
        break;
277
278
0
    case SRE_OP_NOT_LITERAL:
279
        /* repeated non-literal */
280
0
        chr = pattern[1];
281
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
282
0
        c = (SRE_CHAR) chr;
283
0
#if SIZEOF_SRE_CHAR < 4
284
0
        if ((SRE_CODE) c != chr)
285
0
            ptr = end; /* literal can't match: doesn't fit in char width */
286
0
        else
287
0
#endif
288
0
        while (ptr < end && *ptr != c)
289
0
            ptr++;
290
0
        break;
291
292
0
    case SRE_OP_NOT_LITERAL_IGNORE:
293
        /* repeated non-literal */
294
0
        chr = pattern[1];
295
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
296
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
297
0
            ptr++;
298
0
        break;
299
300
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
301
        /* repeated non-literal */
302
0
        chr = pattern[1];
303
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
304
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
305
0
            ptr++;
306
0
        break;
307
308
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
309
        /* repeated non-literal */
310
0
        chr = pattern[1];
311
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
312
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
313
0
            ptr++;
314
0
        break;
315
316
0
    default:
317
        /* repeated single character pattern */
318
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
319
0
        while ((SRE_CHAR*) state->ptr < end) {
320
0
            i = SRE(match)(state, pattern, 0);
321
0
            if (i < 0)
322
0
                return i;
323
0
            if (!i)
324
0
                break;
325
0
        }
326
0
        TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
327
0
               (SRE_CHAR*) state->ptr - ptr));
328
0
        return (SRE_CHAR*) state->ptr - ptr;
329
122
    }
330
331
122
    TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
332
122
           ptr - (SRE_CHAR*) state->ptr));
333
122
    return ptr - (SRE_CHAR*) state->ptr;
334
122
}
Unexecuted instantiation: _sre.c:sre_ucs2_count
Unexecuted instantiation: _sre.c:sre_ucs4_count
335
336
#if 0 /* not used in this release */
337
LOCAL(int)
338
SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
339
{
340
    /* check if an SRE_OP_INFO block matches at the current position.
341
       returns the number of SRE_CODE objects to skip if successful, 0
342
       if no match */
343
344
    SRE_CHAR* end = (SRE_CHAR*) state->end;
345
    SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
346
    Py_ssize_t i;
347
348
    /* check minimal length */
349
    if (pattern[3] && end - ptr < pattern[3])
350
        return 0;
351
352
    /* check known prefix */
353
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
354
        /* <length> <skip> <prefix data> <overlap data> */
355
        for (i = 0; i < pattern[5]; i++)
356
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
357
                return 0;
358
        return pattern[0] + 2 * pattern[6];
359
    }
360
    return pattern[0];
361
}
362
#endif
363
364
/* The macros below should be used to protect recursive SRE(match)()
365
 * calls that *failed* and do *not* return immediately (IOW, those
366
 * that will backtrack). Explaining:
367
 *
368
 * - Recursive SRE(match)() returned true: that's usually a success
369
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
370
 *   reason to restore lastmark;
371
 *
372
 * - Recursive SRE(match)() returned false but the current SRE(match)()
373
 *   is returning to the caller: If the current SRE(match)() is the
374
 *   top function of the recursion, returning false will be a matching
375
 *   failure, and it doesn't matter where lastmark is pointing to.
376
 *   If it's *not* the top function, it will be a recursive SRE(match)()
377
 *   failure by itself, and the calling SRE(match)() will have to deal
378
 *   with the failure by the same rules explained here (it will restore
379
 *   lastmark by itself if necessary);
380
 *
381
 * - Recursive SRE(match)() returned false, and will continue the
382
 *   outside 'for' loop: must be protected when breaking, since the next
383
 *   OP could potentially depend on lastmark;
384
 *
385
 * - Recursive SRE(match)() returned false, and will be called again
386
 *   inside a local for/while loop: must be protected between each
387
 *   loop iteration, since the recursive SRE(match)() could do anything,
388
 *   and could potentially depend on lastmark.
389
 *
390
 * For more information, check the discussion at SF patch #712900.
391
 */
392
#define LASTMARK_SAVE()     \
393
16
    do { \
394
16
        ctx->lastmark = state->lastmark; \
395
16
        ctx->lastindex = state->lastindex; \
396
16
    } while (0)
397
#define LASTMARK_RESTORE()  \
398
119
    do { \
399
119
        state->lastmark = ctx->lastmark; \
400
119
        state->lastindex = ctx->lastindex; \
401
119
    } while (0)
402
403
0
#define RETURN_ERROR(i) do { return i; } while(0)
404
123
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
405
18
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
406
407
#define RETURN_ON_ERROR(i) \
408
136
    do { if (i < 0) RETURN_ERROR(i); } while (0)
409
#define RETURN_ON_SUCCESS(i) \
410
4
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
411
#define RETURN_ON_FAILURE(i) \
412
0
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
413
414
141
#define DATA_STACK_ALLOC(state, type, ptr) \
415
141
do { \
416
141
    alloc_pos = state->data_stack_base; \
417
141
    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
418
141
           "(%" PY_FORMAT_SIZE_T "d)\n", \
419
141
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
420
141
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
421
6
        int j = data_stack_grow(state, sizeof(type)); \
422
6
        if (j < 0) return j; \
423
6
        if (ctx_pos != -1) \
424
6
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
425
6
    } \
426
141
    ptr = (type*)(state->data_stack+alloc_pos); \
427
141
    state->data_stack_base += sizeof(type); \
428
141
} while (0)
429
430
257
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
431
257
do { \
432
257
    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
433
257
    ptr = (type*)(state->data_stack+pos); \
434
257
} while (0)
435
436
4
#define DATA_STACK_PUSH(state, data, size) \
437
4
do { \
438
4
    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
439
4
           "(%" PY_FORMAT_SIZE_T "d)\n", \
440
4
           data, state->data_stack_base, size)); \
441
4
    if (size > state->data_stack_size - state->data_stack_base) { \
442
0
        int j = data_stack_grow(state, size); \
443
0
        if (j < 0) return j; \
444
0
        if (ctx_pos != -1) \
445
0
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
446
0
    } \
447
4
    memcpy(state->data_stack+state->data_stack_base, data, size); \
448
4
    state->data_stack_base += size; \
449
4
} while (0)
450
451
4
#define DATA_STACK_POP(state, data, size, discard) \
452
4
do { \
453
4
    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
454
4
           "(%" PY_FORMAT_SIZE_T "d)\n", \
455
4
           data, state->data_stack_base-size, size)); \
456
4
    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
457
4
    if (discard) \
458
4
        state->data_stack_base -= size; \
459
4
} while (0)
460
461
141
#define DATA_STACK_POP_DISCARD(state, size) \
462
141
do { \
463
141
    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
464
141
           "(%" PY_FORMAT_SIZE_T "d)\n", \
465
141
           state->data_stack_base-size, size)); \
466
141
    state->data_stack_base -= size; \
467
141
} while(0)
468
469
#define DATA_PUSH(x) \
470
4
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
471
#define DATA_POP(x) \
472
4
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
473
#define DATA_POP_DISCARD(x) \
474
141
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
475
#define DATA_ALLOC(t,p) \
476
141
    DATA_STACK_ALLOC(state, t, p)
477
#define DATA_LOOKUP_AT(t,p,pos) \
478
257
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
479
480
#define MARK_PUSH(lastmark) \
481
6
    do if (lastmark > 0) { \
482
0
        i = lastmark; /* ctx->lastmark may change if reallocated */ \
483
0
        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
484
6
    } while (0)
485
#define MARK_POP(lastmark) \
486
4
    do if (lastmark > 0) { \
487
0
        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
488
4
    } while (0)
489
#define MARK_POP_KEEP(lastmark) \
490
2
    do if (lastmark > 0) { \
491
0
        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
492
2
    } while (0)
493
#define MARK_POP_DISCARD(lastmark) \
494
2
    do if (lastmark > 0) { \
495
0
        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
496
2
    } while (0)
497
498
6
#define JUMP_NONE            0
499
0
#define JUMP_MAX_UNTIL_1     1
500
4
#define JUMP_MAX_UNTIL_2     2
501
4
#define JUMP_MAX_UNTIL_3     3
502
0
#define JUMP_MIN_UNTIL_1     4
503
0
#define JUMP_MIN_UNTIL_2     5
504
0
#define JUMP_MIN_UNTIL_3     6
505
4
#define JUMP_REPEAT          7
506
4
#define JUMP_REPEAT_ONE_1    8
507
3
#define JUMP_REPEAT_ONE_2    9
508
113
#define JUMP_MIN_REPEAT_ONE  10
509
3
#define JUMP_BRANCH          11
510
0
#define JUMP_ASSERT          12
511
0
#define JUMP_ASSERT_NOT      13
512
513
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
514
135
    DATA_ALLOC(SRE(match_context), nextctx); \
515
135
    nextctx->last_ctx_pos = ctx_pos; \
516
135
    nextctx->jump = jumpvalue; \
517
135
    nextctx->pattern = nextpattern; \
518
135
    nextctx->toplevel = toplevel_; \
519
135
    ctx_pos = alloc_pos; \
520
135
    ctx = nextctx; \
521
135
    goto entrance; \
522
135
    jumplabel: \
523
135
    while (0) /* gcc doesn't like labels at end of scopes */ \
524
525
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
526
135
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
527
528
/* DO_JUMPX with the child's `toplevel` flag forced to 0.  In this file it
   is used for the ASSERT / ASSERT_NOT sub-patterns. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
530
531
typedef struct {
532
    Py_ssize_t last_ctx_pos;
533
    Py_ssize_t jump;
534
    SRE_CHAR* ptr;
535
    SRE_CODE* pattern;
536
    Py_ssize_t count;
537
    Py_ssize_t lastmark;
538
    Py_ssize_t lastindex;
539
    union {
540
        SRE_CODE chr;
541
        SRE_REPEAT* rep;
542
    } u;
543
    int toplevel;
544
} SRE(match_context);
545
546
/* check if string matches the given pattern.  returns <0 for
547
   error, 0 for failure, and 1 for success */
548
LOCAL(Py_ssize_t)
549
SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
550
6
{
551
6
    SRE_CHAR* end = (SRE_CHAR *)state->end;
552
6
    Py_ssize_t alloc_pos, ctx_pos = -1;
553
6
    Py_ssize_t i, ret = 0;
554
6
    Py_ssize_t jump;
555
6
    unsigned int sigcount=0;
556
557
6
    SRE(match_context)* ctx;
558
6
    SRE(match_context)* nextctx;
559
560
6
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
561
562
6
    DATA_ALLOC(SRE(match_context), ctx);
563
6
    ctx->last_ctx_pos = -1;
564
6
    ctx->jump = JUMP_NONE;
565
6
    ctx->pattern = pattern;
566
6
    ctx->toplevel = toplevel;
567
6
    ctx_pos = alloc_pos;
568
569
141
entrance:
570
571
141
    ctx->ptr = (SRE_CHAR *)state->ptr;
572
573
141
    if (ctx->pattern[0] == SRE_OP_INFO) {
574
        /* optimization info block */
575
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
576
4
        if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
577
0
            TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
578
0
                   "need %" PY_FORMAT_SIZE_T "d)\n",
579
0
                   end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
580
0
            RETURN_FAILURE;
581
0
        }
582
4
        ctx->pattern += ctx->pattern[1] + 1;
583
4
    }
584
585
157
    for (;;) {
586
157
        ++sigcount;
587
157
        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
588
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
589
590
157
        switch (*ctx->pattern++) {
591
592
4
        case SRE_OP_MARK:
593
            /* set mark */
594
            /* <MARK> <gid> */
595
4
            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
596
4
                   ctx->ptr, ctx->pattern[0]));
597
4
            i = ctx->pattern[0];
598
4
            if (i & 1)
599
0
                state->lastindex = i/2 + 1;
600
4
            if (i > state->lastmark) {
601
                /* state->lastmark is the highest valid index in the
602
                   state->mark array.  If it is increased by more than 1,
603
                   the intervening marks must be set to NULL to signal
604
                   that these marks have not been encountered. */
605
4
                Py_ssize_t j = state->lastmark + 1;
606
8
                while (j < i)
607
4
                    state->mark[j++] = NULL;
608
4
                state->lastmark = i;
609
4
            }
610
4
            state->mark[i] = ctx->ptr;
611
4
            ctx->pattern++;
612
4
            break;
613
614
120
        case SRE_OP_LITERAL:
615
            /* match literal string */
616
            /* <LITERAL> <code> */
617
120
            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
618
120
                   ctx->ptr, *ctx->pattern));
619
120
            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
620
115
                RETURN_FAILURE;
621
5
            ctx->pattern++;
622
5
            ctx->ptr++;
623
5
            break;
624
625
0
        case SRE_OP_NOT_LITERAL:
626
            /* match anything that is not literal character */
627
            /* <NOT_LITERAL> <code> */
628
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
629
0
                   ctx->ptr, *ctx->pattern));
630
0
            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
631
0
                RETURN_FAILURE;
632
0
            ctx->pattern++;
633
0
            ctx->ptr++;
634
0
            break;
635
636
4
        case SRE_OP_SUCCESS:
637
            /* end of pattern */
638
4
            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
639
4
            if (ctx->toplevel &&
640
4
                ((state->match_all && ctx->ptr != state->end) ||
641
2
                 (state->must_advance && ctx->ptr == state->start)))
642
0
            {
643
0
                RETURN_FAILURE;
644
0
            }
645
4
            state->ptr = ctx->ptr;
646
4
            RETURN_SUCCESS;
647
648
3
        case SRE_OP_AT:
649
            /* match at given position */
650
            /* <AT> <code> */
651
3
            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
652
3
            if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
653
0
                RETURN_FAILURE;
654
3
            ctx->pattern++;
655
3
            break;
656
657
0
        case SRE_OP_CATEGORY:
658
            /* match at given category */
659
            /* <CATEGORY> <code> */
660
0
            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
661
0
                   ctx->ptr, *ctx->pattern));
662
0
            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
663
0
                RETURN_FAILURE;
664
0
            ctx->pattern++;
665
0
            ctx->ptr++;
666
0
            break;
667
668
0
        case SRE_OP_ANY:
669
            /* match anything (except a newline) */
670
            /* <ANY> */
671
0
            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
672
0
            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
673
0
                RETURN_FAILURE;
674
0
            ctx->ptr++;
675
0
            break;
676
677
0
        case SRE_OP_ANY_ALL:
678
            /* match anything */
679
            /* <ANY_ALL> */
680
0
            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
681
0
            if (ctx->ptr >= end)
682
0
                RETURN_FAILURE;
683
0
            ctx->ptr++;
684
0
            break;
685
686
1
        case SRE_OP_IN:
687
            /* match set member (or non_member) */
688
            /* <IN> <skip> <set> */
689
1
            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
690
1
            if (ctx->ptr >= end ||
691
1
                !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
692
0
                RETURN_FAILURE;
693
1
            ctx->pattern += ctx->pattern[0];
694
1
            ctx->ptr++;
695
1
            break;
696
697
0
        case SRE_OP_LITERAL_IGNORE:
698
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
699
0
                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
700
0
            if (ctx->ptr >= end ||
701
0
                sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
702
0
                RETURN_FAILURE;
703
0
            ctx->pattern++;
704
0
            ctx->ptr++;
705
0
            break;
706
707
0
        case SRE_OP_LITERAL_UNI_IGNORE:
708
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
709
0
                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
710
0
            if (ctx->ptr >= end ||
711
0
                sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
712
0
                RETURN_FAILURE;
713
0
            ctx->pattern++;
714
0
            ctx->ptr++;
715
0
            break;
716
717
0
        case SRE_OP_LITERAL_LOC_IGNORE:
718
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
719
0
                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
720
0
            if (ctx->ptr >= end
721
0
                || !char_loc_ignore(*ctx->pattern, *ctx->ptr))
722
0
                RETURN_FAILURE;
723
0
            ctx->pattern++;
724
0
            ctx->ptr++;
725
0
            break;
726
727
0
        case SRE_OP_NOT_LITERAL_IGNORE:
728
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
729
0
                   ctx->pattern, ctx->ptr, *ctx->pattern));
730
0
            if (ctx->ptr >= end ||
731
0
                sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
732
0
                RETURN_FAILURE;
733
0
            ctx->pattern++;
734
0
            ctx->ptr++;
735
0
            break;
736
737
0
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
738
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
739
0
                   ctx->pattern, ctx->ptr, *ctx->pattern));
740
0
            if (ctx->ptr >= end ||
741
0
                sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
742
0
                RETURN_FAILURE;
743
0
            ctx->pattern++;
744
0
            ctx->ptr++;
745
0
            break;
746
747
0
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
748
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
749
0
                   ctx->pattern, ctx->ptr, *ctx->pattern));
750
0
            if (ctx->ptr >= end
751
0
                || char_loc_ignore(*ctx->pattern, *ctx->ptr))
752
0
                RETURN_FAILURE;
753
0
            ctx->pattern++;
754
0
            ctx->ptr++;
755
0
            break;
756
757
0
        case SRE_OP_IN_IGNORE:
758
0
            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
759
0
            if (ctx->ptr >= end
760
0
                || !SRE(charset)(state, ctx->pattern+1,
761
0
                                 (SRE_CODE)sre_lower_ascii(*ctx->ptr)))
762
0
                RETURN_FAILURE;
763
0
            ctx->pattern += ctx->pattern[0];
764
0
            ctx->ptr++;
765
0
            break;
766
767
2
        case SRE_OP_IN_UNI_IGNORE:
768
2
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
769
2
            if (ctx->ptr >= end
770
2
                || !SRE(charset)(state, ctx->pattern+1,
771
2
                                 (SRE_CODE)sre_lower_unicode(*ctx->ptr)))
772
0
                RETURN_FAILURE;
773
2
            ctx->pattern += ctx->pattern[0];
774
2
            ctx->ptr++;
775
2
            break;
776
777
0
        case SRE_OP_IN_LOC_IGNORE:
778
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
779
0
            if (ctx->ptr >= end
780
0
                || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
781
0
                RETURN_FAILURE;
782
0
            ctx->pattern += ctx->pattern[0];
783
0
            ctx->ptr++;
784
0
            break;
785
786
1
        case SRE_OP_JUMP:
787
1
        case SRE_OP_INFO:
788
            /* jump forward */
789
            /* <JUMP> <offset> */
790
1
            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
791
1
                   ctx->ptr, ctx->pattern[0]));
792
1
            ctx->pattern += ctx->pattern[0];
793
1
            break;
794
795
3
        case SRE_OP_BRANCH:
796
            /* alternation */
797
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
798
3
            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
799
3
            LASTMARK_SAVE();
800
3
            ctx->u.rep = state->repeat;
801
3
            if (ctx->u.rep)
802
2
                MARK_PUSH(ctx->lastmark);
803
7
            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
804
5
                if (ctx->pattern[1] == SRE_OP_LITERAL &&
805
5
                    (ctx->ptr >= end ||
806
2
                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
807
2
                    continue;
808
3
                if (ctx->pattern[1] == SRE_OP_IN &&
809
3
                    (ctx->ptr >= end ||
810
1
                     !SRE(charset)(state, ctx->pattern + 3,
811
1
                                   (SRE_CODE) *ctx->ptr)))
812
0
                    continue;
813
3
                state->ptr = ctx->ptr;
814
3
                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
815
3
                if (ret) {
816
1
                    if (ctx->u.rep)
817
0
                        MARK_POP_DISCARD(ctx->lastmark);
818
1
                    RETURN_ON_ERROR(ret);
819
1
                    RETURN_SUCCESS;
820
1
                }
821
2
                if (ctx->u.rep)
822
2
                    MARK_POP_KEEP(ctx->lastmark);
823
2
                LASTMARK_RESTORE();
824
2
            }
825
2
            if (ctx->u.rep)
826
2
                MARK_POP_DISCARD(ctx->lastmark);
827
2
            RETURN_FAILURE;
828
829
9
        case SRE_OP_REPEAT_ONE:
830
            /* match repeated sequence (maximizing regexp) */
831
832
            /* this operator only works if the repeated item is
833
               exactly one character wide, and we're not already
834
               collecting backtracking points.  for other cases,
835
               use the MAX_REPEAT operator */
836
837
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
838
839
9
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
840
9
                   ctx->pattern[1], ctx->pattern[2]));
841
842
9
            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
843
0
                RETURN_FAILURE; /* cannot match */
844
845
9
            state->ptr = ctx->ptr;
846
847
9
            ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
848
9
            RETURN_ON_ERROR(ret);
849
9
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
850
9
            ctx->count = ret;
851
9
            ctx->ptr += ctx->count;
852
853
            /* when we arrive here, count contains the number of
854
               matches, and ctx->ptr points to the tail of the target
855
               string.  check if the rest of the pattern matches,
856
               and backtrack if not. */
857
858
9
            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
859
2
                RETURN_FAILURE;
860
861
7
            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
862
7
                ctx->ptr == state->end &&
863
7
                !(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
864
0
            {
865
                /* tail is empty.  we're finished */
866
0
                state->ptr = ctx->ptr;
867
0
                RETURN_SUCCESS;
868
0
            }
869
870
7
            LASTMARK_SAVE();
871
872
7
            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
873
                /* tail starts with a literal. skip positions where
874
                   the rest of the pattern cannot possibly match */
875
4
                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
876
6
                for (;;) {
877
6
                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
878
6
                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
879
0
                        ctx->ptr--;
880
0
                        ctx->count--;
881
0
                    }
882
6
                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
883
2
                        break;
884
4
                    state->ptr = ctx->ptr;
885
4
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
886
0
                            ctx->pattern+ctx->pattern[0]);
887
4
                    if (ret) {
888
2
                        RETURN_ON_ERROR(ret);
889
2
                        RETURN_SUCCESS;
890
2
                    }
891
892
2
                    LASTMARK_RESTORE();
893
894
2
                    ctx->ptr--;
895
2
                    ctx->count--;
896
2
                }
897
898
4
            } else {
899
                /* general case */
900
3
                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
901
3
                    state->ptr = ctx->ptr;
902
3
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
903
0
                            ctx->pattern+ctx->pattern[0]);
904
3
                    if (ret) {
905
3
                        RETURN_ON_ERROR(ret);
906
3
                        RETURN_SUCCESS;
907
3
                    }
908
0
                    ctx->ptr--;
909
0
                    ctx->count--;
910
0
                    LASTMARK_RESTORE();
911
0
                }
912
3
            }
913
2
            RETURN_FAILURE;
914
915
2
        case SRE_OP_MIN_REPEAT_ONE:
916
            /* match repeated sequence (minimizing regexp) */
917
918
            /* this operator only works if the repeated item is
919
               exactly one character wide, and we're not already
920
               collecting backtracking points.  for other cases,
921
               use the MIN_REPEAT operator */
922
923
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
924
925
2
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
926
2
                   ctx->pattern[1], ctx->pattern[2]));
927
928
2
            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
929
0
                RETURN_FAILURE; /* cannot match */
930
931
2
            state->ptr = ctx->ptr;
932
933
2
            if (ctx->pattern[1] == 0)
934
2
                ctx->count = 0;
935
0
            else {
936
                /* count using pattern min as the maximum */
937
0
                ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
938
0
                RETURN_ON_ERROR(ret);
939
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
940
0
                if (ret < (Py_ssize_t) ctx->pattern[1])
941
                    /* didn't match minimum number of times */
942
0
                    RETURN_FAILURE;
943
                /* advance past minimum matches of repeat */
944
0
                ctx->count = ret;
945
0
                ctx->ptr += ctx->count;
946
0
            }
947
948
2
            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
949
2
                !(ctx->toplevel &&
950
0
                  ((state->match_all && ctx->ptr != state->end) ||
951
0
                   (state->must_advance && ctx->ptr == state->start))))
952
0
            {
953
                /* tail is empty.  we're finished */
954
0
                state->ptr = ctx->ptr;
955
0
                RETURN_SUCCESS;
956
957
2
            } else {
958
                /* general case */
959
2
                LASTMARK_SAVE();
960
113
                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
961
113
                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
962
113
                    state->ptr = ctx->ptr;
963
113
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
964
0
                            ctx->pattern+ctx->pattern[0]);
965
113
                    if (ret) {
966
0
                        RETURN_ON_ERROR(ret);
967
0
                        RETURN_SUCCESS;
968
0
                    }
969
113
                    state->ptr = ctx->ptr;
970
113
                    ret = SRE(count)(state, ctx->pattern+3, 1);
971
113
                    RETURN_ON_ERROR(ret);
972
113
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
973
113
                    if (ret == 0)
974
2
                        break;
975
111
                    assert(ret == 1);
976
111
                    ctx->ptr++;
977
111
                    ctx->count++;
978
111
                    LASTMARK_RESTORE();
979
111
                }
980
2
            }
981
2
            RETURN_FAILURE;
982
983
4
        case SRE_OP_REPEAT:
984
            /* create repeat context.  all the hard work is done
985
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
986
            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
987
4
            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
988
4
                   ctx->pattern[1], ctx->pattern[2]));
989
990
            /* install new repeat context */
991
4
            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
992
4
            if (!ctx->u.rep) {
993
0
                PyErr_NoMemory();
994
0
                RETURN_FAILURE;
995
0
            }
996
4
            ctx->u.rep->count = -1;
997
4
            ctx->u.rep->pattern = ctx->pattern;
998
4
            ctx->u.rep->prev = state->repeat;
999
4
            ctx->u.rep->last_ptr = NULL;
1000
4
            state->repeat = ctx->u.rep;
1001
1002
4
            state->ptr = ctx->ptr;
1003
4
            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1004
4
            state->repeat = ctx->u.rep->prev;
1005
4
            PyObject_FREE(ctx->u.rep);
1006
1007
4
            if (ret) {
1008
4
                RETURN_ON_ERROR(ret);
1009
4
                RETURN_SUCCESS;
1010
4
            }
1011
0
            RETURN_FAILURE;
1012
1013
4
        case SRE_OP_MAX_UNTIL:
1014
            /* maximizing repeat */
1015
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1016
1017
            /* FIXME: we probably need to deal with zero-width
1018
               matches in here... */
1019
1020
4
            ctx->u.rep = state->repeat;
1021
4
            if (!ctx->u.rep)
1022
0
                RETURN_ERROR(SRE_ERROR_STATE);
1023
1024
4
            state->ptr = ctx->ptr;
1025
1026
4
            ctx->count = ctx->u.rep->count+1;
1027
1028
4
            TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1029
4
                   ctx->ptr, ctx->count));
1030
1031
4
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1032
                /* not enough matches */
1033
0
                ctx->u.rep->count = ctx->count;
1034
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1035
0
                        ctx->u.rep->pattern+3);
1036
0
                if (ret) {
1037
0
                    RETURN_ON_ERROR(ret);
1038
0
                    RETURN_SUCCESS;
1039
0
                }
1040
0
                ctx->u.rep->count = ctx->count-1;
1041
0
                state->ptr = ctx->ptr;
1042
0
                RETURN_FAILURE;
1043
0
            }
1044
1045
4
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1046
4
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1047
4
                state->ptr != ctx->u.rep->last_ptr) {
1048
                /* we may have enough matches, but if we can
1049
                   match another item, do so */
1050
4
                ctx->u.rep->count = ctx->count;
1051
4
                LASTMARK_SAVE();
1052
4
                MARK_PUSH(ctx->lastmark);
1053
                /* zero-width match protection */
1054
4
                DATA_PUSH(&ctx->u.rep->last_ptr);
1055
4
                ctx->u.rep->last_ptr = state->ptr;
1056
4
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1057
0
                        ctx->u.rep->pattern+3);
1058
4
                DATA_POP(&ctx->u.rep->last_ptr);
1059
4
                if (ret) {
1060
0
                    MARK_POP_DISCARD(ctx->lastmark);
1061
0
                    RETURN_ON_ERROR(ret);
1062
0
                    RETURN_SUCCESS;
1063
0
                }
1064
4
                MARK_POP(ctx->lastmark);
1065
4
                LASTMARK_RESTORE();
1066
4
                ctx->u.rep->count = ctx->count-1;
1067
4
                state->ptr = ctx->ptr;
1068
4
            }
1069
1070
            /* cannot match more repeated items here.  make sure the
1071
               tail matches */
1072
4
            state->repeat = ctx->u.rep->prev;
1073
4
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1074
4
            RETURN_ON_SUCCESS(ret);
1075
0
            state->repeat = ctx->u.rep;
1076
0
            state->ptr = ctx->ptr;
1077
0
            RETURN_FAILURE;
1078
1079
0
        case SRE_OP_MIN_UNTIL:
1080
            /* minimizing repeat */
1081
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1082
1083
0
            ctx->u.rep = state->repeat;
1084
0
            if (!ctx->u.rep)
1085
0
                RETURN_ERROR(SRE_ERROR_STATE);
1086
1087
0
            state->ptr = ctx->ptr;
1088
1089
0
            ctx->count = ctx->u.rep->count+1;
1090
1091
0
            TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
1092
0
                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
1093
1094
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1095
                /* not enough matches */
1096
0
                ctx->u.rep->count = ctx->count;
1097
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1098
0
                        ctx->u.rep->pattern+3);
1099
0
                if (ret) {
1100
0
                    RETURN_ON_ERROR(ret);
1101
0
                    RETURN_SUCCESS;
1102
0
                }
1103
0
                ctx->u.rep->count = ctx->count-1;
1104
0
                state->ptr = ctx->ptr;
1105
0
                RETURN_FAILURE;
1106
0
            }
1107
1108
0
            LASTMARK_SAVE();
1109
1110
            /* see if the tail matches */
1111
0
            state->repeat = ctx->u.rep->prev;
1112
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1113
0
            if (ret) {
1114
0
                RETURN_ON_ERROR(ret);
1115
0
                RETURN_SUCCESS;
1116
0
            }
1117
1118
0
            state->repeat = ctx->u.rep;
1119
0
            state->ptr = ctx->ptr;
1120
1121
0
            LASTMARK_RESTORE();
1122
1123
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1124
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1125
0
                state->ptr == ctx->u.rep->last_ptr)
1126
0
                RETURN_FAILURE;
1127
1128
0
            ctx->u.rep->count = ctx->count;
1129
            /* zero-width match protection */
1130
0
            DATA_PUSH(&ctx->u.rep->last_ptr);
1131
0
            ctx->u.rep->last_ptr = state->ptr;
1132
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1133
0
                    ctx->u.rep->pattern+3);
1134
0
            DATA_POP(&ctx->u.rep->last_ptr);
1135
0
            if (ret) {
1136
0
                RETURN_ON_ERROR(ret);
1137
0
                RETURN_SUCCESS;
1138
0
            }
1139
0
            ctx->u.rep->count = ctx->count-1;
1140
0
            state->ptr = ctx->ptr;
1141
0
            RETURN_FAILURE;
1142
1143
0
        case SRE_OP_GROUPREF:
1144
            /* match backreference */
1145
0
            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1146
0
                   ctx->ptr, ctx->pattern[0]));
1147
0
            i = ctx->pattern[0];
1148
0
            {
1149
0
                Py_ssize_t groupref = i+i;
1150
0
                if (groupref >= state->lastmark) {
1151
0
                    RETURN_FAILURE;
1152
0
                } else {
1153
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1154
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1155
0
                    if (!p || !e || e < p)
1156
0
                        RETURN_FAILURE;
1157
0
                    while (p < e) {
1158
0
                        if (ctx->ptr >= end || *ctx->ptr != *p)
1159
0
                            RETURN_FAILURE;
1160
0
                        p++;
1161
0
                        ctx->ptr++;
1162
0
                    }
1163
0
                }
1164
0
            }
1165
0
            ctx->pattern++;
1166
0
            break;
1167
1168
0
        case SRE_OP_GROUPREF_IGNORE:
1169
            /* match backreference */
1170
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1171
0
                   ctx->ptr, ctx->pattern[0]));
1172
0
            i = ctx->pattern[0];
1173
0
            {
1174
0
                Py_ssize_t groupref = i+i;
1175
0
                if (groupref >= state->lastmark) {
1176
0
                    RETURN_FAILURE;
1177
0
                } else {
1178
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1179
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1180
0
                    if (!p || !e || e < p)
1181
0
                        RETURN_FAILURE;
1182
0
                    while (p < e) {
1183
0
                        if (ctx->ptr >= end ||
1184
0
                            sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
1185
0
                            RETURN_FAILURE;
1186
0
                        p++;
1187
0
                        ctx->ptr++;
1188
0
                    }
1189
0
                }
1190
0
            }
1191
0
            ctx->pattern++;
1192
0
            break;
1193
1194
0
        case SRE_OP_GROUPREF_UNI_IGNORE:
1195
            /* match backreference */
1196
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
1197
0
                   ctx->ptr, ctx->pattern[0]));
1198
0
            i = ctx->pattern[0];
1199
0
            {
1200
0
                Py_ssize_t groupref = i+i;
1201
0
                if (groupref >= state->lastmark) {
1202
0
                    RETURN_FAILURE;
1203
0
                } else {
1204
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1205
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1206
0
                    if (!p || !e || e < p)
1207
0
                        RETURN_FAILURE;
1208
0
                    while (p < e) {
1209
0
                        if (ctx->ptr >= end ||
1210
0
                            sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
1211
0
                            RETURN_FAILURE;
1212
0
                        p++;
1213
0
                        ctx->ptr++;
1214
0
                    }
1215
0
                }
1216
0
            }
1217
0
            ctx->pattern++;
1218
0
            break;
1219
1220
0
        case SRE_OP_GROUPREF_LOC_IGNORE:
1221
            /* match backreference */
1222
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
1223
0
                   ctx->ptr, ctx->pattern[0]));
1224
0
            i = ctx->pattern[0];
1225
0
            {
1226
0
                Py_ssize_t groupref = i+i;
1227
0
                if (groupref >= state->lastmark) {
1228
0
                    RETURN_FAILURE;
1229
0
                } else {
1230
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1231
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1232
0
                    if (!p || !e || e < p)
1233
0
                        RETURN_FAILURE;
1234
0
                    while (p < e) {
1235
0
                        if (ctx->ptr >= end ||
1236
0
                            sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
1237
0
                            RETURN_FAILURE;
1238
0
                        p++;
1239
0
                        ctx->ptr++;
1240
0
                    }
1241
0
                }
1242
0
            }
1243
0
            ctx->pattern++;
1244
0
            break;
1245
1246
0
        case SRE_OP_GROUPREF_EXISTS:
1247
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1248
0
                   ctx->ptr, ctx->pattern[0]));
1249
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1250
0
            i = ctx->pattern[0];
1251
0
            {
1252
0
                Py_ssize_t groupref = i+i;
1253
0
                if (groupref >= state->lastmark) {
1254
0
                    ctx->pattern += ctx->pattern[1];
1255
0
                    break;
1256
0
                } else {
1257
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1258
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1259
0
                    if (!p || !e || e < p) {
1260
0
                        ctx->pattern += ctx->pattern[1];
1261
0
                        break;
1262
0
                    }
1263
0
                }
1264
0
            }
1265
0
            ctx->pattern += 2;
1266
0
            break;
1267
1268
0
        case SRE_OP_ASSERT:
1269
            /* assert subpattern */
1270
            /* <ASSERT> <skip> <back> <pattern> */
1271
0
            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1272
0
                   ctx->ptr, ctx->pattern[1]));
1273
0
            if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
1274
0
                RETURN_FAILURE;
1275
0
            state->ptr = ctx->ptr - ctx->pattern[1];
1276
0
            DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1277
0
            RETURN_ON_FAILURE(ret);
1278
0
            ctx->pattern += ctx->pattern[0];
1279
0
            break;
1280
1281
0
        case SRE_OP_ASSERT_NOT:
1282
            /* assert not subpattern */
1283
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1284
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1285
0
                   ctx->ptr, ctx->pattern[1]));
1286
0
            if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1287
0
                state->ptr = ctx->ptr - ctx->pattern[1];
1288
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1289
0
                if (ret) {
1290
0
                    RETURN_ON_ERROR(ret);
1291
0
                    RETURN_FAILURE;
1292
0
                }
1293
0
            }
1294
0
            ctx->pattern += ctx->pattern[0];
1295
0
            break;
1296
1297
0
        case SRE_OP_FAILURE:
1298
            /* immediate failure */
1299
0
            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1300
0
            RETURN_FAILURE;
1301
1302
0
        default:
1303
0
            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1304
0
                   ctx->pattern[-1]));
1305
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1306
157
        }
1307
157
    }
1308
1309
141
exit:
1310
141
    ctx_pos = ctx->last_ctx_pos;
1311
141
    jump = ctx->jump;
1312
141
    DATA_POP_DISCARD(ctx);
1313
141
    if (ctx_pos == -1)
1314
6
        return ret;
1315
135
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1316
1317
135
    switch (jump) {
1318
4
        case JUMP_MAX_UNTIL_2:
1319
4
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1320
4
            goto jump_max_until_2;
1321
4
        case JUMP_MAX_UNTIL_3:
1322
4
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1323
4
            goto jump_max_until_3;
1324
0
        case JUMP_MIN_UNTIL_2:
1325
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1326
0
            goto jump_min_until_2;
1327
0
        case JUMP_MIN_UNTIL_3:
1328
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1329
0
            goto jump_min_until_3;
1330
3
        case JUMP_BRANCH:
1331
3
            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1332
3
            goto jump_branch;
1333
0
        case JUMP_MAX_UNTIL_1:
1334
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1335
0
            goto jump_max_until_1;
1336
0
        case JUMP_MIN_UNTIL_1:
1337
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1338
0
            goto jump_min_until_1;
1339
4
        case JUMP_REPEAT:
1340
4
            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1341
4
            goto jump_repeat;
1342
4
        case JUMP_REPEAT_ONE_1:
1343
4
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1344
4
            goto jump_repeat_one_1;
1345
3
        case JUMP_REPEAT_ONE_2:
1346
3
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1347
3
            goto jump_repeat_one_2;
1348
113
        case JUMP_MIN_REPEAT_ONE:
1349
113
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1350
113
            goto jump_min_repeat_one;
1351
0
        case JUMP_ASSERT:
1352
0
            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1353
0
            goto jump_assert;
1354
0
        case JUMP_ASSERT_NOT:
1355
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1356
0
            goto jump_assert_not;
1357
0
        case JUMP_NONE:
1358
0
            TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1359
0
                   ctx->ptr, ret));
1360
0
            break;
1361
135
    }
1362
1363
0
    return ret; /* should never get here */
1364
135
}
_sre.c:sre_ucs1_match
Line
Count
Source
550
6
{
551
6
    SRE_CHAR* end = (SRE_CHAR *)state->end;
552
6
    Py_ssize_t alloc_pos, ctx_pos = -1;
553
6
    Py_ssize_t i, ret = 0;
554
6
    Py_ssize_t jump;
555
6
    unsigned int sigcount=0;
556
557
6
    SRE(match_context)* ctx;
558
6
    SRE(match_context)* nextctx;
559
560
6
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
561
562
6
    DATA_ALLOC(SRE(match_context), ctx);
563
6
    ctx->last_ctx_pos = -1;
564
6
    ctx->jump = JUMP_NONE;
565
6
    ctx->pattern = pattern;
566
6
    ctx->toplevel = toplevel;
567
6
    ctx_pos = alloc_pos;
568
569
141
entrance:
570
571
141
    ctx->ptr = (SRE_CHAR *)state->ptr;
572
573
141
    if (ctx->pattern[0] == SRE_OP_INFO) {
574
        /* optimization info block */
575
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
576
4
        if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
577
0
            TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
578
0
                   "need %" PY_FORMAT_SIZE_T "d)\n",
579
0
                   end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
580
0
            RETURN_FAILURE;
581
0
        }
582
4
        ctx->pattern += ctx->pattern[1] + 1;
583
4
    }
584
585
157
    for (;;) {
586
157
        ++sigcount;
587
157
        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
588
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
589
590
157
        switch (*ctx->pattern++) {
591
592
4
        case SRE_OP_MARK:
593
            /* set mark */
594
            /* <MARK> <gid> */
595
4
            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
596
4
                   ctx->ptr, ctx->pattern[0]));
597
4
            i = ctx->pattern[0];
598
4
            if (i & 1)
599
0
                state->lastindex = i/2 + 1;
600
4
            if (i > state->lastmark) {
601
                /* state->lastmark is the highest valid index in the
602
                   state->mark array.  If it is increased by more than 1,
603
                   the intervening marks must be set to NULL to signal
604
                   that these marks have not been encountered. */
605
4
                Py_ssize_t j = state->lastmark + 1;
606
8
                while (j < i)
607
4
                    state->mark[j++] = NULL;
608
4
                state->lastmark = i;
609
4
            }
610
4
            state->mark[i] = ctx->ptr;
611
4
            ctx->pattern++;
612
4
            break;
613
614
120
        case SRE_OP_LITERAL:
615
            /* match literal string */
616
            /* <LITERAL> <code> */
617
120
            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
618
120
                   ctx->ptr, *ctx->pattern));
619
120
            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
620
115
                RETURN_FAILURE;
621
5
            ctx->pattern++;
622
5
            ctx->ptr++;
623
5
            break;
624
625
0
        case SRE_OP_NOT_LITERAL:
626
            /* match anything that is not literal character */
627
            /* <NOT_LITERAL> <code> */
628
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
629
0
                   ctx->ptr, *ctx->pattern));
630
0
            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
631
0
                RETURN_FAILURE;
632
0
            ctx->pattern++;
633
0
            ctx->ptr++;
634
0
            break;
635
636
4
        case SRE_OP_SUCCESS:
637
            /* end of pattern */
638
4
            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
639
4
            if (ctx->toplevel &&
640
4
                ((state->match_all && ctx->ptr != state->end) ||
641
2
                 (state->must_advance && ctx->ptr == state->start)))
642
0
            {
643
0
                RETURN_FAILURE;
644
0
            }
645
4
            state->ptr = ctx->ptr;
646
4
            RETURN_SUCCESS;
647
648
3
        case SRE_OP_AT:
649
            /* match at given position */
650
            /* <AT> <code> */
651
3
            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
652
3
            if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
653
0
                RETURN_FAILURE;
654
3
            ctx->pattern++;
655
3
            break;
656
657
0
        case SRE_OP_CATEGORY:
658
            /* match at given category */
659
            /* <CATEGORY> <code> */
660
0
            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
661
0
                   ctx->ptr, *ctx->pattern));
662
0
            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
663
0
                RETURN_FAILURE;
664
0
            ctx->pattern++;
665
0
            ctx->ptr++;
666
0
            break;
667
668
0
        case SRE_OP_ANY:
669
            /* match anything (except a newline) */
670
            /* <ANY> */
671
0
            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
672
0
            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
673
0
                RETURN_FAILURE;
674
0
            ctx->ptr++;
675
0
            break;
676
677
0
        case SRE_OP_ANY_ALL:
678
            /* match anything */
679
            /* <ANY_ALL> */
680
0
            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
681
0
            if (ctx->ptr >= end)
682
0
                RETURN_FAILURE;
683
0
            ctx->ptr++;
684
0
            break;
685
686
1
        case SRE_OP_IN:
687
            /* match set member (or non_member) */
688
            /* <IN> <skip> <set> */
689
1
            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
690
1
            if (ctx->ptr >= end ||
691
1
                !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
692
0
                RETURN_FAILURE;
693
1
            ctx->pattern += ctx->pattern[0];
694
1
            ctx->ptr++;
695
1
            break;
696
697
0
        case SRE_OP_LITERAL_IGNORE:
698
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
699
0
                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
700
0
            if (ctx->ptr >= end ||
701
0
                sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
702
0
                RETURN_FAILURE;
703
0
            ctx->pattern++;
704
0
            ctx->ptr++;
705
0
            break;
706
707
0
        case SRE_OP_LITERAL_UNI_IGNORE:
708
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
709
0
                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
710
0
            if (ctx->ptr >= end ||
711
0
                sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
712
0
                RETURN_FAILURE;
713
0
            ctx->pattern++;
714
0
            ctx->ptr++;
715
0
            break;
716
717
0
        case SRE_OP_LITERAL_LOC_IGNORE:
718
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
719
0
                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
720
0
            if (ctx->ptr >= end
721
0
                || !char_loc_ignore(*ctx->pattern, *ctx->ptr))
722
0
                RETURN_FAILURE;
723
0
            ctx->pattern++;
724
0
            ctx->ptr++;
725
0
            break;
726
727
0
        case SRE_OP_NOT_LITERAL_IGNORE:
728
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
729
0
                   ctx->pattern, ctx->ptr, *ctx->pattern));
730
0
            if (ctx->ptr >= end ||
731
0
                sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
732
0
                RETURN_FAILURE;
733
0
            ctx->pattern++;
734
0
            ctx->ptr++;
735
0
            break;
736
737
0
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
738
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
739
0
                   ctx->pattern, ctx->ptr, *ctx->pattern));
740
0
            if (ctx->ptr >= end ||
741
0
                sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
742
0
                RETURN_FAILURE;
743
0
            ctx->pattern++;
744
0
            ctx->ptr++;
745
0
            break;
746
747
0
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
748
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
749
0
                   ctx->pattern, ctx->ptr, *ctx->pattern));
750
0
            if (ctx->ptr >= end
751
0
                || char_loc_ignore(*ctx->pattern, *ctx->ptr))
752
0
                RETURN_FAILURE;
753
0
            ctx->pattern++;
754
0
            ctx->ptr++;
755
0
            break;
756
757
0
        case SRE_OP_IN_IGNORE:
758
0
            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
759
0
            if (ctx->ptr >= end
760
0
                || !SRE(charset)(state, ctx->pattern+1,
761
0
                                 (SRE_CODE)sre_lower_ascii(*ctx->ptr)))
762
0
                RETURN_FAILURE;
763
0
            ctx->pattern += ctx->pattern[0];
764
0
            ctx->ptr++;
765
0
            break;
766
767
2
        case SRE_OP_IN_UNI_IGNORE:
768
2
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
769
2
            if (ctx->ptr >= end
770
2
                || !SRE(charset)(state, ctx->pattern+1,
771
2
                                 (SRE_CODE)sre_lower_unicode(*ctx->ptr)))
772
0
                RETURN_FAILURE;
773
2
            ctx->pattern += ctx->pattern[0];
774
2
            ctx->ptr++;
775
2
            break;
776
777
0
        case SRE_OP_IN_LOC_IGNORE:
778
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
779
0
            if (ctx->ptr >= end
780
0
                || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
781
0
                RETURN_FAILURE;
782
0
            ctx->pattern += ctx->pattern[0];
783
0
            ctx->ptr++;
784
0
            break;
785
786
1
        case SRE_OP_JUMP:
787
1
        case SRE_OP_INFO:
788
            /* jump forward */
789
            /* <JUMP> <offset> */
790
1
            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
791
1
                   ctx->ptr, ctx->pattern[0]));
792
1
            ctx->pattern += ctx->pattern[0];
793
1
            break;
794
795
3
        case SRE_OP_BRANCH:
796
            /* alternation */
797
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
798
3
            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
799
3
            LASTMARK_SAVE();
800
3
            ctx->u.rep = state->repeat;
801
3
            if (ctx->u.rep)
802
2
                MARK_PUSH(ctx->lastmark);
803
7
            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
804
5
                if (ctx->pattern[1] == SRE_OP_LITERAL &&
805
5
                    (ctx->ptr >= end ||
806
2
                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
807
2
                    continue;
808
3
                if (ctx->pattern[1] == SRE_OP_IN &&
809
3
                    (ctx->ptr >= end ||
810
1
                     !SRE(charset)(state, ctx->pattern + 3,
811
1
                                   (SRE_CODE) *ctx->ptr)))
812
0
                    continue;
813
3
                state->ptr = ctx->ptr;
814
3
                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
815
3
                if (ret) {
816
1
                    if (ctx->u.rep)
817
0
                        MARK_POP_DISCARD(ctx->lastmark);
818
1
                    RETURN_ON_ERROR(ret);
819
1
                    RETURN_SUCCESS;
820
1
                }
821
2
                if (ctx->u.rep)
822
2
                    MARK_POP_KEEP(ctx->lastmark);
823
2
                LASTMARK_RESTORE();
824
2
            }
825
2
            if (ctx->u.rep)
826
2
                MARK_POP_DISCARD(ctx->lastmark);
827
2
            RETURN_FAILURE;
828
829
9
        case SRE_OP_REPEAT_ONE:
830
            /* match repeated sequence (maximizing regexp) */
831
832
            /* this operator only works if the repeated item is
833
               exactly one character wide, and we're not already
834
               collecting backtracking points.  for other cases,
835
               use the MAX_REPEAT operator */
836
837
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
838
839
9
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
840
9
                   ctx->pattern[1], ctx->pattern[2]));
841
842
9
            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
843
0
                RETURN_FAILURE; /* cannot match */
844
845
9
            state->ptr = ctx->ptr;
846
847
9
            ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
848
9
            RETURN_ON_ERROR(ret);
849
9
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
850
9
            ctx->count = ret;
851
9
            ctx->ptr += ctx->count;
852
853
            /* when we arrive here, count contains the number of
854
               matches, and ctx->ptr points to the tail of the target
855
               string.  check if the rest of the pattern matches,
856
               and backtrack if not. */
857
858
9
            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
859
2
                RETURN_FAILURE;
860
861
7
            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
862
7
                ctx->ptr == state->end &&
863
7
                !(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
864
0
            {
865
                /* tail is empty.  we're finished */
866
0
                state->ptr = ctx->ptr;
867
0
                RETURN_SUCCESS;
868
0
            }
869
870
7
            LASTMARK_SAVE();
871
872
7
            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
873
                /* tail starts with a literal. skip positions where
874
                   the rest of the pattern cannot possibly match */
875
4
                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
876
6
                for (;;) {
877
6
                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
878
6
                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
879
0
                        ctx->ptr--;
880
0
                        ctx->count--;
881
0
                    }
882
6
                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
883
2
                        break;
884
4
                    state->ptr = ctx->ptr;
885
4
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
886
0
                            ctx->pattern+ctx->pattern[0]);
887
4
                    if (ret) {
888
2
                        RETURN_ON_ERROR(ret);
889
2
                        RETURN_SUCCESS;
890
2
                    }
891
892
2
                    LASTMARK_RESTORE();
893
894
2
                    ctx->ptr--;
895
2
                    ctx->count--;
896
2
                }
897
898
4
            } else {
899
                /* general case */
900
3
                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
901
3
                    state->ptr = ctx->ptr;
902
3
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
903
0
                            ctx->pattern+ctx->pattern[0]);
904
3
                    if (ret) {
905
3
                        RETURN_ON_ERROR(ret);
906
3
                        RETURN_SUCCESS;
907
3
                    }
908
0
                    ctx->ptr--;
909
0
                    ctx->count--;
910
0
                    LASTMARK_RESTORE();
911
0
                }
912
3
            }
913
2
            RETURN_FAILURE;
914
915
2
        case SRE_OP_MIN_REPEAT_ONE:
916
            /* match repeated sequence (minimizing regexp) */
917
918
            /* this operator only works if the repeated item is
919
               exactly one character wide, and we're not already
920
               collecting backtracking points.  for other cases,
921
               use the MIN_REPEAT operator */
922
923
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
924
925
2
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
926
2
                   ctx->pattern[1], ctx->pattern[2]));
927
928
2
            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
929
0
                RETURN_FAILURE; /* cannot match */
930
931
2
            state->ptr = ctx->ptr;
932
933
2
            if (ctx->pattern[1] == 0)
934
2
                ctx->count = 0;
935
0
            else {
936
                /* count using pattern min as the maximum */
937
0
                ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
938
0
                RETURN_ON_ERROR(ret);
939
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
940
0
                if (ret < (Py_ssize_t) ctx->pattern[1])
941
                    /* didn't match minimum number of times */
942
0
                    RETURN_FAILURE;
943
                /* advance past minimum matches of repeat */
944
0
                ctx->count = ret;
945
0
                ctx->ptr += ctx->count;
946
0
            }
947
948
2
            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
949
2
                !(ctx->toplevel &&
950
0
                  ((state->match_all && ctx->ptr != state->end) ||
951
0
                   (state->must_advance && ctx->ptr == state->start))))
952
0
            {
953
                /* tail is empty.  we're finished */
954
0
                state->ptr = ctx->ptr;
955
0
                RETURN_SUCCESS;
956
957
2
            } else {
958
                /* general case */
959
2
                LASTMARK_SAVE();
960
113
                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
961
113
                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
962
113
                    state->ptr = ctx->ptr;
963
113
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
964
0
                            ctx->pattern+ctx->pattern[0]);
965
113
                    if (ret) {
966
0
                        RETURN_ON_ERROR(ret);
967
0
                        RETURN_SUCCESS;
968
0
                    }
969
113
                    state->ptr = ctx->ptr;
970
113
                    ret = SRE(count)(state, ctx->pattern+3, 1);
971
113
                    RETURN_ON_ERROR(ret);
972
113
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
973
113
                    if (ret == 0)
974
2
                        break;
975
111
                    assert(ret == 1);
976
111
                    ctx->ptr++;
977
111
                    ctx->count++;
978
111
                    LASTMARK_RESTORE();
979
111
                }
980
2
            }
981
2
            RETURN_FAILURE;
982
983
4
        case SRE_OP_REPEAT:
984
            /* create repeat context.  all the hard work is done
985
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
986
            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
987
4
            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
988
4
                   ctx->pattern[1], ctx->pattern[2]));
989
990
            /* install new repeat context */
991
4
            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
992
4
            if (!ctx->u.rep) {
993
0
                PyErr_NoMemory();
994
0
                RETURN_FAILURE;
995
0
            }
996
4
            ctx->u.rep->count = -1;
997
4
            ctx->u.rep->pattern = ctx->pattern;
998
4
            ctx->u.rep->prev = state->repeat;
999
4
            ctx->u.rep->last_ptr = NULL;
1000
4
            state->repeat = ctx->u.rep;
1001
1002
4
            state->ptr = ctx->ptr;
1003
4
            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1004
4
            state->repeat = ctx->u.rep->prev;
1005
4
            PyObject_FREE(ctx->u.rep);
1006
1007
4
            if (ret) {
1008
4
                RETURN_ON_ERROR(ret);
1009
4
                RETURN_SUCCESS;
1010
4
            }
1011
0
            RETURN_FAILURE;
1012
1013
4
        case SRE_OP_MAX_UNTIL:
1014
            /* maximizing repeat */
1015
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1016
1017
            /* FIXME: we probably need to deal with zero-width
1018
               matches in here... */
1019
1020
4
            ctx->u.rep = state->repeat;
1021
4
            if (!ctx->u.rep)
1022
0
                RETURN_ERROR(SRE_ERROR_STATE);
1023
1024
4
            state->ptr = ctx->ptr;
1025
1026
4
            ctx->count = ctx->u.rep->count+1;
1027
1028
4
            TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1029
4
                   ctx->ptr, ctx->count));
1030
1031
4
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1032
                /* not enough matches */
1033
0
                ctx->u.rep->count = ctx->count;
1034
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1035
0
                        ctx->u.rep->pattern+3);
1036
0
                if (ret) {
1037
0
                    RETURN_ON_ERROR(ret);
1038
0
                    RETURN_SUCCESS;
1039
0
                }
1040
0
                ctx->u.rep->count = ctx->count-1;
1041
0
                state->ptr = ctx->ptr;
1042
0
                RETURN_FAILURE;
1043
0
            }
1044
1045
4
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1046
4
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1047
4
                state->ptr != ctx->u.rep->last_ptr) {
1048
                /* we may have enough matches, but if we can
1049
                   match another item, do so */
1050
4
                ctx->u.rep->count = ctx->count;
1051
4
                LASTMARK_SAVE();
1052
4
                MARK_PUSH(ctx->lastmark);
1053
                /* zero-width match protection */
1054
4
                DATA_PUSH(&ctx->u.rep->last_ptr);
1055
4
                ctx->u.rep->last_ptr = state->ptr;
1056
4
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1057
0
                        ctx->u.rep->pattern+3);
1058
4
                DATA_POP(&ctx->u.rep->last_ptr);
1059
4
                if (ret) {
1060
0
                    MARK_POP_DISCARD(ctx->lastmark);
1061
0
                    RETURN_ON_ERROR(ret);
1062
0
                    RETURN_SUCCESS;
1063
0
                }
1064
4
                MARK_POP(ctx->lastmark);
1065
4
                LASTMARK_RESTORE();
1066
4
                ctx->u.rep->count = ctx->count-1;
1067
4
                state->ptr = ctx->ptr;
1068
4
            }
1069
1070
            /* cannot match more repeated items here.  make sure the
1071
               tail matches */
1072
4
            state->repeat = ctx->u.rep->prev;
1073
4
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1074
4
            RETURN_ON_SUCCESS(ret);
1075
0
            state->repeat = ctx->u.rep;
1076
0
            state->ptr = ctx->ptr;
1077
0
            RETURN_FAILURE;
1078
1079
0
        case SRE_OP_MIN_UNTIL:
1080
            /* minimizing repeat */
1081
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1082
1083
0
            ctx->u.rep = state->repeat;
1084
0
            if (!ctx->u.rep)
1085
0
                RETURN_ERROR(SRE_ERROR_STATE);
1086
1087
0
            state->ptr = ctx->ptr;
1088
1089
0
            ctx->count = ctx->u.rep->count+1;
1090
1091
0
            TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
1092
0
                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
1093
1094
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1095
                /* not enough matches */
1096
0
                ctx->u.rep->count = ctx->count;
1097
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1098
0
                        ctx->u.rep->pattern+3);
1099
0
                if (ret) {
1100
0
                    RETURN_ON_ERROR(ret);
1101
0
                    RETURN_SUCCESS;
1102
0
                }
1103
0
                ctx->u.rep->count = ctx->count-1;
1104
0
                state->ptr = ctx->ptr;
1105
0
                RETURN_FAILURE;
1106
0
            }
1107
1108
0
            LASTMARK_SAVE();
1109
1110
            /* see if the tail matches */
1111
0
            state->repeat = ctx->u.rep->prev;
1112
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1113
0
            if (ret) {
1114
0
                RETURN_ON_ERROR(ret);
1115
0
                RETURN_SUCCESS;
1116
0
            }
1117
1118
0
            state->repeat = ctx->u.rep;
1119
0
            state->ptr = ctx->ptr;
1120
1121
0
            LASTMARK_RESTORE();
1122
1123
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1124
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1125
0
                state->ptr == ctx->u.rep->last_ptr)
1126
0
                RETURN_FAILURE;
1127
1128
0
            ctx->u.rep->count = ctx->count;
1129
            /* zero-width match protection */
1130
0
            DATA_PUSH(&ctx->u.rep->last_ptr);
1131
0
            ctx->u.rep->last_ptr = state->ptr;
1132
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1133
0
                    ctx->u.rep->pattern+3);
1134
0
            DATA_POP(&ctx->u.rep->last_ptr);
1135
0
            if (ret) {
1136
0
                RETURN_ON_ERROR(ret);
1137
0
                RETURN_SUCCESS;
1138
0
            }
1139
0
            ctx->u.rep->count = ctx->count-1;
1140
0
            state->ptr = ctx->ptr;
1141
0
            RETURN_FAILURE;
1142
1143
0
        case SRE_OP_GROUPREF:
1144
            /* match backreference */
1145
0
            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1146
0
                   ctx->ptr, ctx->pattern[0]));
1147
0
            i = ctx->pattern[0];
1148
0
            {
1149
0
                Py_ssize_t groupref = i+i;
1150
0
                if (groupref >= state->lastmark) {
1151
0
                    RETURN_FAILURE;
1152
0
                } else {
1153
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1154
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1155
0
                    if (!p || !e || e < p)
1156
0
                        RETURN_FAILURE;
1157
0
                    while (p < e) {
1158
0
                        if (ctx->ptr >= end || *ctx->ptr != *p)
1159
0
                            RETURN_FAILURE;
1160
0
                        p++;
1161
0
                        ctx->ptr++;
1162
0
                    }
1163
0
                }
1164
0
            }
1165
0
            ctx->pattern++;
1166
0
            break;
1167
1168
0
        case SRE_OP_GROUPREF_IGNORE:
1169
            /* match backreference */
1170
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1171
0
                   ctx->ptr, ctx->pattern[0]));
1172
0
            i = ctx->pattern[0];
1173
0
            {
1174
0
                Py_ssize_t groupref = i+i;
1175
0
                if (groupref >= state->lastmark) {
1176
0
                    RETURN_FAILURE;
1177
0
                } else {
1178
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1179
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1180
0
                    if (!p || !e || e < p)
1181
0
                        RETURN_FAILURE;
1182
0
                    while (p < e) {
1183
0
                        if (ctx->ptr >= end ||
1184
0
                            sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
1185
0
                            RETURN_FAILURE;
1186
0
                        p++;
1187
0
                        ctx->ptr++;
1188
0
                    }
1189
0
                }
1190
0
            }
1191
0
            ctx->pattern++;
1192
0
            break;
1193
1194
0
        case SRE_OP_GROUPREF_UNI_IGNORE:
1195
            /* match backreference */
1196
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
1197
0
                   ctx->ptr, ctx->pattern[0]));
1198
0
            i = ctx->pattern[0];
1199
0
            {
1200
0
                Py_ssize_t groupref = i+i;
1201
0
                if (groupref >= state->lastmark) {
1202
0
                    RETURN_FAILURE;
1203
0
                } else {
1204
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1205
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1206
0
                    if (!p || !e || e < p)
1207
0
                        RETURN_FAILURE;
1208
0
                    while (p < e) {
1209
0
                        if (ctx->ptr >= end ||
1210
0
                            sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
1211
0
                            RETURN_FAILURE;
1212
0
                        p++;
1213
0
                        ctx->ptr++;
1214
0
                    }
1215
0
                }
1216
0
            }
1217
0
            ctx->pattern++;
1218
0
            break;
1219
1220
0
        case SRE_OP_GROUPREF_LOC_IGNORE:
1221
            /* match backreference */
1222
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
1223
0
                   ctx->ptr, ctx->pattern[0]));
1224
0
            i = ctx->pattern[0];
1225
0
            {
1226
0
                Py_ssize_t groupref = i+i;
1227
0
                if (groupref >= state->lastmark) {
1228
0
                    RETURN_FAILURE;
1229
0
                } else {
1230
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1231
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1232
0
                    if (!p || !e || e < p)
1233
0
                        RETURN_FAILURE;
1234
0
                    while (p < e) {
1235
0
                        if (ctx->ptr >= end ||
1236
0
                            sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
1237
0
                            RETURN_FAILURE;
1238
0
                        p++;
1239
0
                        ctx->ptr++;
1240
0
                    }
1241
0
                }
1242
0
            }
1243
0
            ctx->pattern++;
1244
0
            break;
1245
1246
0
        case SRE_OP_GROUPREF_EXISTS:
1247
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1248
0
                   ctx->ptr, ctx->pattern[0]));
1249
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1250
0
            i = ctx->pattern[0];
1251
0
            {
1252
0
                Py_ssize_t groupref = i+i;
1253
0
                if (groupref >= state->lastmark) {
1254
0
                    ctx->pattern += ctx->pattern[1];
1255
0
                    break;
1256
0
                } else {
1257
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1258
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1259
0
                    if (!p || !e || e < p) {
1260
0
                        ctx->pattern += ctx->pattern[1];
1261
0
                        break;
1262
0
                    }
1263
0
                }
1264
0
            }
1265
0
            ctx->pattern += 2;
1266
0
            break;
1267
1268
0
        case SRE_OP_ASSERT:
1269
            /* assert subpattern */
1270
            /* <ASSERT> <skip> <back> <pattern> */
1271
0
            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1272
0
                   ctx->ptr, ctx->pattern[1]));
1273
0
            if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
1274
0
                RETURN_FAILURE;
1275
0
            state->ptr = ctx->ptr - ctx->pattern[1];
1276
0
            DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1277
0
            RETURN_ON_FAILURE(ret);
1278
0
            ctx->pattern += ctx->pattern[0];
1279
0
            break;
1280
1281
0
        case SRE_OP_ASSERT_NOT:
1282
            /* assert not subpattern */
1283
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1284
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1285
0
                   ctx->ptr, ctx->pattern[1]));
1286
0
            if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1287
0
                state->ptr = ctx->ptr - ctx->pattern[1];
1288
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1289
0
                if (ret) {
1290
0
                    RETURN_ON_ERROR(ret);
1291
0
                    RETURN_FAILURE;
1292
0
                }
1293
0
            }
1294
0
            ctx->pattern += ctx->pattern[0];
1295
0
            break;
1296
1297
0
        case SRE_OP_FAILURE:
1298
            /* immediate failure */
1299
0
            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1300
0
            RETURN_FAILURE;
1301
1302
0
        default:
1303
0
            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1304
0
                   ctx->pattern[-1]));
1305
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1306
157
        }
1307
157
    }
1308
1309
141
exit:
1310
141
    ctx_pos = ctx->last_ctx_pos;
1311
141
    jump = ctx->jump;
1312
141
    DATA_POP_DISCARD(ctx);
1313
141
    if (ctx_pos == -1)
1314
6
        return ret;
1315
135
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1316
1317
135
    switch (jump) {
1318
4
        case JUMP_MAX_UNTIL_2:
1319
4
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1320
4
            goto jump_max_until_2;
1321
4
        case JUMP_MAX_UNTIL_3:
1322
4
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1323
4
            goto jump_max_until_3;
1324
0
        case JUMP_MIN_UNTIL_2:
1325
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1326
0
            goto jump_min_until_2;
1327
0
        case JUMP_MIN_UNTIL_3:
1328
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1329
0
            goto jump_min_until_3;
1330
3
        case JUMP_BRANCH:
1331
3
            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1332
3
            goto jump_branch;
1333
0
        case JUMP_MAX_UNTIL_1:
1334
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1335
0
            goto jump_max_until_1;
1336
0
        case JUMP_MIN_UNTIL_1:
1337
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1338
0
            goto jump_min_until_1;
1339
4
        case JUMP_REPEAT:
1340
4
            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1341
4
            goto jump_repeat;
1342
4
        case JUMP_REPEAT_ONE_1:
1343
4
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1344
4
            goto jump_repeat_one_1;
1345
3
        case JUMP_REPEAT_ONE_2:
1346
3
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1347
3
            goto jump_repeat_one_2;
1348
113
        case JUMP_MIN_REPEAT_ONE:
1349
113
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1350
113
            goto jump_min_repeat_one;
1351
0
        case JUMP_ASSERT:
1352
0
            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1353
0
            goto jump_assert;
1354
0
        case JUMP_ASSERT_NOT:
1355
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1356
0
            goto jump_assert_not;
1357
0
        case JUMP_NONE:
1358
0
            TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1359
0
                   ctx->ptr, ret));
1360
0
            break;
1361
135
    }
1362
1363
0
    return ret; /* should never get here */
1364
135
}
Unexecuted instantiation: _sre.c:sre_ucs2_match
Unexecuted instantiation: _sre.c:sre_ucs4_match
1365
1366
/* Capturing groups must be reset between two successive SRE(match) calls
   made from a scanning loop.  The macro sets both state->lastmark and
   state->lastindex to -1.  It expands against a variable named `state`,
   which must be in scope wherever the macro is used. */
1367
#define RESET_CAPTURE_GROUP() \
1368
0
    do { state->lastmark = state->lastindex = -1; } while (0)
1369
1370
/* Scan the subject for the first position at which `pattern` matches.
 *
 * The scan starts at state->start and is bounded by state->end.  Before each
 * attempt state->start and state->ptr are set to the candidate position (with
 * state->ptr moved past any prefix characters that are skipped), then
 * SRE(match) is called.
 *
 * Returns 0 when no candidate position matched.  Returns 1 directly when the
 * INFO block carries SRE_INFO_LITERAL and the literal prefix was found.
 * Otherwise the first nonzero status produced by SRE(match) is returned
 * unchanged.
 *
 * Four strategies are used, chosen from the optional INFO block at the start
 * of `pattern`: single literal character, multi-character literal prefix with
 * an overlap table, leading character set, and a general position-by-position
 * scan.
 */
LOCAL(Py_ssize_t)
1371
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1372
2
{
1373
2
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1374
2
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1375
2
    Py_ssize_t status = 0;
1376
2
    Py_ssize_t prefix_len = 0;
1377
2
    Py_ssize_t prefix_skip = 0;
1378
2
    SRE_CODE* prefix = NULL;
1379
2
    SRE_CODE* charset = NULL;
1380
2
    SRE_CODE* overlap = NULL;
1381
2
    int flags = 0;
1382
1383
2
    if (ptr > end)
1384
0
        return 0;
1385
1386
2
    if (pattern[0] == SRE_OP_INFO) {
1387
        /* optimization info block */
1388
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1389
1390
2
        flags = pattern[2];
1391
        /* a nonzero minimum width that exceeds the remaining input can
           never match: reject without scanning */
1392
2
        if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1393
0
            TRACE(("reject (got %u chars, need %u)\n",
1394
0
                   (unsigned int)(end - ptr), pattern[3]));
1395
0
            return 0;
1396
0
        }
        /* NOTE: the shortened `end` computed below is only used by the
           general-case scan; the prefix and charset paths reload `end`
           from state->end before scanning */
1397
2
        if (pattern[3] > 1) {
1398
            /* adjust end point (but make sure we leave at least one
1399
               character in there, so literal search will work) */
1400
2
            end -= pattern[3] - 1;
1401
2
            if (end <= ptr)
1402
0
                end = ptr;
1403
2
        }
1404
1405
2
        if (flags & SRE_INFO_PREFIX) {
1406
            /* pattern starts with a known prefix */
1407
            /* <length> <skip> <prefix data> <overlap data> */
1408
2
            prefix_len = pattern[5];
1409
2
            prefix_skip = pattern[6];
1410
2
            prefix = pattern + 7;
            /* biased by -1 so that overlap[1] is the first code word
               after the prefix data; the scan below only indexes
               overlap[] with i >= 1 */
1411
2
            overlap = prefix + prefix_len - 1;
1412
2
        } else if (flags & SRE_INFO_CHARSET)
1413
            /* pattern starts with a character from a known set */
1414
            /* <charset> */
1415
0
            charset = pattern + 5;
1416
        /* step over the whole INFO block (pattern[1] is its skip count) */
1417
2
        pattern += 1 + pattern[1];
1418
2
    }
1419
1420
2
    TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1421
2
           prefix, prefix_len, prefix_skip));
1422
2
    TRACE(("charset = %p\n", charset));
1423
1424
2
    if (prefix_len == 1) {
1425
        /* pattern starts with a literal character */
1426
0
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1427
#if SIZEOF_SRE_CHAR < 4
1428
0
        if ((SRE_CODE) c != prefix[0])
1429
0
            return 0; /* literal can't match: doesn't fit in char width */
1430
0
#endif
        /* scan against the real end of input, not the min-width-adjusted one */
1431
0
        end = (SRE_CHAR *)state->end;
1432
0
        state->must_advance = 0;
1433
0
        while (ptr < end) {
            /* advance to the next occurrence of the literal, giving up
               when the input is exhausted */
1434
0
            while (*ptr != c) {
1435
0
                if (++ptr >= end)
1436
0
                    return 0;
1437
0
            }
1438
0
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1439
0
            state->start = ptr;
1440
0
            state->ptr = ptr + prefix_skip;
1441
0
            if (flags & SRE_INFO_LITERAL)
1442
0
                return 1; /* we got all of it */
            /* resume matching after the skipped prefix; the pattern is
               advanced by two code words per skipped character
               (NOTE(review): presumably one opcode plus one literal
               each -- confirm against the pattern compiler) */
1443
0
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1444
0
            if (status != 0)
1445
0
                return status;
            /* attempt failed: retry from the next character with the
               capture groups reset */
1446
0
            ++ptr;
1447
0
            RESET_CAPTURE_GROUP();
1448
0
        }
1449
0
        return 0;
1450
0
    }
1451
1452
2
    if (prefix_len > 1) {
1453
        /* pattern starts with a known prefix.  use the overlap
1454
           table to skip forward as fast as we possibly can */
1455
2
        Py_ssize_t i = 0;
1456
1457
2
        end = (SRE_CHAR *)state->end;
1458
2
        if (prefix_len > end - ptr)
1459
0
            return 0;
1460
#if SIZEOF_SRE_CHAR < 4
1461
6
        for (i = 0; i < prefix_len; i++)
1462
4
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1463
0
                return 0; /* literal can't match: doesn't fit in char width */
1464
2
#endif
1465
2
        while (ptr < end) {
1466
2
            SRE_CHAR c = (SRE_CHAR) prefix[0];
            /* find the first prefix character; the post-increment leaves
               ptr one past it when the loop exits */
1467
2
            while (*ptr++ != c) {
1468
0
                if (ptr >= end)
1469
0
                    return 0;
1470
0
            }
            /* the first character matched at the very end of the input,
               so the remaining prefix characters cannot fit */
1471
2
            if (ptr >= end)
1472
0
                return 0;
1473
            /* i counts the prefix characters matched so far */
1474
2
            i = 1;
1475
2
            state->must_advance = 0;
1476
2
            do {
1477
2
                if (*ptr == (SRE_CHAR) prefix[i]) {
1478
2
                    if (++i != prefix_len) {
1479
0
                        if (++ptr >= end)
1480
0
                            return 0;
                        /* `continue` jumps to the `while (i != 0)` test;
                           i is at least 2 here, so the loop goes on to
                           compare the next prefix character */
1481
0
                        continue;
1482
0
                    }
1483
                    /* found a potential match */
1484
2
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
                    /* ptr is on the last prefix character: start is the
                       first prefix character, and state->ptr ends up at
                       start + prefix_skip */
1485
2
                    state->start = ptr - (prefix_len - 1);
1486
2
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1487
2
                    if (flags & SRE_INFO_LITERAL)
1488
0
                        return 1; /* we got all of it */
1489
2
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1490
2
                    if (status != 0)
1491
2
                        return status;
1492
                    /* close but no cigar -- try again */
1493
0
                    if (++ptr >= end)
1494
0
                        return 0;
1495
0
                    RESET_CAPTURE_GROUP();
                    /* falls through to the overlap lookup below with
                       i == prefix_len */
1496
0
                }
                /* mismatch, or a full prefix whose match attempt failed:
                   take the new matched length from the overlap table.
                   ptr is not moved here; when the table yields 0 the
                   outer loop rescans for the first prefix character
                   starting at the current ptr */
1497
0
                i = overlap[i];
1498
0
            } while (i != 0);
1499
2
        }
1500
0
        return 0;
1501
2
    }
1502
1503
0
    if (charset) {
1504
        /* pattern starts with a character from a known set */
1505
0
        end = (SRE_CHAR *)state->end;
1506
0
        state->must_advance = 0;
1507
0
        for (;;) {
            /* skip characters for which SRE(charset) reports no membership */
1508
0
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1509
0
                ptr++;
1510
0
            if (ptr >= end)
1511
0
                return 0;
1512
0
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1513
0
            state->start = ptr;
1514
0
            state->ptr = ptr;
1515
0
            status = SRE(match)(state, pattern, 0);
            /* any nonzero status ends the scan and is returned at the
               bottom of the function */
1516
0
            if (status != 0)
1517
0
                break;
1518
0
            ptr++;
1519
0
            RESET_CAPTURE_GROUP();
1520
0
        }
1521
0
    } else {
1522
        /* general case */
1523
0
        assert(ptr <= end);
1524
0
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1525
0
        state->start = state->ptr = ptr;
        /* only this first attempt passes 1 as the third argument; every
           retry below passes 0 (NOTE(review): the meaning of that
           argument is defined by SRE(match), whose signature is outside
           this section) */
1526
0
        status = SRE(match)(state, pattern, 1);
        /* unlike the prefix and charset paths, must_advance is cleared
           only after the first attempt has run */
1527
0
        state->must_advance = 0;
        /* `end` may be the min-width-adjusted end point here; a final
           attempt is made at ptr == end */
1528
0
        while (status == 0 && ptr < end) {
1529
0
            ptr++;
1530
0
            RESET_CAPTURE_GROUP();
1531
0
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1532
0
            state->start = state->ptr = ptr;
1533
0
            status = SRE(match)(state, pattern, 0);
1534
0
        }
1535
0
    }
1536
1537
0
    return status;
1538
0
}
_sre.c:sre_ucs1_search
Line
Count
Source
1372
2
{
1373
2
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1374
2
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1375
2
    Py_ssize_t status = 0;
1376
2
    Py_ssize_t prefix_len = 0;
1377
2
    Py_ssize_t prefix_skip = 0;
1378
2
    SRE_CODE* prefix = NULL;
1379
2
    SRE_CODE* charset = NULL;
1380
2
    SRE_CODE* overlap = NULL;
1381
2
    int flags = 0;
1382
1383
2
    if (ptr > end)
1384
0
        return 0;
1385
1386
2
    if (pattern[0] == SRE_OP_INFO) {
1387
        /* optimization info block */
1388
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1389
1390
2
        flags = pattern[2];
1391
1392
2
        if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1393
0
            TRACE(("reject (got %u chars, need %u)\n",
1394
0
                   (unsigned int)(end - ptr), pattern[3]));
1395
0
            return 0;
1396
0
        }
1397
2
        if (pattern[3] > 1) {
1398
            /* adjust end point (but make sure we leave at least one
1399
               character in there, so literal search will work) */
1400
2
            end -= pattern[3] - 1;
1401
2
            if (end <= ptr)
1402
0
                end = ptr;
1403
2
        }
1404
1405
2
        if (flags & SRE_INFO_PREFIX) {
1406
            /* pattern starts with a known prefix */
1407
            /* <length> <skip> <prefix data> <overlap data> */
1408
2
            prefix_len = pattern[5];
1409
2
            prefix_skip = pattern[6];
1410
2
            prefix = pattern + 7;
1411
2
            overlap = prefix + prefix_len - 1;
1412
2
        } else if (flags & SRE_INFO_CHARSET)
1413
            /* pattern starts with a character from a known set */
1414
            /* <charset> */
1415
0
            charset = pattern + 5;
1416
1417
2
        pattern += 1 + pattern[1];
1418
2
    }
1419
1420
2
    TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1421
2
           prefix, prefix_len, prefix_skip));
1422
2
    TRACE(("charset = %p\n", charset));
1423
1424
2
    if (prefix_len == 1) {
1425
        /* pattern starts with a literal character */
1426
0
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1427
0
#if SIZEOF_SRE_CHAR < 4
1428
0
        if ((SRE_CODE) c != prefix[0])
1429
0
            return 0; /* literal can't match: doesn't fit in char width */
1430
0
#endif
1431
0
        end = (SRE_CHAR *)state->end;
1432
0
        state->must_advance = 0;
1433
0
        while (ptr < end) {
1434
0
            while (*ptr != c) {
1435
0
                if (++ptr >= end)
1436
0
                    return 0;
1437
0
            }
1438
0
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1439
0
            state->start = ptr;
1440
0
            state->ptr = ptr + prefix_skip;
1441
0
            if (flags & SRE_INFO_LITERAL)
1442
0
                return 1; /* we got all of it */
1443
0
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1444
0
            if (status != 0)
1445
0
                return status;
1446
0
            ++ptr;
1447
0
            RESET_CAPTURE_GROUP();
1448
0
        }
1449
0
        return 0;
1450
0
    }
1451
1452
2
    if (prefix_len > 1) {
1453
        /* pattern starts with a known prefix.  use the overlap
1454
           table to skip forward as fast as we possibly can */
1455
2
        Py_ssize_t i = 0;
1456
1457
2
        end = (SRE_CHAR *)state->end;
1458
2
        if (prefix_len > end - ptr)
1459
0
            return 0;
1460
2
#if SIZEOF_SRE_CHAR < 4
1461
6
        for (i = 0; i < prefix_len; i++)
1462
4
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1463
0
                return 0; /* literal can't match: doesn't fit in char width */
1464
2
#endif
1465
2
        while (ptr < end) {
1466
2
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1467
2
            while (*ptr++ != c) {
1468
0
                if (ptr >= end)
1469
0
                    return 0;
1470
0
            }
1471
2
            if (ptr >= end)
1472
0
                return 0;
1473
1474
2
            i = 1;
1475
2
            state->must_advance = 0;
1476
2
            do {
1477
2
                if (*ptr == (SRE_CHAR) prefix[i]) {
1478
2
                    if (++i != prefix_len) {
1479
0
                        if (++ptr >= end)
1480
0
                            return 0;
1481
0
                        continue;
1482
0
                    }
1483
                    /* found a potential match */
1484
2
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1485
2
                    state->start = ptr - (prefix_len - 1);
1486
2
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1487
2
                    if (flags & SRE_INFO_LITERAL)
1488
0
                        return 1; /* we got all of it */
1489
2
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1490
2
                    if (status != 0)
1491
2
                        return status;
1492
                    /* close but no cigar -- try again */
1493
0
                    if (++ptr >= end)
1494
0
                        return 0;
1495
0
                    RESET_CAPTURE_GROUP();
1496
0
                }
1497
0
                i = overlap[i];
1498
0
            } while (i != 0);
1499
2
        }
1500
0
        return 0;
1501
2
    }
1502
1503
0
    if (charset) {
1504
        /* pattern starts with a character from a known set */
1505
0
        end = (SRE_CHAR *)state->end;
1506
0
        state->must_advance = 0;
1507
0
        for (;;) {
1508
0
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1509
0
                ptr++;
1510
0
            if (ptr >= end)
1511
0
                return 0;
1512
0
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1513
0
            state->start = ptr;
1514
0
            state->ptr = ptr;
1515
0
            status = SRE(match)(state, pattern, 0);
1516
0
            if (status != 0)
1517
0
                break;
1518
0
            ptr++;
1519
0
            RESET_CAPTURE_GROUP();
1520
0
        }
1521
0
    } else {
1522
        /* general case */
1523
0
        assert(ptr <= end);
1524
0
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1525
0
        state->start = state->ptr = ptr;
1526
0
        status = SRE(match)(state, pattern, 1);
1527
0
        state->must_advance = 0;
1528
0
        while (status == 0 && ptr < end) {
1529
0
            ptr++;
1530
0
            RESET_CAPTURE_GROUP();
1531
0
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1532
0
            state->start = state->ptr = ptr;
1533
0
            status = SRE(match)(state, pattern, 0);
1534
0
        }
1535
0
    }
1536
1537
0
    return status;
1538
0
}
Unexecuted instantiation: _sre.c:sre_ucs2_search
Unexecuted instantiation: _sre.c:sre_ucs4_search
1539
1540
#undef SRE_CHAR
1541
#undef SIZEOF_SRE_CHAR
1542
#undef SRE
1543
1544
/* vim:ts=4:sw=4:et
1545
*/