Coverage Report

Created: 2026-06-01 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
352
{
18
    /* check if pointer is at given position */
19
20
352
    Py_ssize_t thisp, thatp;
21
22
352
    switch (at) {
23
24
290
    case SRE_AT_BEGINNING:
25
290
    case SRE_AT_BEGINNING_STRING:
26
290
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
44
    case SRE_AT_END:
33
44
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
6
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
43
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
9
    case SRE_AT_END_STRING:
42
9
        return ((void*) ptr == state->end);
43
44
9
    case SRE_AT_BOUNDARY:
45
9
        thatp = ((void*) ptr > state->beginning) ?
46
9
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
9
        thisp = ((void*) ptr < state->end) ?
48
8
            SRE_IS_WORD((int) ptr[0]) : 0;
49
9
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
352
    }
87
88
0
    return 0;
89
352
}
sre.c:sre_ucs1_at
Line
Count
Source
17
352
{
18
    /* check if pointer is at given position */
19
20
352
    Py_ssize_t thisp, thatp;
21
22
352
    switch (at) {
23
24
290
    case SRE_AT_BEGINNING:
25
290
    case SRE_AT_BEGINNING_STRING:
26
290
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
44
    case SRE_AT_END:
33
44
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
6
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
43
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
9
    case SRE_AT_END_STRING:
42
9
        return ((void*) ptr == state->end);
43
44
9
    case SRE_AT_BOUNDARY:
45
9
        thatp = ((void*) ptr > state->beginning) ?
46
9
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
9
        thisp = ((void*) ptr < state->end) ?
48
8
            SRE_IS_WORD((int) ptr[0]) : 0;
49
9
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
352
    }
87
88
0
    return 0;
89
352
}
Unexecuted instantiation: sre.c:sre_ucs2_at
Unexecuted instantiation: sre.c:sre_ucs4_at
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
13.5M
{
94
    /* check if character is a member of the given set */
95
96
13.5M
    int ok = 1;
97
98
14.9M
    for (;;) {
99
14.9M
        switch (*set++) {
100
101
1.30M
        case SRE_OP_FAILURE:
102
1.30M
            return !ok;
103
104
1.36M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.36M
            if (ch == set[0])
107
1.26M
                return ok;
108
100k
            set++;
109
100k
            break;
110
111
1.29M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
1.29M
            if (sre_category(set[0], (int) ch))
114
1.38k
                return ok;
115
1.29M
            set++;
116
1.29M
            break;
117
118
11.0M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
11.0M
            if (ch < 256 &&
121
11.0M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
11.0M
                return ok;
123
8.98k
            set += 256/SRE_CODE_BITS;
124
8.98k
            break;
125
126
10
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
10
            if (set[0] <= ch && ch <= set[1])
129
1
                return ok;
130
9
            set += 2;
131
9
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
10
        case SRE_OP_NEGATE:
148
10
            ok = !ok;
149
10
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
14.9M
        }
175
14.9M
    }
176
13.5M
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
8.07M
{
94
    /* check if character is a member of the given set */
95
96
8.07M
    int ok = 1;
97
98
9.47M
    for (;;) {
99
9.47M
        switch (*set++) {
100
101
1.30M
        case SRE_OP_FAILURE:
102
1.30M
            return !ok;
103
104
1.36M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.36M
            if (ch == set[0])
107
1.26M
                return ok;
108
100k
            set++;
109
100k
            break;
110
111
1.29M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
1.29M
            if (sre_category(set[0], (int) ch))
114
1.38k
                return ok;
115
1.29M
            set++;
116
1.29M
            break;
117
118
5.50M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
5.50M
            if (ch < 256 &&
121
5.50M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
5.50M
                return ok;
123
5.03k
            set += 256/SRE_CODE_BITS;
124
5.03k
            break;
125
126
10
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
10
            if (set[0] <= ch && ch <= set[1])
129
1
                return ok;
130
9
            set += 2;
131
9
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
10
        case SRE_OP_NEGATE:
148
10
            ok = !ok;
149
10
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
9.47M
        }
175
9.47M
    }
176
8.07M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
3.25M
{
94
    /* check if character is a member of the given set */
95
96
3.25M
    int ok = 1;
97
98
3.25M
    for (;;) {
99
3.25M
        switch (*set++) {
100
101
1.80k
        case SRE_OP_FAILURE:
102
1.80k
            return !ok;
103
104
0
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
0
            if (ch == set[0])
107
0
                return ok;
108
0
            set++;
109
0
            break;
110
111
0
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
0
            if (sre_category(set[0], (int) ch))
114
0
                return ok;
115
0
            set++;
116
0
            break;
117
118
3.25M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
3.25M
            if (ch < 256 &&
121
3.25M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
3.25M
                return ok;
123
1.80k
            set += 256/SRE_CODE_BITS;
124
1.80k
            break;
125
126
0
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
0
            if (set[0] <= ch && ch <= set[1])
129
0
                return ok;
130
0
            set += 2;
131
0
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
0
        case SRE_OP_NEGATE:
148
0
            ok = !ok;
149
0
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.25M
        }
175
3.25M
    }
176
3.25M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
2.25M
{
94
    /* check if character is a member of the given set */
95
96
2.25M
    int ok = 1;
97
98
2.26M
    for (;;) {
99
2.26M
        switch (*set++) {
100
101
2.14k
        case SRE_OP_FAILURE:
102
2.14k
            return !ok;
103
104
0
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
0
            if (ch == set[0])
107
0
                return ok;
108
0
            set++;
109
0
            break;
110
111
0
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
0
            if (sre_category(set[0], (int) ch))
114
0
                return ok;
115
0
            set++;
116
0
            break;
117
118
2.25M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
2.25M
            if (ch < 256 &&
121
2.25M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
2.25M
                return ok;
123
2.14k
            set += 256/SRE_CODE_BITS;
124
2.14k
            break;
125
126
0
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
0
            if (set[0] <= ch && ch <= set[1])
129
0
                return ok;
130
0
            set += 2;
131
0
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
0
        case SRE_OP_NEGATE:
148
0
            ok = !ok;
149
0
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
2.26M
        }
175
2.26M
    }
176
2.25M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
1.30M
{
195
1.30M
    SRE_CODE chr;
196
1.30M
    SRE_CHAR c;
197
1.30M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
1.30M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
1.30M
    Py_ssize_t i;
200
1.30M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
1.30M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
1.29M
        end = ptr + maxcount;
205
206
1.30M
    switch (pattern[0]) {
207
208
1.30M
    case SRE_OP_IN:
209
        /* repeated set */
210
1.30M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
12.3M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
11.0M
            ptr++;
213
1.30M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
633
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
633
        chr = pattern[1];
232
633
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
633
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
633
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
633
        else
238
633
#endif
239
439k
        while (ptr < end && *ptr == c)
240
438k
            ptr++;
241
633
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
0
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
0
        else
276
0
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
1.30M
    }
319
320
1.30M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
1.30M
           ptr - (SRE_CHAR*) state->ptr));
322
1.30M
    return ptr - (SRE_CHAR*) state->ptr;
323
1.30M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
1.30M
{
195
1.30M
    SRE_CODE chr;
196
1.30M
    SRE_CHAR c;
197
1.30M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
1.30M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
1.30M
    Py_ssize_t i;
200
1.30M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
1.30M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
1.29M
        end = ptr + maxcount;
205
206
1.30M
    switch (pattern[0]) {
207
208
1.30M
    case SRE_OP_IN:
209
        /* repeated set */
210
1.30M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
6.80M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
5.50M
            ptr++;
213
1.30M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
633
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
633
        chr = pattern[1];
232
633
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
633
        c = (SRE_CHAR) chr;
234
633
#if SIZEOF_SRE_CHAR < 4
235
633
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
633
        else
238
633
#endif
239
439k
        while (ptr < end && *ptr == c)
240
438k
            ptr++;
241
633
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
0
#if SIZEOF_SRE_CHAR < 4
273
0
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
0
        else
276
0
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
1.30M
    }
319
320
1.30M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
1.30M
           ptr - (SRE_CHAR*) state->ptr));
322
1.30M
    return ptr - (SRE_CHAR*) state->ptr;
323
1.30M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
1.84k
{
195
1.84k
    SRE_CODE chr;
196
1.84k
    SRE_CHAR c;
197
1.84k
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
1.84k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
1.84k
    Py_ssize_t i;
200
1.84k
    INIT_TRACE(state);
201
202
    /* adjust end */
203
1.84k
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
0
        end = ptr + maxcount;
205
206
1.84k
    switch (pattern[0]) {
207
208
1.84k
    case SRE_OP_IN:
209
        /* repeated set */
210
1.84k
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
3.25M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
3.25M
            ptr++;
213
1.84k
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
0
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
0
        chr = pattern[1];
232
0
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
0
        c = (SRE_CHAR) chr;
234
0
#if SIZEOF_SRE_CHAR < 4
235
0
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
0
        else
238
0
#endif
239
0
        while (ptr < end && *ptr == c)
240
0
            ptr++;
241
0
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
0
#if SIZEOF_SRE_CHAR < 4
273
0
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
0
        else
276
0
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
1.84k
    }
319
320
1.84k
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
1.84k
           ptr - (SRE_CHAR*) state->ptr));
322
1.84k
    return ptr - (SRE_CHAR*) state->ptr;
323
1.84k
}
sre.c:sre_ucs4_count
Line
Count
Source
194
2.21k
{
195
2.21k
    SRE_CODE chr;
196
2.21k
    SRE_CHAR c;
197
2.21k
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
2.21k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
2.21k
    Py_ssize_t i;
200
2.21k
    INIT_TRACE(state);
201
202
    /* adjust end */
203
2.21k
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
0
        end = ptr + maxcount;
205
206
2.21k
    switch (pattern[0]) {
207
208
2.21k
    case SRE_OP_IN:
209
        /* repeated set */
210
2.21k
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
2.25M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
2.25M
            ptr++;
213
2.21k
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
0
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
0
        chr = pattern[1];
232
0
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
0
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
0
        while (ptr < end && *ptr == c)
240
0
            ptr++;
241
0
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
2.21k
    }
319
320
2.21k
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
2.21k
           ptr - (SRE_CHAR*) state->ptr));
322
2.21k
    return ptr - (SRE_CHAR*) state->ptr;
323
2.21k
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
1.28M
    do { \
355
1.28M
        ctx->lastmark = state->lastmark; \
356
1.28M
        ctx->lastindex = state->lastindex; \
357
1.28M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
1.30M
    do { \
360
1.30M
        state->lastmark = ctx->lastmark; \
361
1.30M
        state->lastindex = ctx->lastindex; \
362
1.30M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
1.26M
    do { \
366
1.26M
        TRACE(("push last_ptr: %zd", \
367
1.26M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
1.26M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
1.26M
    } while (0)
370
#define LAST_PTR_POP()  \
371
1.26M
    do { \
372
1.26M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
1.26M
        TRACE(("pop last_ptr: %zd", \
374
1.26M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
1.26M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
2.53M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
62.1k
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
2.58M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
1.22M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
10
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
2.59M
#define DATA_STACK_ALLOC(state, type, ptr) \
389
2.59M
do { \
390
2.59M
    alloc_pos = state->data_stack_base; \
391
2.59M
    TRACE(("allocating %s in %zd (%zd)\n", \
392
2.59M
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
2.59M
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
12.8k
        int j = data_stack_grow(state, sizeof(type)); \
395
12.8k
        if (j < 0) return j; \
396
12.8k
        if (ctx_pos != -1) \
397
12.8k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
12.8k
    } \
399
2.59M
    ptr = (type*)(state->data_stack+alloc_pos); \
400
2.59M
    state->data_stack_base += sizeof(type); \
401
2.59M
} while (0)
402
403
3.89M
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
3.89M
do { \
405
3.89M
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
3.89M
    ptr = (type*)(state->data_stack+pos); \
407
3.89M
} while (0)
408
409
2.53M
#define DATA_STACK_PUSH(state, data, size) \
410
2.53M
do { \
411
2.53M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
2.53M
           data, state->data_stack_base, size)); \
413
2.53M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
577
        int j = data_stack_grow(state, size); \
415
577
        if (j < 0) return j; \
416
577
        if (ctx_pos != -1) \
417
577
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
577
    } \
419
2.53M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
2.53M
    state->data_stack_base += size; \
421
2.53M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
2.49M
#define DATA_STACK_POP(state, data, size, discard) \
427
2.49M
do { \
428
2.49M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
2.49M
           data, state->data_stack_base-size, size)); \
430
2.49M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
2.49M
    if (discard) \
432
2.49M
        state->data_stack_base -= size; \
433
2.49M
} while (0)
434
435
2.63M
#define DATA_STACK_POP_DISCARD(state, size) \
436
2.63M
do { \
437
2.63M
    TRACE(("discard data from %zd (%zd)\n", \
438
2.63M
           state->data_stack_base-size, size)); \
439
2.63M
    state->data_stack_base -= size; \
440
2.63M
} while(0)
441
442
#define DATA_PUSH(x) \
443
1.26M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
1.26M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
2.59M
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
2.59M
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
3.89M
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
1.27M
    do if (lastmark >= 0) { \
473
1.26M
        MARK_TRACE("push", (lastmark)); \
474
1.26M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
1.26M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
1.27M
    } while (0)
477
#define MARK_POP(lastmark) \
478
1.22M
    do if (lastmark >= 0) { \
479
1.22M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
1.22M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
1.22M
        MARK_TRACE("pop", (lastmark)); \
482
1.22M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
0
    do if (lastmark >= 0) { \
485
0
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
0
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
0
        MARK_TRACE("pop keep", (lastmark)); \
488
0
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
42.3k
    do if (lastmark >= 0) { \
491
42.2k
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
42.2k
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
42.2k
        MARK_TRACE("pop discard", (lastmark)); \
494
42.3k
    } while (0)
495
496
10.7k
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
1.26M
#define JUMP_MAX_UNTIL_2     2
499
1.22M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
3.16k
#define JUMP_REPEAT          7
504
4
#define JUMP_REPEAT_ONE_1    8
505
82.4k
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
23
#define JUMP_BRANCH          11
508
10
#define JUMP_ASSERT          12
509
0
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
2.58M
    ctx->pattern = pattern; \
516
2.58M
    ctx->ptr = ptr; \
517
2.58M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
2.58M
    nextctx->pattern = nextpattern; \
519
2.58M
    nextctx->toplevel = toplevel_; \
520
2.58M
    nextctx->jump = jumpvalue; \
521
2.58M
    nextctx->last_ctx_pos = ctx_pos; \
522
2.58M
    pattern = nextpattern; \
523
2.58M
    ctx_pos = alloc_pos; \
524
2.58M
    ctx = nextctx; \
525
2.58M
    goto entrance; \
526
2.58M
    jumplabel: \
527
2.58M
    pattern = ctx->pattern; \
528
2.58M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
2.58M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
10
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
6.39M
    do {                                                           \
553
6.39M
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
6.39M
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
6.39M
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
6.39M
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
6.39M
        do {                               \
588
6.39M
            MAYBE_CHECK_SIGNALS;           \
589
6.39M
            goto *sre_targets[*pattern++]; \
590
6.39M
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
10.7k
{
601
10.7k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
10.7k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
10.7k
    Py_ssize_t ret = 0;
604
10.7k
    int jump;
605
10.7k
    unsigned int sigcount = state->sigcount;
606
607
10.7k
    SRE(match_context)* ctx;
608
10.7k
    SRE(match_context)* nextctx;
609
10.7k
    INIT_TRACE(state);
610
611
10.7k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
10.7k
    DATA_ALLOC(SRE(match_context), ctx);
614
10.7k
    ctx->last_ctx_pos = -1;
615
10.7k
    ctx->jump = JUMP_NONE;
616
10.7k
    ctx->toplevel = toplevel;
617
10.7k
    ctx_pos = alloc_pos;
618
619
10.7k
#if USE_COMPUTED_GOTOS
620
10.7k
#include "sre_targets.h"
621
10.7k
#endif
622
623
2.59M
entrance:
624
625
2.59M
    ;  // Fashion statement.
626
2.59M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
2.59M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.7k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2
                   end - ptr, (size_t) pattern[3]));
634
2
            RETURN_FAILURE;
635
2
        }
636
10.7k
        pattern += pattern[1] + 1;
637
10.7k
    }
638
639
2.59M
#if USE_COMPUTED_GOTOS
640
2.59M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
2.59M
    {
647
648
2.59M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
2.53M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
2.53M
                   ptr, pattern[0]));
653
2.53M
            {
654
2.53M
                int i = pattern[0];
655
2.53M
                if (i & 1)
656
1.26M
                    state->lastindex = i/2 + 1;
657
2.53M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
3.50k
                    int j = state->lastmark + 1;
663
3.69k
                    while (j < i)
664
186
                        state->mark[j++] = NULL;
665
3.50k
                    state->lastmark = i;
666
3.50k
                }
667
2.53M
                state->mark[i] = ptr;
668
2.53M
            }
669
2.53M
            pattern++;
670
2.53M
            DISPATCH;
671
672
2.53M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
43
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
43
                   ptr, *pattern));
677
43
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
3
                RETURN_FAILURE;
679
40
            pattern++;
680
40
            ptr++;
681
40
            DISPATCH;
682
683
40
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
9.08k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
9.08k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
9.08k
            if (ctx->toplevel &&
698
9.08k
                ((state->match_all && ptr != state->end) ||
699
9.08k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
9.08k
            state->ptr = ptr;
704
9.08k
            RETURN_SUCCESS;
705
706
352
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
352
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
352
            if (!SRE(at)(state, ptr, *pattern))
711
22
                RETURN_FAILURE;
712
330
            pattern++;
713
330
            DISPATCH;
714
715
330
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
4
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
4
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
4
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
1
                RETURN_FAILURE;
732
3
            ptr++;
733
3
            DISPATCH;
734
735
3
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
1.26M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
1.26M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
1.26M
            if (ptr >= end ||
749
1.26M
                !SRE(charset)(state, pattern + 1, *ptr))
750
3.13k
                RETURN_FAILURE;
751
1.26M
            pattern += pattern[0];
752
1.26M
            ptr++;
753
1.26M
            DISPATCH;
754
755
1.26M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
21
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
21
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
21
                   pattern, ptr, pattern[0]));
768
21
            if (ptr >= end ||
769
21
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
21
            pattern++;
772
21
            ptr++;
773
21
            DISPATCH;
774
775
21
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
18
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
18
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
18
            if (ptr >= end
828
12
                || !SRE(charset)(state, pattern+1,
829
12
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
12
                RETURN_FAILURE;
831
6
            pattern += pattern[0];
832
6
            ptr++;
833
6
            DISPATCH;
834
835
6
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
10
        TARGET(SRE_OP_JUMP):
845
10
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
10
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
10
                   ptr, pattern[0]));
850
10
            pattern += pattern[0];
851
10
            DISPATCH;
852
853
23
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
23
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
23
            LASTMARK_SAVE();
858
23
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
61
            for (; pattern[0]; pattern += pattern[0]) {
861
48
                if (pattern[1] == SRE_OP_LITERAL &&
862
18
                    (ptr >= end ||
863
18
                     (SRE_CODE) *ptr != pattern[2]))
864
16
                    continue;
865
32
                if (pattern[1] == SRE_OP_IN &&
866
9
                    (ptr >= end ||
867
9
                     !SRE(charset)(state, pattern + 3,
868
9
                                   (SRE_CODE) *ptr)))
869
9
                    continue;
870
23
                state->ptr = ptr;
871
23
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23
                if (ret) {
873
10
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
10
                    RETURN_ON_ERROR(ret);
876
10
                    RETURN_SUCCESS;
877
10
                }
878
13
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
13
                LASTMARK_RESTORE();
881
13
            }
882
13
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
13
            RETURN_FAILURE;
885
886
1.30M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
1.30M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
1.30M
                   pattern[1], pattern[2]));
898
899
1.30M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
624
                RETURN_FAILURE; /* cannot match */
901
902
1.30M
            state->ptr = ptr;
903
904
1.30M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
1.30M
            RETURN_ON_ERROR(ret);
906
1.30M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
1.30M
            ctx->count = ret;
908
1.30M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
1.30M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
1.29M
                RETURN_FAILURE;
917
918
11.3k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
10.3k
                ptr == state->end &&
920
1.34k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
1.34k
            {
922
                /* tail is empty.  we're finished */
923
1.34k
                state->ptr = ptr;
924
1.34k
                RETURN_SUCCESS;
925
1.34k
            }
926
927
10.0k
            LASTMARK_SAVE();
928
10.0k
            if (state->repeat)
929
38
                MARK_PUSH(ctx->lastmark);
930
931
10.0k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
50
                ctx->u.chr = pattern[pattern[0]+1];
935
50
                for (;;) {
936
319k
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
319k
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
319k
                        ptr--;
939
319k
                        ctx->count--;
940
319k
                    }
941
50
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
46
                        break;
943
4
                    state->ptr = ptr;
944
4
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4
                            pattern+pattern[0]);
946
4
                    if (ret) {
947
4
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
4
                        RETURN_ON_ERROR(ret);
950
4
                        RETURN_SUCCESS;
951
4
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
46
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
9.99k
            } else {
962
                /* general case */
963
83.3k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
82.4k
                    state->ptr = ptr;
965
82.4k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
82.4k
                            pattern+pattern[0]);
967
82.4k
                    if (ret) {
968
9.15k
                        if (state->repeat)
969
38
                            MARK_POP_DISCARD(ctx->lastmark);
970
9.15k
                        RETURN_ON_ERROR(ret);
971
9.15k
                        RETURN_SUCCESS;
972
9.15k
                    }
973
73.3k
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
73.3k
                    LASTMARK_RESTORE();
976
977
73.3k
                    ptr--;
978
73.3k
                    ctx->count--;
979
73.3k
                }
980
845
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
845
            }
983
891
            RETURN_FAILURE;
984
985
27
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
27
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
27
                   pattern[1], pattern[2]));
997
998
27
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
27
            state->ptr = ptr;
1002
1003
27
            if (pattern[1] == 0)
1004
13
                ctx->count = 0;
1005
14
            else {
1006
                /* count using pattern min as the maximum */
1007
14
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
14
                RETURN_ON_ERROR(ret);
1009
14
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
14
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
12
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
2
                ctx->count = ret;
1015
2
                ptr += ctx->count;
1016
2
            }
1017
1018
15
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
15
                !(ctx->toplevel &&
1020
15
                  ((state->match_all && ptr != state->end) ||
1021
15
                   (state->must_advance && ptr == state->start))))
1022
15
            {
1023
                /* tail is empty.  we're finished */
1024
15
                state->ptr = ptr;
1025
15
                RETURN_SUCCESS;
1026
1027
15
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
3.16k
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
3.16k
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
3.16k
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
3.16k
            ctx->u.rep = repeat_pool_malloc(state);
1127
3.16k
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
3.16k
            ctx->u.rep->count = -1;
1131
3.16k
            ctx->u.rep->pattern = pattern;
1132
3.16k
            ctx->u.rep->prev = state->repeat;
1133
3.16k
            ctx->u.rep->last_ptr = NULL;
1134
3.16k
            state->repeat = ctx->u.rep;
1135
1136
3.16k
            state->ptr = ptr;
1137
3.16k
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
3.16k
            state->repeat = ctx->u.rep->prev;
1139
3.16k
            repeat_pool_free(state, ctx->u.rep);
1140
1141
3.16k
            if (ret) {
1142
89
                RETURN_ON_ERROR(ret);
1143
89
                RETURN_SUCCESS;
1144
89
            }
1145
3.07k
            RETURN_FAILURE;
1146
1147
1.27M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
1.27M
            ctx->u.rep = state->repeat;
1155
1.27M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
1.27M
            state->ptr = ptr;
1159
1160
1.27M
            ctx->count = ctx->u.rep->count+1;
1161
1162
1.27M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
1.27M
                   ptr, ctx->count));
1164
1165
1.27M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
1.27M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
1.27M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
1.26M
                ctx->u.rep->count = ctx->count;
1185
1.26M
                LASTMARK_SAVE();
1186
1.26M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
1.26M
                LAST_PTR_PUSH();
1189
1.26M
                ctx->u.rep->last_ptr = state->ptr;
1190
1.26M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
1.26M
                        ctx->u.rep->pattern+3);
1192
1.26M
                LAST_PTR_POP();
1193
1.26M
                if (ret) {
1194
42.3k
                    MARK_POP_DISCARD(ctx->lastmark);
1195
42.3k
                    RETURN_ON_ERROR(ret);
1196
42.3k
                    RETURN_SUCCESS;
1197
42.3k
                }
1198
1.22M
                MARK_POP(ctx->lastmark);
1199
1.22M
                LASTMARK_RESTORE();
1200
1.22M
                ctx->u.rep->count = ctx->count-1;
1201
1.22M
                state->ptr = ptr;
1202
1.22M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
1.22M
            state->repeat = ctx->u.rep->prev;
1207
1.22M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
1.22M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
1.22M
            RETURN_ON_SUCCESS(ret);
1211
1.22M
            state->ptr = ptr;
1212
1.22M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
10
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
10
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
10
                   ptr, pattern[1]));
1565
10
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
10
            state->ptr = ptr - pattern[1];
1568
10
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
10
            RETURN_ON_FAILURE(ret);
1570
1
            pattern += pattern[0];
1571
1
            DISPATCH;
1572
1573
1
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
2.59M
exit:
1620
2.59M
    ctx_pos = ctx->last_ctx_pos;
1621
2.59M
    jump = ctx->jump;
1622
2.59M
    DATA_POP_DISCARD(ctx);
1623
2.59M
    if (ctx_pos == -1) {
1624
10.7k
        state->sigcount = sigcount;
1625
10.7k
        return ret;
1626
10.7k
    }
1627
2.58M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
2.58M
    switch (jump) {
1630
1.26M
        case JUMP_MAX_UNTIL_2:
1631
1.26M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
1.26M
            goto jump_max_until_2;
1633
1.22M
        case JUMP_MAX_UNTIL_3:
1634
1.22M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
1.22M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23
        case JUMP_BRANCH:
1643
23
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
3.16k
        case JUMP_REPEAT:
1658
3.16k
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
3.16k
            goto jump_repeat;
1660
4
        case JUMP_REPEAT_ONE_1:
1661
4
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4
            goto jump_repeat_one_1;
1663
82.4k
        case JUMP_REPEAT_ONE_2:
1664
82.4k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
82.4k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
10
        case JUMP_ASSERT:
1673
10
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
10
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
2.58M
    }
1683
1684
0
    return ret; /* should never get here */
1685
2.58M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
6.72k
{
601
6.72k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
6.72k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
6.72k
    Py_ssize_t ret = 0;
604
6.72k
    int jump;
605
6.72k
    unsigned int sigcount = state->sigcount;
606
607
6.72k
    SRE(match_context)* ctx;
608
6.72k
    SRE(match_context)* nextctx;
609
6.72k
    INIT_TRACE(state);
610
611
6.72k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
6.72k
    DATA_ALLOC(SRE(match_context), ctx);
614
6.72k
    ctx->last_ctx_pos = -1;
615
6.72k
    ctx->jump = JUMP_NONE;
616
6.72k
    ctx->toplevel = toplevel;
617
6.72k
    ctx_pos = alloc_pos;
618
619
6.72k
#if USE_COMPUTED_GOTOS
620
6.72k
#include "sre_targets.h"
621
6.72k
#endif
622
623
2.58M
entrance:
624
625
2.58M
    ;  // Fashion statement.
626
2.58M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
2.58M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
6.72k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2
                   end - ptr, (size_t) pattern[3]));
634
2
            RETURN_FAILURE;
635
2
        }
636
6.72k
        pattern += pattern[1] + 1;
637
6.72k
    }
638
639
2.58M
#if USE_COMPUTED_GOTOS
640
2.58M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
2.58M
    {
647
648
2.58M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
2.53M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
2.53M
                   ptr, pattern[0]));
653
2.53M
            {
654
2.53M
                int i = pattern[0];
655
2.53M
                if (i & 1)
656
1.26M
                    state->lastindex = i/2 + 1;
657
2.53M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
3.50k
                    int j = state->lastmark + 1;
663
3.69k
                    while (j < i)
664
186
                        state->mark[j++] = NULL;
665
3.50k
                    state->lastmark = i;
666
3.50k
                }
667
2.53M
                state->mark[i] = ptr;
668
2.53M
            }
669
2.53M
            pattern++;
670
2.53M
            DISPATCH;
671
672
2.53M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
43
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
43
                   ptr, *pattern));
677
43
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
3
                RETURN_FAILURE;
679
40
            pattern++;
680
40
            ptr++;
681
40
            DISPATCH;
682
683
40
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
5.12k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
5.12k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
5.12k
            if (ctx->toplevel &&
698
5.12k
                ((state->match_all && ptr != state->end) ||
699
5.12k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
5.12k
            state->ptr = ptr;
704
5.12k
            RETURN_SUCCESS;
705
706
352
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
352
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
352
            if (!SRE(at)(state, ptr, *pattern))
711
22
                RETURN_FAILURE;
712
330
            pattern++;
713
330
            DISPATCH;
714
715
330
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
4
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
4
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
4
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
1
                RETURN_FAILURE;
732
3
            ptr++;
733
3
            DISPATCH;
734
735
3
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
1.26M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
1.26M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
1.26M
            if (ptr >= end ||
749
1.26M
                !SRE(charset)(state, pattern + 1, *ptr))
750
3.13k
                RETURN_FAILURE;
751
1.26M
            pattern += pattern[0];
752
1.26M
            ptr++;
753
1.26M
            DISPATCH;
754
755
1.26M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
21
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
21
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
21
                   pattern, ptr, pattern[0]));
768
21
            if (ptr >= end ||
769
21
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
21
            pattern++;
772
21
            ptr++;
773
21
            DISPATCH;
774
775
21
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
18
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
18
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
18
            if (ptr >= end
828
12
                || !SRE(charset)(state, pattern+1,
829
12
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
12
                RETURN_FAILURE;
831
6
            pattern += pattern[0];
832
6
            ptr++;
833
6
            DISPATCH;
834
835
6
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
10
        TARGET(SRE_OP_JUMP):
845
10
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
10
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
10
                   ptr, pattern[0]));
850
10
            pattern += pattern[0];
851
10
            DISPATCH;
852
853
23
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
23
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
23
            LASTMARK_SAVE();
858
23
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
61
            for (; pattern[0]; pattern += pattern[0]) {
861
48
                if (pattern[1] == SRE_OP_LITERAL &&
862
18
                    (ptr >= end ||
863
18
                     (SRE_CODE) *ptr != pattern[2]))
864
16
                    continue;
865
32
                if (pattern[1] == SRE_OP_IN &&
866
9
                    (ptr >= end ||
867
9
                     !SRE(charset)(state, pattern + 3,
868
9
                                   (SRE_CODE) *ptr)))
869
9
                    continue;
870
23
                state->ptr = ptr;
871
23
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23
                if (ret) {
873
10
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
10
                    RETURN_ON_ERROR(ret);
876
10
                    RETURN_SUCCESS;
877
10
                }
878
13
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
13
                LASTMARK_RESTORE();
881
13
            }
882
13
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
13
            RETURN_FAILURE;
885
886
1.30M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
1.30M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
1.30M
                   pattern[1], pattern[2]));
898
899
1.30M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
624
                RETURN_FAILURE; /* cannot match */
901
902
1.30M
            state->ptr = ptr;
903
904
1.30M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
1.30M
            RETURN_ON_ERROR(ret);
906
1.30M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
1.30M
            ctx->count = ret;
908
1.30M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
1.30M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
1.29M
                RETURN_FAILURE;
917
918
7.33k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.27k
                ptr == state->end &&
920
1.24k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
1.24k
            {
922
                /* tail is empty.  we're finished */
923
1.24k
                state->ptr = ptr;
924
1.24k
                RETURN_SUCCESS;
925
1.24k
            }
926
927
6.09k
            LASTMARK_SAVE();
928
6.09k
            if (state->repeat)
929
38
                MARK_PUSH(ctx->lastmark);
930
931
6.09k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
50
                ctx->u.chr = pattern[pattern[0]+1];
935
50
                for (;;) {
936
319k
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
319k
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
319k
                        ptr--;
939
319k
                        ctx->count--;
940
319k
                    }
941
50
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
46
                        break;
943
4
                    state->ptr = ptr;
944
4
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4
                            pattern+pattern[0]);
946
4
                    if (ret) {
947
4
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
4
                        RETURN_ON_ERROR(ret);
950
4
                        RETURN_SUCCESS;
951
4
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
46
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
6.04k
            } else {
962
                /* general case */
963
79.3k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
78.5k
                    state->ptr = ptr;
965
78.5k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
78.5k
                            pattern+pattern[0]);
967
78.5k
                    if (ret) {
968
5.19k
                        if (state->repeat)
969
38
                            MARK_POP_DISCARD(ctx->lastmark);
970
5.19k
                        RETURN_ON_ERROR(ret);
971
5.19k
                        RETURN_SUCCESS;
972
5.19k
                    }
973
73.3k
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
73.3k
                    LASTMARK_RESTORE();
976
977
73.3k
                    ptr--;
978
73.3k
                    ctx->count--;
979
73.3k
                }
980
845
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
845
            }
983
891
            RETURN_FAILURE;
984
985
27
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
27
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
27
                   pattern[1], pattern[2]));
997
998
27
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
27
            state->ptr = ptr;
1002
1003
27
            if (pattern[1] == 0)
1004
13
                ctx->count = 0;
1005
14
            else {
1006
                /* count using pattern min as the maximum */
1007
14
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
14
                RETURN_ON_ERROR(ret);
1009
14
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
14
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
12
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
2
                ctx->count = ret;
1015
2
                ptr += ctx->count;
1016
2
            }
1017
1018
15
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
15
                !(ctx->toplevel &&
1020
15
                  ((state->match_all && ptr != state->end) ||
1021
15
                   (state->must_advance && ptr == state->start))))
1022
15
            {
1023
                /* tail is empty.  we're finished */
1024
15
                state->ptr = ptr;
1025
15
                RETURN_SUCCESS;
1026
1027
15
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
3.16k
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
3.16k
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
3.16k
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
3.16k
            ctx->u.rep = repeat_pool_malloc(state);
1127
3.16k
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
3.16k
            ctx->u.rep->count = -1;
1131
3.16k
            ctx->u.rep->pattern = pattern;
1132
3.16k
            ctx->u.rep->prev = state->repeat;
1133
3.16k
            ctx->u.rep->last_ptr = NULL;
1134
3.16k
            state->repeat = ctx->u.rep;
1135
1136
3.16k
            state->ptr = ptr;
1137
3.16k
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
3.16k
            state->repeat = ctx->u.rep->prev;
1139
3.16k
            repeat_pool_free(state, ctx->u.rep);
1140
1141
3.16k
            if (ret) {
1142
89
                RETURN_ON_ERROR(ret);
1143
89
                RETURN_SUCCESS;
1144
89
            }
1145
3.07k
            RETURN_FAILURE;
1146
1147
1.27M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
1.27M
            ctx->u.rep = state->repeat;
1155
1.27M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
1.27M
            state->ptr = ptr;
1159
1160
1.27M
            ctx->count = ctx->u.rep->count+1;
1161
1162
1.27M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
1.27M
                   ptr, ctx->count));
1164
1165
1.27M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
1.27M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
1.27M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
1.26M
                ctx->u.rep->count = ctx->count;
1185
1.26M
                LASTMARK_SAVE();
1186
1.26M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
1.26M
                LAST_PTR_PUSH();
1189
1.26M
                ctx->u.rep->last_ptr = state->ptr;
1190
1.26M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
1.26M
                        ctx->u.rep->pattern+3);
1192
1.26M
                LAST_PTR_POP();
1193
1.26M
                if (ret) {
1194
42.3k
                    MARK_POP_DISCARD(ctx->lastmark);
1195
42.3k
                    RETURN_ON_ERROR(ret);
1196
42.3k
                    RETURN_SUCCESS;
1197
42.3k
                }
1198
1.22M
                MARK_POP(ctx->lastmark);
1199
1.22M
                LASTMARK_RESTORE();
1200
1.22M
                ctx->u.rep->count = ctx->count-1;
1201
1.22M
                state->ptr = ptr;
1202
1.22M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
1.22M
            state->repeat = ctx->u.rep->prev;
1207
1.22M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
1.22M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
1.22M
            RETURN_ON_SUCCESS(ret);
1211
1.22M
            state->ptr = ptr;
1212
1.22M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
10
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
10
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
10
                   ptr, pattern[1]));
1565
10
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
10
            state->ptr = ptr - pattern[1];
1568
10
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
10
            RETURN_ON_FAILURE(ret);
1570
1
            pattern += pattern[0];
1571
1
            DISPATCH;
1572
1573
1
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
2.58M
exit:
1620
2.58M
    ctx_pos = ctx->last_ctx_pos;
1621
2.58M
    jump = ctx->jump;
1622
2.58M
    DATA_POP_DISCARD(ctx);
1623
2.58M
    if (ctx_pos == -1) {
1624
6.72k
        state->sigcount = sigcount;
1625
6.72k
        return ret;
1626
6.72k
    }
1627
2.57M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
2.57M
    switch (jump) {
1630
1.26M
        case JUMP_MAX_UNTIL_2:
1631
1.26M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
1.26M
            goto jump_max_until_2;
1633
1.22M
        case JUMP_MAX_UNTIL_3:
1634
1.22M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
1.22M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23
        case JUMP_BRANCH:
1643
23
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
3.16k
        case JUMP_REPEAT:
1658
3.16k
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
3.16k
            goto jump_repeat;
1660
4
        case JUMP_REPEAT_ONE_1:
1661
4
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4
            goto jump_repeat_one_1;
1663
78.5k
        case JUMP_REPEAT_ONE_2:
1664
78.5k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
78.5k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
10
        case JUMP_ASSERT:
1673
10
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
10
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
2.57M
    }
1683
1684
0
    return ret; /* should never get here */
1685
2.57M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
1.84k
{
601
1.84k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
1.84k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
1.84k
    Py_ssize_t ret = 0;
604
1.84k
    int jump;
605
1.84k
    unsigned int sigcount = state->sigcount;
606
607
1.84k
    SRE(match_context)* ctx;
608
1.84k
    SRE(match_context)* nextctx;
609
1.84k
    INIT_TRACE(state);
610
611
1.84k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
1.84k
    DATA_ALLOC(SRE(match_context), ctx);
614
1.84k
    ctx->last_ctx_pos = -1;
615
1.84k
    ctx->jump = JUMP_NONE;
616
1.84k
    ctx->toplevel = toplevel;
617
1.84k
    ctx_pos = alloc_pos;
618
619
1.84k
#if USE_COMPUTED_GOTOS
620
1.84k
#include "sre_targets.h"
621
1.84k
#endif
622
623
3.65k
entrance:
624
625
3.65k
    ;  // Fashion statement.
626
3.65k
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
3.65k
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
1.84k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
0
            TRACE(("reject (got %tu chars, need %zu)\n",
633
0
                   end - ptr, (size_t) pattern[3]));
634
0
            RETURN_FAILURE;
635
0
        }
636
1.84k
        pattern += pattern[1] + 1;
637
1.84k
    }
638
639
3.65k
#if USE_COMPUTED_GOTOS
640
3.65k
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
3.65k
    {
647
648
3.65k
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
0
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
0
                   ptr, pattern[0]));
653
0
            {
654
0
                int i = pattern[0];
655
0
                if (i & 1)
656
0
                    state->lastindex = i/2 + 1;
657
0
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
0
                    int j = state->lastmark + 1;
663
0
                    while (j < i)
664
0
                        state->mark[j++] = NULL;
665
0
                    state->lastmark = i;
666
0
                }
667
0
                state->mark[i] = ptr;
668
0
            }
669
0
            pattern++;
670
0
            DISPATCH;
671
672
0
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
0
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
0
                   ptr, *pattern));
677
0
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
0
                RETURN_FAILURE;
679
0
            pattern++;
680
0
            ptr++;
681
0
            DISPATCH;
682
683
0
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
1.80k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
1.80k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
1.80k
            if (ctx->toplevel &&
698
1.80k
                ((state->match_all && ptr != state->end) ||
699
1.80k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
1.80k
            state->ptr = ptr;
704
1.80k
            RETURN_SUCCESS;
705
706
0
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
0
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
0
            if (!SRE(at)(state, ptr, *pattern))
711
0
                RETURN_FAILURE;
712
0
            pattern++;
713
0
            DISPATCH;
714
715
0
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
0
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
0
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
0
            if (ptr >= end ||
749
0
                !SRE(charset)(state, pattern + 1, *ptr))
750
0
                RETURN_FAILURE;
751
0
            pattern += pattern[0];
752
0
            ptr++;
753
0
            DISPATCH;
754
755
0
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
0
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
0
        TARGET(SRE_OP_JUMP):
845
0
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
0
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
0
                   ptr, pattern[0]));
850
0
            pattern += pattern[0];
851
0
            DISPATCH;
852
853
0
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
0
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
0
            LASTMARK_SAVE();
858
0
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
0
            for (; pattern[0]; pattern += pattern[0]) {
861
0
                if (pattern[1] == SRE_OP_LITERAL &&
862
0
                    (ptr >= end ||
863
0
                     (SRE_CODE) *ptr != pattern[2]))
864
0
                    continue;
865
0
                if (pattern[1] == SRE_OP_IN &&
866
0
                    (ptr >= end ||
867
0
                     !SRE(charset)(state, pattern + 3,
868
0
                                   (SRE_CODE) *ptr)))
869
0
                    continue;
870
0
                state->ptr = ptr;
871
0
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
0
                if (ret) {
873
0
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
0
                    RETURN_ON_ERROR(ret);
876
0
                    RETURN_SUCCESS;
877
0
                }
878
0
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
0
                LASTMARK_RESTORE();
881
0
            }
882
0
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
0
            RETURN_FAILURE;
885
886
1.84k
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
1.84k
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
1.84k
                   pattern[1], pattern[2]));
898
899
1.84k
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
0
                RETURN_FAILURE; /* cannot match */
901
902
1.84k
            state->ptr = ptr;
903
904
1.84k
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
1.84k
            RETURN_ON_ERROR(ret);
906
1.84k
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
1.84k
            ctx->count = ret;
908
1.84k
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
1.84k
            if (ctx->count < (Py_ssize_t) pattern[1])
916
0
                RETURN_FAILURE;
917
918
1.84k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.84k
                ptr == state->end &&
920
40
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
40
            {
922
                /* tail is empty.  we're finished */
923
40
                state->ptr = ptr;
924
40
                RETURN_SUCCESS;
925
40
            }
926
927
1.80k
            LASTMARK_SAVE();
928
1.80k
            if (state->repeat)
929
0
                MARK_PUSH(ctx->lastmark);
930
931
1.80k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
0
                ctx->u.chr = pattern[pattern[0]+1];
935
0
                for (;;) {
936
0
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
0
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
0
                        ptr--;
939
0
                        ctx->count--;
940
0
                    }
941
0
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
0
                        break;
943
0
                    state->ptr = ptr;
944
0
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
0
                            pattern+pattern[0]);
946
0
                    if (ret) {
947
0
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
0
                        RETURN_ON_ERROR(ret);
950
0
                        RETURN_SUCCESS;
951
0
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
0
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
1.80k
            } else {
962
                /* general case */
963
1.80k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
1.80k
                    state->ptr = ptr;
965
1.80k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
1.80k
                            pattern+pattern[0]);
967
1.80k
                    if (ret) {
968
1.80k
                        if (state->repeat)
969
0
                            MARK_POP_DISCARD(ctx->lastmark);
970
1.80k
                        RETURN_ON_ERROR(ret);
971
1.80k
                        RETURN_SUCCESS;
972
1.80k
                    }
973
0
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
0
                    LASTMARK_RESTORE();
976
977
0
                    ptr--;
978
0
                    ctx->count--;
979
0
                }
980
0
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
0
            }
983
0
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
0
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
0
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
0
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
0
            ctx->u.rep = repeat_pool_malloc(state);
1127
0
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
0
            ctx->u.rep->count = -1;
1131
0
            ctx->u.rep->pattern = pattern;
1132
0
            ctx->u.rep->prev = state->repeat;
1133
0
            ctx->u.rep->last_ptr = NULL;
1134
0
            state->repeat = ctx->u.rep;
1135
1136
0
            state->ptr = ptr;
1137
0
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
0
            state->repeat = ctx->u.rep->prev;
1139
0
            repeat_pool_free(state, ctx->u.rep);
1140
1141
0
            if (ret) {
1142
0
                RETURN_ON_ERROR(ret);
1143
0
                RETURN_SUCCESS;
1144
0
            }
1145
0
            RETURN_FAILURE;
1146
1147
0
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
0
            ctx->u.rep = state->repeat;
1155
0
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
0
            state->ptr = ptr;
1159
1160
0
            ctx->count = ctx->u.rep->count+1;
1161
1162
0
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
0
                   ptr, ctx->count));
1164
1165
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
0
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
0
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
0
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
0
                ctx->u.rep->count = ctx->count;
1185
0
                LASTMARK_SAVE();
1186
0
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
0
                LAST_PTR_PUSH();
1189
0
                ctx->u.rep->last_ptr = state->ptr;
1190
0
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
0
                        ctx->u.rep->pattern+3);
1192
0
                LAST_PTR_POP();
1193
0
                if (ret) {
1194
0
                    MARK_POP_DISCARD(ctx->lastmark);
1195
0
                    RETURN_ON_ERROR(ret);
1196
0
                    RETURN_SUCCESS;
1197
0
                }
1198
0
                MARK_POP(ctx->lastmark);
1199
0
                LASTMARK_RESTORE();
1200
0
                ctx->u.rep->count = ctx->count-1;
1201
0
                state->ptr = ptr;
1202
0
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
0
            state->repeat = ctx->u.rep->prev;
1207
0
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
0
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
0
            RETURN_ON_SUCCESS(ret);
1211
0
            state->ptr = ptr;
1212
0
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
0
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
0
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
0
                   ptr, pattern[1]));
1565
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
0
            state->ptr = ptr - pattern[1];
1568
0
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
0
            RETURN_ON_FAILURE(ret);
1570
0
            pattern += pattern[0];
1571
0
            DISPATCH;
1572
1573
0
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
3.65k
exit:
1620
3.65k
    ctx_pos = ctx->last_ctx_pos;
1621
3.65k
    jump = ctx->jump;
1622
3.65k
    DATA_POP_DISCARD(ctx);
1623
3.65k
    if (ctx_pos == -1) {
1624
1.84k
        state->sigcount = sigcount;
1625
1.84k
        return ret;
1626
1.84k
    }
1627
1.80k
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.80k
    switch (jump) {
1630
0
        case JUMP_MAX_UNTIL_2:
1631
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
0
            goto jump_max_until_2;
1633
0
        case JUMP_MAX_UNTIL_3:
1634
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
0
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
0
        case JUMP_BRANCH:
1643
0
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
0
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
0
        case JUMP_REPEAT:
1658
0
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
0
            goto jump_repeat;
1660
0
        case JUMP_REPEAT_ONE_1:
1661
0
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
0
            goto jump_repeat_one_1;
1663
1.80k
        case JUMP_REPEAT_ONE_2:
1664
1.80k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
1.80k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
0
        case JUMP_ASSERT:
1673
0
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
0
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.80k
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.80k
}
sre.c:sre_ucs4_match
Line
Count
Source
600
2.21k
{
601
2.21k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
2.21k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
2.21k
    Py_ssize_t ret = 0;
604
2.21k
    int jump;
605
2.21k
    unsigned int sigcount = state->sigcount;
606
607
2.21k
    SRE(match_context)* ctx;
608
2.21k
    SRE(match_context)* nextctx;
609
2.21k
    INIT_TRACE(state);
610
611
2.21k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
2.21k
    DATA_ALLOC(SRE(match_context), ctx);
614
2.21k
    ctx->last_ctx_pos = -1;
615
2.21k
    ctx->jump = JUMP_NONE;
616
2.21k
    ctx->toplevel = toplevel;
617
2.21k
    ctx_pos = alloc_pos;
618
619
2.21k
#if USE_COMPUTED_GOTOS
620
2.21k
#include "sre_targets.h"
621
2.21k
#endif
622
623
4.35k
entrance:
624
625
4.35k
    ;  // Fashion statement.
626
4.35k
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
4.35k
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
2.21k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
0
            TRACE(("reject (got %tu chars, need %zu)\n",
633
0
                   end - ptr, (size_t) pattern[3]));
634
0
            RETURN_FAILURE;
635
0
        }
636
2.21k
        pattern += pattern[1] + 1;
637
2.21k
    }
638
639
4.35k
#if USE_COMPUTED_GOTOS
640
4.35k
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
4.35k
    {
647
648
4.35k
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
0
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
0
                   ptr, pattern[0]));
653
0
            {
654
0
                int i = pattern[0];
655
0
                if (i & 1)
656
0
                    state->lastindex = i/2 + 1;
657
0
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
0
                    int j = state->lastmark + 1;
663
0
                    while (j < i)
664
0
                        state->mark[j++] = NULL;
665
0
                    state->lastmark = i;
666
0
                }
667
0
                state->mark[i] = ptr;
668
0
            }
669
0
            pattern++;
670
0
            DISPATCH;
671
672
0
        TARGET(SRE_OP_LITERAL):
673
            /* match literal character */
674
            /* <LITERAL> <code> */
675
0
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
0
                   ptr, *pattern));
677
0
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
0
                RETURN_FAILURE;
679
0
            pattern++;
680
0
            ptr++;
681
0
            DISPATCH;
682
683
0
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
2.14k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
2.14k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
2.14k
            if (ctx->toplevel &&
698
2.14k
                ((state->match_all && ptr != state->end) ||
699
2.14k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
2.14k
            state->ptr = ptr;
704
2.14k
            RETURN_SUCCESS;
705
706
0
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
0
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
0
            if (!SRE(at)(state, ptr, *pattern))
711
0
                RETURN_FAILURE;
712
0
            pattern++;
713
0
            DISPATCH;
714
715
0
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
0
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
0
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
0
            if (ptr >= end ||
749
0
                !SRE(charset)(state, pattern + 1, *ptr))
750
0
                RETURN_FAILURE;
751
0
            pattern += pattern[0];
752
0
            ptr++;
753
0
            DISPATCH;
754
755
0
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
0
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
0
        TARGET(SRE_OP_JUMP):
845
0
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
0
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
0
                   ptr, pattern[0]));
850
0
            pattern += pattern[0];
851
0
            DISPATCH;
852
853
0
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
0
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
0
            LASTMARK_SAVE();
858
0
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
0
            for (; pattern[0]; pattern += pattern[0]) {
861
0
                if (pattern[1] == SRE_OP_LITERAL &&
862
0
                    (ptr >= end ||
863
0
                     (SRE_CODE) *ptr != pattern[2]))
864
0
                    continue;
865
0
                if (pattern[1] == SRE_OP_IN &&
866
0
                    (ptr >= end ||
867
0
                     !SRE(charset)(state, pattern + 3,
868
0
                                   (SRE_CODE) *ptr)))
869
0
                    continue;
870
0
                state->ptr = ptr;
871
0
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
0
                if (ret) {
873
0
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
0
                    RETURN_ON_ERROR(ret);
876
0
                    RETURN_SUCCESS;
877
0
                }
878
0
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
0
                LASTMARK_RESTORE();
881
0
            }
882
0
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
0
            RETURN_FAILURE;
885
886
2.21k
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
2.21k
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
2.21k
                   pattern[1], pattern[2]));
898
899
2.21k
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
0
                RETURN_FAILURE; /* cannot match */
901
902
2.21k
            state->ptr = ptr;
903
904
2.21k
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
2.21k
            RETURN_ON_ERROR(ret);
906
2.21k
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
2.21k
            ctx->count = ret;
908
2.21k
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
2.21k
            if (ctx->count < (Py_ssize_t) pattern[1])
916
0
                RETURN_FAILURE;
917
918
2.21k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
2.21k
                ptr == state->end &&
920
63
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
63
            {
922
                /* tail is empty.  we're finished */
923
63
                state->ptr = ptr;
924
63
                RETURN_SUCCESS;
925
63
            }
926
927
2.14k
            LASTMARK_SAVE();
928
2.14k
            if (state->repeat)
929
0
                MARK_PUSH(ctx->lastmark);
930
931
2.14k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
0
                ctx->u.chr = pattern[pattern[0]+1];
935
0
                for (;;) {
936
0
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
0
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
0
                        ptr--;
939
0
                        ctx->count--;
940
0
                    }
941
0
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
0
                        break;
943
0
                    state->ptr = ptr;
944
0
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
0
                            pattern+pattern[0]);
946
0
                    if (ret) {
947
0
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
0
                        RETURN_ON_ERROR(ret);
950
0
                        RETURN_SUCCESS;
951
0
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
0
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
2.14k
            } else {
962
                /* general case */
963
2.14k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
2.14k
                    state->ptr = ptr;
965
2.14k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
2.14k
                            pattern+pattern[0]);
967
2.14k
                    if (ret) {
968
2.14k
                        if (state->repeat)
969
0
                            MARK_POP_DISCARD(ctx->lastmark);
970
2.14k
                        RETURN_ON_ERROR(ret);
971
2.14k
                        RETURN_SUCCESS;
972
2.14k
                    }
973
0
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
0
                    LASTMARK_RESTORE();
976
977
0
                    ptr--;
978
0
                    ctx->count--;
979
0
                }
980
0
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
0
            }
983
0
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
0
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
0
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
0
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
0
            ctx->u.rep = repeat_pool_malloc(state);
1127
0
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
0
            ctx->u.rep->count = -1;
1131
0
            ctx->u.rep->pattern = pattern;
1132
0
            ctx->u.rep->prev = state->repeat;
1133
0
            ctx->u.rep->last_ptr = NULL;
1134
0
            state->repeat = ctx->u.rep;
1135
1136
0
            state->ptr = ptr;
1137
0
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
0
            state->repeat = ctx->u.rep->prev;
1139
0
            repeat_pool_free(state, ctx->u.rep);
1140
1141
0
            if (ret) {
1142
0
                RETURN_ON_ERROR(ret);
1143
0
                RETURN_SUCCESS;
1144
0
            }
1145
0
            RETURN_FAILURE;
1146
1147
0
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
0
            ctx->u.rep = state->repeat;
1155
0
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
0
            state->ptr = ptr;
1159
1160
0
            ctx->count = ctx->u.rep->count+1;
1161
1162
0
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
0
                   ptr, ctx->count));
1164
1165
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
0
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
0
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
0
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
0
                ctx->u.rep->count = ctx->count;
1185
0
                LASTMARK_SAVE();
1186
0
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
0
                LAST_PTR_PUSH();
1189
0
                ctx->u.rep->last_ptr = state->ptr;
1190
0
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
0
                        ctx->u.rep->pattern+3);
1192
0
                LAST_PTR_POP();
1193
0
                if (ret) {
1194
0
                    MARK_POP_DISCARD(ctx->lastmark);
1195
0
                    RETURN_ON_ERROR(ret);
1196
0
                    RETURN_SUCCESS;
1197
0
                }
1198
0
                MARK_POP(ctx->lastmark);
1199
0
                LASTMARK_RESTORE();
1200
0
                ctx->u.rep->count = ctx->count-1;
1201
0
                state->ptr = ptr;
1202
0
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
0
            state->repeat = ctx->u.rep->prev;
1207
0
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
0
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
0
            RETURN_ON_SUCCESS(ret);
1211
0
            state->ptr = ptr;
1212
0
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
0
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
0
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
0
                   ptr, pattern[1]));
1565
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
0
            state->ptr = ptr - pattern[1];
1568
0
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
0
            RETURN_ON_FAILURE(ret);
1570
0
            pattern += pattern[0];
1571
0
            DISPATCH;
1572
1573
0
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
4.35k
exit:
1620
4.35k
    ctx_pos = ctx->last_ctx_pos;
1621
4.35k
    jump = ctx->jump;
1622
4.35k
    DATA_POP_DISCARD(ctx);
1623
4.35k
    if (ctx_pos == -1) {
1624
2.21k
        state->sigcount = sigcount;
1625
2.21k
        return ret;
1626
2.21k
    }
1627
2.14k
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
2.14k
    switch (jump) {
1630
0
        case JUMP_MAX_UNTIL_2:
1631
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
0
            goto jump_max_until_2;
1633
0
        case JUMP_MAX_UNTIL_3:
1634
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
0
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
0
        case JUMP_BRANCH:
1643
0
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
0
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
0
        case JUMP_REPEAT:
1658
0
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
0
            goto jump_repeat;
1660
0
        case JUMP_REPEAT_ONE_1:
1661
0
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
0
            goto jump_repeat_one_1;
1663
2.14k
        case JUMP_REPEAT_ONE_2:
1664
2.14k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
2.14k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
0
        case JUMP_ASSERT:
1673
0
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
0
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
2.14k
    }
1683
1684
0
    return ret; /* should never get here */
1685
2.14k
}
1686
1687
/* Capture-group state must be reset between two successive SRE(match)
   calls made from a loop: this sets both state->lastmark and
   state->lastindex to -1.  Expects a variable named `state` in scope. */
1688
#define RESET_CAPTURE_GROUP() \
1689
0
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan forward from state->start looking for the first position at which
   `pattern` matches.

   If the pattern begins with an INFO block, its contents select one of
   several scanning strategies: a single literal first character, a
   multi-character literal prefix driven by an overlap table, or a charset
   for the first character.  Otherwise every position is tried in turn.

   For each attempt state->start and state->ptr are set before SRE(match)
   is called.  Returns the first non-zero status produced by SRE(match),
   1 when the SRE_INFO_LITERAL flag is set and the literal prefix was
   found (SRE(match) is not called in that case), or 0 when no position
   matched. */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
0
{
1694
0
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
0
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
0
    Py_ssize_t status = 0;
1697
0
    Py_ssize_t prefix_len = 0;
1698
0
    Py_ssize_t prefix_skip = 0;
1699
0
    SRE_CODE* prefix = NULL;
1700
0
    SRE_CODE* charset = NULL;
1701
0
    SRE_CODE* overlap = NULL;
1702
0
    int flags = 0;
1703
0
    INIT_TRACE(state);
1704
1705
0
    /* Start position is past the end: there is nothing to scan. */
    if (ptr > end)
1706
0
        return 0;
1707
1708
0
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
0
        flags = pattern[2];
1713
1714
0
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
0
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
0
                   end - ptr, (size_t) pattern[3]));
1717
0
            return 0;
1718
0
        }
1719
0
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
0
            end -= pattern[3] - 1;
1723
0
            if (end <= ptr)
1724
0
                end = ptr;
1725
0
        }
1726
1727
0
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
0
            prefix_len = pattern[5];
1731
0
            prefix_skip = pattern[6];
1732
0
            prefix = pattern + 7;
1733
0
            /* The overlap data follows the prefix_len prefix codes; the
               pointer is biased by -1 and is only indexed with i >= 1 in
               the scan loop below. */
            overlap = prefix + prefix_len - 1;
1734
0
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
0
            charset = pattern + 5;
1738
1739
0
        /* Step over the INFO block (pattern[1] is its skip count). */
        pattern += 1 + pattern[1];
1740
0
    }
1741
1742
0
    TRACE(("prefix = %p %zd %zd\n",
1743
0
           prefix, prefix_len, prefix_skip));
1744
0
    TRACE(("charset = %p\n", charset));
1745
1746
0
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
0
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
0
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
0
#endif
1753
0
        /* Scan up to the real end again, discarding the minimum-length
           adjustment that may have been applied to `end` above. */
        end = (SRE_CHAR *)state->end;
1754
0
        state->must_advance = 0;
1755
0
        while (ptr < end) {
1756
0
            while (*ptr != c) {
1757
0
                if (++ptr >= end)
1758
0
                    return 0;
1759
0
            }
1760
0
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
0
            state->start = ptr;
1762
0
            state->ptr = ptr + prefix_skip;
1763
0
            if (flags & SRE_INFO_LITERAL)
1764
0
                return 1; /* we got all of it */
1765
0
            /* Resume matching after the part of the pattern already
               covered by the prefix.  NOTE(review): the factor 2 presumably
               reflects two codes per skipped literal -- confirm against
               the pattern compiler. */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
0
            if (status != 0)
1767
0
                return status;
1768
0
            ++ptr;
1769
0
            RESET_CAPTURE_GROUP();
1770
0
        }
1771
0
        return 0;
1772
0
    }
1773
1774
0
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
0
        Py_ssize_t i = 0;
1778
1779
0
        end = (SRE_CHAR *)state->end;
1780
0
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
0
        for (i = 0; i < prefix_len; i++)
1784
0
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
0
#endif
1787
0
        while (ptr < end) {
1788
0
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
0
            while (*ptr++ != c) {
1790
0
                if (ptr >= end)
1791
0
                    return 0;
1792
0
            }
1793
0
            if (ptr >= end)
1794
0
                return 0;
1795
1796
0
            i = 1;
1797
0
            state->must_advance = 0;
1798
0
            do {
1799
0
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
0
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        /* `continue` in a do-while jumps to the loop
                           condition, so the overlap lookup at the bottom
                           is skipped while the prefix keeps matching. */
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
0
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
0
                    state->start = ptr - (prefix_len - 1);
1808
0
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
0
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
0
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
0
                    if (status != 0)
1813
0
                        return status;
1814
                    /* close but no cigar -- try again */
1815
0
                    if (++ptr >= end)
1816
0
                        return 0;
1817
0
                    RESET_CAPTURE_GROUP();
1818
0
                }
1819
0
                /* Reached on a character mismatch or after a failed full
                   match: take the next prefix index from the overlap
                   table.  An index of 0 ends this loop and the outer loop
                   rescans for prefix[0]. */
                i = overlap[i];
1820
0
            } while (i != 0);
1821
0
        }
1822
0
        return 0;
1823
0
    }
1824
1825
0
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
0
        end = (SRE_CHAR *)state->end;
1828
0
        state->must_advance = 0;
1829
0
        for (;;) {
1830
0
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
0
                ptr++;
1832
0
            if (ptr >= end)
1833
0
                return 0;
1834
0
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
0
            state->start = ptr;
1836
0
            state->ptr = ptr;
1837
0
            status = SRE(match)(state, pattern, 0);
1838
0
            if (status != 0)
1839
0
                break;
1840
0
            ptr++;
1841
0
            RESET_CAPTURE_GROUP();
1842
0
        }
1843
0
    } else {
1844
        /* general case */
1845
0
        assert(ptr <= end);
1846
0
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
0
        state->start = state->ptr = ptr;
1848
0
        status = SRE(match)(state, pattern, 1);
1849
0
        state->must_advance = 0;
1850
0
        /* A pattern that begins with a beginning-of-string anchor and
           fails at the first position is not retried at later positions:
           move to the end and report no match. */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
0
        while (status == 0 && ptr < end) {
1858
0
            ptr++;
1859
0
            RESET_CAPTURE_GROUP();
1860
0
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
0
            state->start = state->ptr = ptr;
1862
0
            status = SRE(match)(state, pattern, 0);
1863
0
        }
1864
0
    }
1865
1866
0
    return status;
1867
0
}
Unexecuted instantiation: sre.c:sre_ucs1_search
Unexecuted instantiation: sre.c:sre_ucs2_search
Unexecuted instantiation: sre.c:sre_ucs4_search
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/