Coverage Report

Created: 2026-01-13 06:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
368
{
18
    /* check if pointer is at given position */
19
20
368
    Py_ssize_t thisp, thatp;
21
22
368
    switch (at) {
23
24
296
    case SRE_AT_BEGINNING:
25
296
    case SRE_AT_BEGINNING_STRING:
26
296
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
54
    case SRE_AT_END:
33
54
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
15
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
53
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
12
    case SRE_AT_END_STRING:
42
12
        return ((void*) ptr == state->end);
43
44
6
    case SRE_AT_BOUNDARY:
45
6
        thatp = ((void*) ptr > state->beginning) ?
46
6
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
6
        thisp = ((void*) ptr < state->end) ?
48
5
            SRE_IS_WORD((int) ptr[0]) : 0;
49
6
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
368
    }
87
88
0
    return 0;
89
368
}
sre.c:sre_ucs1_at
Line
Count
Source
17
368
{
18
    /* check if pointer is at given position */
19
20
368
    Py_ssize_t thisp, thatp;
21
22
368
    switch (at) {
23
24
296
    case SRE_AT_BEGINNING:
25
296
    case SRE_AT_BEGINNING_STRING:
26
296
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
54
    case SRE_AT_END:
33
54
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
15
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
53
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
12
    case SRE_AT_END_STRING:
42
12
        return ((void*) ptr == state->end);
43
44
6
    case SRE_AT_BOUNDARY:
45
6
        thatp = ((void*) ptr > state->beginning) ?
46
6
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
6
        thisp = ((void*) ptr < state->end) ?
48
5
            SRE_IS_WORD((int) ptr[0]) : 0;
49
6
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
368
    }
87
88
0
    return 0;
89
368
}
Unexecuted instantiation: sre.c:sre_ucs2_at
Unexecuted instantiation: sre.c:sre_ucs4_at
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
9.45M
{
94
    /* check if character is a member of the given set */
95
96
9.45M
    int ok = 1;
97
98
11.1M
    for (;;) {
99
11.1M
        switch (*set++) {
100
101
1.56M
        case SRE_OP_FAILURE:
102
1.56M
            return !ok;
103
104
1.49M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.49M
            if (ch == set[0])
107
1.39M
                return ok;
108
91.8k
            set++;
109
91.8k
            break;
110
111
1.55M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
1.55M
            if (sre_category(set[0], (int) ch))
114
1.48k
                return ok;
115
1.55M
            set++;
116
1.55M
            break;
117
118
6.49M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
6.49M
            if (ch < 256 &&
121
6.49M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
6.48M
                return ok;
123
8.64k
            set += 256/SRE_CODE_BITS;
124
8.64k
            break;
125
126
6
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
6
            if (set[0] <= ch && ch <= set[1])
129
1
                return ok;
130
5
            set += 2;
131
5
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
6
        case SRE_OP_NEGATE:
148
6
            ok = !ok;
149
6
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
11.1M
        }
175
11.1M
    }
176
9.45M
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
8.75M
{
94
    /* check if character is a member of the given set */
95
96
8.75M
    int ok = 1;
97
98
10.3M
    for (;;) {
99
10.3M
        switch (*set++) {
100
101
1.55M
        case SRE_OP_FAILURE:
102
1.55M
            return !ok;
103
104
1.49M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.49M
            if (ch == set[0])
107
1.39M
                return ok;
108
91.8k
            set++;
109
91.8k
            break;
110
111
1.55M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
1.55M
            if (sre_category(set[0], (int) ch))
114
1.48k
                return ok;
115
1.55M
            set++;
116
1.55M
            break;
117
118
5.79M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
5.79M
            if (ch < 256 &&
121
5.79M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
5.79M
                return ok;
123
4.75k
            set += 256/SRE_CODE_BITS;
124
4.75k
            break;
125
126
6
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
6
            if (set[0] <= ch && ch <= set[1])
129
1
                return ok;
130
5
            set += 2;
131
5
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
6
        case SRE_OP_NEGATE:
148
6
            ok = !ok;
149
6
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
10.3M
        }
175
10.3M
    }
176
8.75M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
519k
{
94
    /* check if character is a member of the given set */
95
96
519k
    int ok = 1;
97
98
521k
    for (;;) {
99
521k
        switch (*set++) {
100
101
1.72k
        case SRE_OP_FAILURE:
102
1.72k
            return !ok;
103
104
0
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
0
            if (ch == set[0])
107
0
                return ok;
108
0
            set++;
109
0
            break;
110
111
0
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
0
            if (sre_category(set[0], (int) ch))
114
0
                return ok;
115
0
            set++;
116
0
            break;
117
118
519k
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
519k
            if (ch < 256 &&
121
519k
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
518k
                return ok;
123
1.72k
            set += 256/SRE_CODE_BITS;
124
1.72k
            break;
125
126
0
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
0
            if (set[0] <= ch && ch <= set[1])
129
0
                return ok;
130
0
            set += 2;
131
0
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
0
        case SRE_OP_NEGATE:
148
0
            ok = !ok;
149
0
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
521k
        }
175
521k
    }
176
519k
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
180k
{
94
    /* check if character is a member of the given set */
95
96
180k
    int ok = 1;
97
98
182k
    for (;;) {
99
182k
        switch (*set++) {
100
101
2.16k
        case SRE_OP_FAILURE:
102
2.16k
            return !ok;
103
104
0
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
0
            if (ch == set[0])
107
0
                return ok;
108
0
            set++;
109
0
            break;
110
111
0
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
0
            if (sre_category(set[0], (int) ch))
114
0
                return ok;
115
0
            set++;
116
0
            break;
117
118
180k
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
180k
            if (ch < 256 &&
121
180k
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
178k
                return ok;
123
2.16k
            set += 256/SRE_CODE_BITS;
124
2.16k
            break;
125
126
0
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
0
            if (set[0] <= ch && ch <= set[1])
129
0
                return ok;
130
0
            set += 2;
131
0
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
0
        case SRE_OP_NEGATE:
148
0
            ok = !ok;
149
0
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
182k
        }
175
182k
    }
176
180k
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
1.56M
{
195
1.56M
    SRE_CODE chr;
196
1.56M
    SRE_CHAR c;
197
1.56M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
1.56M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
1.56M
    Py_ssize_t i;
200
1.56M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
1.56M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
1.55M
        end = ptr + maxcount;
205
206
1.56M
    switch (pattern[0]) {
207
208
1.56M
    case SRE_OP_IN:
209
        /* repeated set */
210
1.56M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
8.05M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
6.49M
            ptr++;
213
1.56M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
640
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
640
        chr = pattern[1];
232
640
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
640
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
640
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
640
        else
238
640
#endif
239
408k
        while (ptr < end && *ptr == c)
240
408k
            ptr++;
241
640
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
0
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
0
        else
276
0
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
1.56M
    }
319
320
1.56M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
1.56M
           ptr - (SRE_CHAR*) state->ptr));
322
1.56M
    return ptr - (SRE_CHAR*) state->ptr;
323
1.56M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
1.55M
{
195
1.55M
    SRE_CODE chr;
196
1.55M
    SRE_CHAR c;
197
1.55M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
1.55M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
1.55M
    Py_ssize_t i;
200
1.55M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
1.55M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
1.55M
        end = ptr + maxcount;
205
206
1.55M
    switch (pattern[0]) {
207
208
1.55M
    case SRE_OP_IN:
209
        /* repeated set */
210
1.55M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
7.35M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
5.79M
            ptr++;
213
1.55M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
640
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
640
        chr = pattern[1];
232
640
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
640
        c = (SRE_CHAR) chr;
234
640
#if SIZEOF_SRE_CHAR < 4
235
640
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
640
        else
238
640
#endif
239
408k
        while (ptr < end && *ptr == c)
240
408k
            ptr++;
241
640
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
0
#if SIZEOF_SRE_CHAR < 4
273
0
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
0
        else
276
0
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
1.55M
    }
319
320
1.55M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
1.55M
           ptr - (SRE_CHAR*) state->ptr));
322
1.55M
    return ptr - (SRE_CHAR*) state->ptr;
323
1.55M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
1.76k
{
195
1.76k
    SRE_CODE chr;
196
1.76k
    SRE_CHAR c;
197
1.76k
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
1.76k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
1.76k
    Py_ssize_t i;
200
1.76k
    INIT_TRACE(state);
201
202
    /* adjust end */
203
1.76k
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
0
        end = ptr + maxcount;
205
206
1.76k
    switch (pattern[0]) {
207
208
1.76k
    case SRE_OP_IN:
209
        /* repeated set */
210
1.76k
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
519k
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
518k
            ptr++;
213
1.76k
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
0
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
0
        chr = pattern[1];
232
0
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
0
        c = (SRE_CHAR) chr;
234
0
#if SIZEOF_SRE_CHAR < 4
235
0
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
0
        else
238
0
#endif
239
0
        while (ptr < end && *ptr == c)
240
0
            ptr++;
241
0
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
0
#if SIZEOF_SRE_CHAR < 4
273
0
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
0
        else
276
0
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
1.76k
    }
319
320
1.76k
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
1.76k
           ptr - (SRE_CHAR*) state->ptr));
322
1.76k
    return ptr - (SRE_CHAR*) state->ptr;
323
1.76k
}
sre.c:sre_ucs4_count
Line
Count
Source
194
2.24k
{
195
2.24k
    SRE_CODE chr;
196
2.24k
    SRE_CHAR c;
197
2.24k
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
2.24k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
2.24k
    Py_ssize_t i;
200
2.24k
    INIT_TRACE(state);
201
202
    /* adjust end */
203
2.24k
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
0
        end = ptr + maxcount;
205
206
2.24k
    switch (pattern[0]) {
207
208
2.24k
    case SRE_OP_IN:
209
        /* repeated set */
210
2.24k
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
180k
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
178k
            ptr++;
213
2.24k
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
0
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
0
        chr = pattern[1];
232
0
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
0
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
0
        while (ptr < end && *ptr == c)
240
0
            ptr++;
241
0
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
0
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
0
        chr = pattern[1];
270
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
0
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
0
        while (ptr < end && *ptr != c)
278
0
            ptr++;
279
0
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
2.24k
    }
319
320
2.24k
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
2.24k
           ptr - (SRE_CHAR*) state->ptr));
322
2.24k
    return ptr - (SRE_CHAR*) state->ptr;
323
2.24k
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* LASTMARK_SAVE copies state->lastmark and state->lastindex into the
   current match context (ctx); LASTMARK_RESTORE copies them back from
   ctx into state.  Both rely on locals named `state` and `ctx` being in
   scope at the expansion site. */
#define LASTMARK_SAVE()     \
354
1.41M
    do { \
355
1.41M
        ctx->lastmark = state->lastmark; \
356
1.41M
        ctx->lastindex = state->lastindex; \
357
1.41M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
1.55M
    do { \
360
1.55M
        state->lastmark = ctx->lastmark; \
361
1.55M
        state->lastindex = ctx->lastindex; \
362
1.55M
    } while (0)
363
364
/* LAST_PTR_PUSH traces ctx->u.rep->last_ptr (as an index) and then pushes
   that pointer onto the data stack with DATA_PUSH.  LAST_PTR_POP pops the
   value back into the same field with DATA_POP and traces it afterwards.
   Because they expand DATA_PUSH, a failed stack growth makes the
   enclosing function return a negative value. */
#define LAST_PTR_PUSH()     \
365
1.40M
    do { \
366
1.40M
        TRACE(("push last_ptr: %zd", \
367
1.40M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
1.40M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
1.40M
    } while (0)
370
#define LAST_PTR_POP()  \
371
1.40M
    do { \
372
1.40M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
1.40M
        TRACE(("pop last_ptr: %zd", \
374
1.40M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
1.40M
    } while (0)
376
377
0
/* Exit helpers for the matcher.
   - RETURN_ERROR(i) returns i straight from the enclosing function.
   - RETURN_FAILURE / RETURN_SUCCESS set the local `ret` to 0 / 1 and jump
     to the `exit` label (defined outside this excerpt).
   - RETURN_ON_ERROR(i) returns i when it is negative.
   - RETURN_ON_SUCCESS(i) returns on error, or succeeds when i > 0.
   - RETURN_ON_FAILURE(i) returns on error, or fails when i == 0.
   NOTE(review): `i` is expanded unparenthesized and more than once, so
   callers should pass a plain variable, not an expression with side
   effects. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
2.93M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
50.8k
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
2.97M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
1.37M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
14
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
2.98M
/* Reserve sizeof(type) bytes at the current top of state->data_stack and
   make `ptr` point at them.  The byte offset of the reservation is left
   in the local `alloc_pos`.  If the remaining capacity is too small,
   data_stack_grow() is called; a negative result is returned from the
   enclosing function.  After a grow, `ctx` is looked up again from
   `ctx_pos` (unless ctx_pos is -1) -- presumably because growing may move
   the stack buffer; data_stack_grow is defined elsewhere, confirm there. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
2.98M
do { \
390
2.98M
    alloc_pos = state->data_stack_base; \
391
2.98M
    TRACE(("allocating %s in %zd (%zd)\n", \
392
2.98M
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
2.98M
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
12.5k
        int j = data_stack_grow(state, sizeof(type)); \
395
12.5k
        if (j < 0) return j; \
396
12.5k
        if (ctx_pos != -1) \
397
12.5k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
12.5k
    } \
399
2.98M
    ptr = (type*)(state->data_stack+alloc_pos); \
400
2.98M
    state->data_stack_base += sizeof(type); \
401
2.98M
} while (0)
402
403
4.53M
/* Set `ptr` to the `type` object that lives at byte offset `pos` inside
   state->data_stack.  Nothing is allocated or popped. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
4.53M
do { \
405
4.53M
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
4.53M
    ptr = (type*)(state->data_stack+pos); \
407
4.53M
} while (0)
408
409
2.80M
/* Copy `size` bytes from `data` onto the top of the data stack and advance
   state->data_stack_base by `size`.  When the free space is insufficient
   the stack is grown first; a negative data_stack_grow() result is
   returned from the enclosing function, and `ctx` is re-derived from
   `ctx_pos` after a successful grow (skipped when ctx_pos is -1). */
#define DATA_STACK_PUSH(state, data, size) \
410
2.80M
do { \
411
2.80M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
2.80M
           data, state->data_stack_base, size)); \
413
2.80M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
666
        int j = data_stack_grow(state, size); \
415
666
        if (j < 0) return j; \
416
666
        if (ctx_pos != -1) \
417
666
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
666
    } \
419
2.80M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
2.80M
    state->data_stack_base += size; \
421
2.80M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
2.77M
/* Copy the topmost `size` bytes of the data stack into `data`.  When
   `discard` is nonzero the bytes are also removed by lowering
   state->data_stack_base; otherwise they stay on the stack. */
#define DATA_STACK_POP(state, data, size, discard) \
427
2.77M
do { \
428
2.77M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
2.77M
           data, state->data_stack_base-size, size)); \
430
2.77M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
2.77M
    if (discard) \
432
2.77M
        state->data_stack_base -= size; \
433
2.77M
} while (0)
434
435
3.01M
/* Remove the topmost `size` bytes from the data stack without copying
   them anywhere (only state->data_stack_base is lowered). */
#define DATA_STACK_POP_DISCARD(state, size) \
436
3.01M
do { \
437
3.01M
    TRACE(("discard data from %zd (%zd)\n", \
438
3.01M
           state->data_stack_base-size, size)); \
439
3.01M
    state->data_stack_base -= size; \
440
3.01M
} while(0)
441
442
/* Shorthand wrappers around the DATA_STACK_* macros.  They pass the local
   `state` implicitly; the push/pop variants take a pointer `x` and use
   sizeof(*(x)) as the byte count.  DATA_POP always discards the popped
   bytes (discard argument 1). */
#define DATA_PUSH(x) \
443
1.40M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
1.40M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
2.98M
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
2.98M
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
4.53M
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer to an index for trace output: the byte distance from
   state->beginning divided by state->charsize, or -1 when ptr is NULL. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* Debug helper.  In VERBOSE builds, and only when DO_TRACE is true, print
   `label`, the number of marks (lastmark+1) and the index of every entry
   state->mark[0..lastmark]; an extra space is emitted before each
   even-numbered entry other than the first.  In non-VERBOSE builds the
   macro expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save and restore the state->mark array on the data stack.  Each macro is
   a no-op when lastmark < 0; otherwise it operates on the first
   (lastmark+1) pointers of state->mark:
   - MARK_PUSH         copies them onto the data stack;
   - MARK_POP          copies them back and removes them from the stack;
   - MARK_POP_KEEP     copies them back but leaves them on the stack;
   - MARK_POP_DISCARD  removes them from the stack without copying.
   NOTE(review): `lastmark` is used unparenthesized in the comparison and
   in (lastmark+1); the visible call sites pass ctx->lastmark, for which
   that is harmless. */
#define MARK_PUSH(lastmark) \
472
1.40M
    do if (lastmark >= 0) { \
473
1.39M
        MARK_TRACE("push", (lastmark)); \
474
1.39M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
1.39M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
1.40M
    } while (0)
477
#define MARK_POP(lastmark) \
478
1.37M
    do if (lastmark >= 0) { \
479
1.36M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
1.36M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
1.36M
        MARK_TRACE("pop", (lastmark)); \
482
1.37M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
0
    do if (lastmark >= 0) { \
485
0
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
0
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
0
        MARK_TRACE("pop keep", (lastmark)); \
488
0
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
31.8k
    do if (lastmark >= 0) { \
491
31.7k
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
31.7k
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
31.7k
        MARK_TRACE("pop discard", (lastmark)); \
494
31.8k
    } while (0)
495
496
10.3k
/* Jump codes.  DO_JUMPX stores one of these in the `jump` field of the
   match context it creates; JUMP_NONE is assigned to the initial context
   in SRE(match).  Presumably the code at the `exit` label uses the value
   to pick the jump label to resume at -- that code is outside this
   excerpt, so confirm there. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
1.40M
#define JUMP_MAX_UNTIL_2     2
499
1.37M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
4.32k
#define JUMP_REPEAT          7
504
1
#define JUMP_REPEAT_ONE_1    8
505
192k
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
30
#define JUMP_BRANCH          11
508
14
#define JUMP_ASSERT          12
509
0
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* DO_JUMPX starts matching `nextpattern` in a fresh match context without
   a C-level call:
     1. the current pattern and ptr are saved into ctx;
     2. a new SRE(match_context) is allocated on the data stack
        (DATA_ALLOC, which also sets alloc_pos) and filled with the target
        pattern, the toplevel flag, the jump code and the data-stack
        position of the current context (last_ctx_pos);
     3. pattern, ctx_pos and ctx are switched to the new context and
        control jumps to the `entrance` label.
   `jumplabel:` marks the resumption point: when control arrives there,
   pattern and ptr are reloaded from ctx.  The code that switches ctx back
   and jumps to the label is outside this excerpt.
   The expansion is a bare statement sequence containing a label (it is
   not wrapped in do/while), so it must be used where several statements
   are valid.
   DO_JUMP inherits the toplevel flag from the current context; DO_JUMP0
   forces it to 0. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
2.97M
    ctx->pattern = pattern; \
516
2.97M
    ctx->ptr = ptr; \
517
2.97M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
2.97M
    nextctx->pattern = nextpattern; \
519
2.97M
    nextctx->toplevel = toplevel_; \
520
2.97M
    nextctx->jump = jumpvalue; \
521
2.97M
    nextctx->last_ctx_pos = ctx_pos; \
522
2.97M
    pattern = nextpattern; \
523
2.97M
    ctx_pos = alloc_pos; \
524
2.97M
    ctx = nextctx; \
525
2.97M
    goto entrance; \
526
2.97M
    jumplabel: \
527
2.97M
    pattern = ctx->pattern; \
528
2.97M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
2.97M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
14
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-activation record of the matcher, stored on state->data_stack.  One
   is allocated on entry to SRE(match) and one by every DO_JUMPX. */
typedef struct {
537
    Py_ssize_t count;           /* repetition count; REPEAT_ONE stores the SRE(count) result here */
538
    union {
539
        SRE_CODE chr;           /* literal the tail starts with (REPEAT_ONE literal fast path) */
540
        SRE_REPEAT* rep;        /* repeat record; LAST_PTR_PUSH/POP access its last_ptr */
541
    } u;
542
    int lastmark;               /* copy of state->lastmark taken by LASTMARK_SAVE */
543
    int lastindex;              /* copy of state->lastindex taken by LASTMARK_SAVE */
544
    const SRE_CODE* pattern;    /* pattern position saved by DO_JUMPX, reloaded at the jump label */
545
    const SRE_CHAR* ptr;        /* subject position saved by DO_JUMPX, reloaded at the jump label */
546
    int toplevel;               /* from SRE(match)'s argument or DO_JUMPX's toplevel_ */
547
    int jump;                   /* one of the JUMP_* codes */
548
    Py_ssize_t last_ctx_pos;    /* data-stack offset of the creating context, -1 for the first */
549
} SRE(match_context);
550
551
/* _MAYBE_CHECK_SIGNALS increments the local `sigcount` and, whenever its
   low 12 bits become zero (once every 4096 increments), calls
   PyErr_CheckSignals(); a nonzero result makes the enclosing function
   return SRE_ERROR_INTERRUPTED.
   MAYBE_CHECK_SIGNALS is the same thing in normal builds.  In Py_DEBUG
   builds it additionally consults state->fail_after_count: while that
   counter is >= 0 it is decremented on each use, and when it was 0 the
   exception state->fail_after_exc is set and SRE_ERROR_INTERRUPTED is
   returned. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
7.18M
    do {                                                           \
553
7.18M
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
7.18M
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
7.18M
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Opcode dispatch configuration.
   USE_COMPUTED_GOTOS defaults to 1 when HAVE_COMPUTED_GOTOS is defined
   (an explicit earlier definition is respected); requesting it without
   compiler support is a compile-time error; otherwise it is forced to 0.
   With computed gotos, TARGET(OP) expands to the label name TARGET_<OP>
   and DISPATCH runs MAYBE_CHECK_SIGNALS and then jumps through
   sre_targets[] indexed by the next opcode, advancing `pattern`.
   Without them, TARGET(OP) is a `case` label and DISPATCH jumps to the
   `dispatch` label. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
7.18M
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
7.18M
        do {                               \
588
7.18M
            MAYBE_CHECK_SIGNALS;           \
589
7.18M
            goto *sre_targets[*pattern++]; \
590
7.18M
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
10.3k
{
601
10.3k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
10.3k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
10.3k
    Py_ssize_t ret = 0;
604
10.3k
    int jump;
605
10.3k
    unsigned int sigcount = state->sigcount;
606
607
10.3k
    SRE(match_context)* ctx;
608
10.3k
    SRE(match_context)* nextctx;
609
10.3k
    INIT_TRACE(state);
610
611
10.3k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
10.3k
    DATA_ALLOC(SRE(match_context), ctx);
614
10.3k
    ctx->last_ctx_pos = -1;
615
10.3k
    ctx->jump = JUMP_NONE;
616
10.3k
    ctx->toplevel = toplevel;
617
10.3k
    ctx_pos = alloc_pos;
618
619
10.3k
#if USE_COMPUTED_GOTOS
620
10.3k
#include "sre_targets.h"
621
10.3k
#endif
622
623
2.98M
entrance:
624
625
2.98M
    ;  // Fashion statement.
626
2.98M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
2.98M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.3k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2
                   end - ptr, (size_t) pattern[3]));
634
2
            RETURN_FAILURE;
635
2
        }
636
10.3k
        pattern += pattern[1] + 1;
637
10.3k
    }
638
639
2.98M
#if USE_COMPUTED_GOTOS
640
2.98M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
2.98M
    {
647
648
2.98M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
2.80M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
2.80M
                   ptr, pattern[0]));
653
2.80M
            {
654
2.80M
                int i = pattern[0];
655
2.80M
                if (i & 1)
656
1.39M
                    state->lastindex = i/2 + 1;
657
2.80M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
4.70k
                    int j = state->lastmark + 1;
663
4.94k
                    while (j < i)
664
238
                        state->mark[j++] = NULL;
665
4.70k
                    state->lastmark = i;
666
4.70k
                }
667
2.80M
                state->mark[i] = ptr;
668
2.80M
            }
669
2.80M
            pattern++;
670
2.80M
            DISPATCH;
671
672
2.80M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
52
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
52
                   ptr, *pattern));
677
52
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
7
                RETURN_FAILURE;
679
45
            pattern++;
680
45
            ptr++;
681
45
            DISPATCH;
682
683
45
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
8.72k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
8.72k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
8.72k
            if (ctx->toplevel &&
698
8.72k
                ((state->match_all && ptr != state->end) ||
699
8.72k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
8.72k
            state->ptr = ptr;
704
8.72k
            RETURN_SUCCESS;
705
706
368
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
368
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
368
            if (!SRE(at)(state, ptr, *pattern))
711
31
                RETURN_FAILURE;
712
337
            pattern++;
713
337
            DISPATCH;
714
715
337
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
2
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
2
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
2
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
1
                RETURN_FAILURE;
732
1
            ptr++;
733
1
            DISPATCH;
734
735
1
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
1.40M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
1.40M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
1.40M
            if (ptr >= end ||
749
1.40M
                !SRE(charset)(state, pattern + 1, *ptr))
750
4.29k
                RETURN_FAILURE;
751
1.39M
            pattern += pattern[0];
752
1.39M
            ptr++;
753
1.39M
            DISPATCH;
754
755
1.39M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
28
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
24
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
24
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
24
            if (ptr >= end
828
16
                || !SRE(charset)(state, pattern+1,
829
16
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
8
            pattern += pattern[0];
832
8
            ptr++;
833
8
            DISPATCH;
834
835
8
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
13
        TARGET(SRE_OP_JUMP):
845
13
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
13
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
13
                   ptr, pattern[0]));
850
13
            pattern += pattern[0];
851
13
            DISPATCH;
852
853
32
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
32
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
32
            LASTMARK_SAVE();
858
32
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
86
            for (; pattern[0]; pattern += pattern[0]) {
861
67
                if (pattern[1] == SRE_OP_LITERAL &&
862
27
                    (ptr >= end ||
863
27
                     (SRE_CODE) *ptr != pattern[2]))
864
25
                    continue;
865
42
                if (pattern[1] == SRE_OP_IN &&
866
12
                    (ptr >= end ||
867
12
                     !SRE(charset)(state, pattern + 3,
868
12
                                   (SRE_CODE) *ptr)))
869
12
                    continue;
870
30
                state->ptr = ptr;
871
30
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
30
                if (ret) {
873
13
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
13
                    RETURN_ON_ERROR(ret);
876
13
                    RETURN_SUCCESS;
877
13
                }
878
17
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
17
                LASTMARK_RESTORE();
881
17
            }
882
19
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
19
            RETURN_FAILURE;
885
886
1.56M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
1.56M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
1.56M
                   pattern[1], pattern[2]));
898
899
1.56M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
646
                RETURN_FAILURE; /* cannot match */
901
902
1.56M
            state->ptr = ptr;
903
904
1.56M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
1.56M
            RETURN_ON_ERROR(ret);
906
1.56M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
1.56M
            ctx->count = ret;
908
1.56M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
1.56M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
1.55M
                RETURN_FAILURE;
917
918
11.0k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
9.92k
                ptr == state->end &&
920
1.28k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
1.28k
            {
922
                /* tail is empty.  we're finished */
923
1.28k
                state->ptr = ptr;
924
1.28k
                RETURN_SUCCESS;
925
1.28k
            }
926
927
9.73k
            LASTMARK_SAVE();
928
9.73k
            if (state->repeat)
929
40
                MARK_PUSH(ctx->lastmark);
930
931
9.73k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
33
                ctx->u.chr = pattern[pattern[0]+1];
935
33
                for (;;) {
936
197k
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
197k
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
197k
                        ptr--;
939
197k
                        ctx->count--;
940
197k
                    }
941
33
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
32
                        break;
943
1
                    state->ptr = ptr;
944
1
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
1
                            pattern+pattern[0]);
946
1
                    if (ret) {
947
1
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
1
                        RETURN_ON_ERROR(ret);
950
1
                        RETURN_SUCCESS;
951
1
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
32
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
9.70k
            } else {
962
                /* general case */
963
193k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
192k
                    state->ptr = ptr;
965
192k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
192k
                            pattern+pattern[0]);
967
192k
                    if (ret) {
968
8.81k
                        if (state->repeat)
969
40
                            MARK_POP_DISCARD(ctx->lastmark);
970
8.81k
                        RETURN_ON_ERROR(ret);
971
8.81k
                        RETURN_SUCCESS;
972
8.81k
                    }
973
183k
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
183k
                    LASTMARK_RESTORE();
976
977
183k
                    ptr--;
978
183k
                    ctx->count--;
979
183k
                }
980
890
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
890
            }
983
922
            RETURN_FAILURE;
984
985
32
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
32
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
32
                   pattern[1], pattern[2]));
997
998
32
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
32
            state->ptr = ptr;
1002
1003
32
            if (pattern[1] == 0)
1004
10
                ctx->count = 0;
1005
22
            else {
1006
                /* count using pattern min as the maximum */
1007
22
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
22
                RETURN_ON_ERROR(ret);
1009
22
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
22
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
20
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
2
                ctx->count = ret;
1015
2
                ptr += ctx->count;
1016
2
            }
1017
1018
12
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
12
                !(ctx->toplevel &&
1020
12
                  ((state->match_all && ptr != state->end) ||
1021
12
                   (state->must_advance && ptr == state->start))))
1022
12
            {
1023
                /* tail is empty.  we're finished */
1024
12
                state->ptr = ptr;
1025
12
                RETURN_SUCCESS;
1026
1027
12
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
4.32k
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
4.32k
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
4.32k
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
4.32k
            ctx->u.rep = repeat_pool_malloc(state);
1127
4.32k
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
4.32k
            ctx->u.rep->count = -1;
1131
4.32k
            ctx->u.rep->pattern = pattern;
1132
4.32k
            ctx->u.rep->prev = state->repeat;
1133
4.32k
            ctx->u.rep->last_ptr = NULL;
1134
4.32k
            state->repeat = ctx->u.rep;
1135
1136
4.32k
            state->ptr = ptr;
1137
4.32k
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
4.32k
            state->repeat = ctx->u.rep->prev;
1139
4.32k
            repeat_pool_free(state, ctx->u.rep);
1140
1141
4.32k
            if (ret) {
1142
97
                RETURN_ON_ERROR(ret);
1143
97
                RETURN_SUCCESS;
1144
97
            }
1145
4.23k
            RETURN_FAILURE;
1146
1147
1.40M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
1.40M
            ctx->u.rep = state->repeat;
1155
1.40M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
1.40M
            state->ptr = ptr;
1159
1160
1.40M
            ctx->count = ctx->u.rep->count+1;
1161
1162
1.40M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
1.40M
                   ptr, ctx->count));
1164
1165
1.40M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
1.40M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
1.40M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
1.40M
                ctx->u.rep->count = ctx->count;
1185
1.40M
                LASTMARK_SAVE();
1186
1.40M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
1.40M
                LAST_PTR_PUSH();
1189
1.40M
                ctx->u.rep->last_ptr = state->ptr;
1190
1.40M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
1.40M
                        ctx->u.rep->pattern+3);
1192
1.40M
                LAST_PTR_POP();
1193
1.40M
                if (ret) {
1194
31.8k
                    MARK_POP_DISCARD(ctx->lastmark);
1195
31.8k
                    RETURN_ON_ERROR(ret);
1196
31.8k
                    RETURN_SUCCESS;
1197
31.8k
                }
1198
1.37M
                MARK_POP(ctx->lastmark);
1199
1.37M
                LASTMARK_RESTORE();
1200
1.37M
                ctx->u.rep->count = ctx->count-1;
1201
1.37M
                state->ptr = ptr;
1202
1.37M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
1.37M
            state->repeat = ctx->u.rep->prev;
1207
1.37M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
1.37M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
1.37M
            RETURN_ON_SUCCESS(ret);
1211
1.37M
            state->ptr = ptr;
1212
1.37M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
14
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
14
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
14
                   ptr, pattern[1]));
1565
14
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
14
            state->ptr = ptr - pattern[1];
1568
14
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
14
            RETURN_ON_FAILURE(ret);
1570
2
            pattern += pattern[0];
1571
2
            DISPATCH;
1572
1573
2
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
2.98M
exit:
1620
2.98M
    ctx_pos = ctx->last_ctx_pos;
1621
2.98M
    jump = ctx->jump;
1622
2.98M
    DATA_POP_DISCARD(ctx);
1623
2.98M
    if (ctx_pos == -1) {
1624
10.3k
        state->sigcount = sigcount;
1625
10.3k
        return ret;
1626
10.3k
    }
1627
2.97M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
2.97M
    switch (jump) {
1630
1.40M
        case JUMP_MAX_UNTIL_2:
1631
1.40M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
1.40M
            goto jump_max_until_2;
1633
1.37M
        case JUMP_MAX_UNTIL_3:
1634
1.37M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
1.37M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
30
        case JUMP_BRANCH:
1643
30
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
30
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
4.32k
        case JUMP_REPEAT:
1658
4.32k
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
4.32k
            goto jump_repeat;
1660
1
        case JUMP_REPEAT_ONE_1:
1661
1
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
1
            goto jump_repeat_one_1;
1663
192k
        case JUMP_REPEAT_ONE_2:
1664
192k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
192k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
14
        case JUMP_ASSERT:
1673
14
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
14
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
2.97M
    }
1683
1684
0
    return ret; /* should never get here */
1685
2.97M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
6.35k
{
601
6.35k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
6.35k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
6.35k
    Py_ssize_t ret = 0;
604
6.35k
    int jump;
605
6.35k
    unsigned int sigcount = state->sigcount;
606
607
6.35k
    SRE(match_context)* ctx;
608
6.35k
    SRE(match_context)* nextctx;
609
6.35k
    INIT_TRACE(state);
610
611
6.35k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
6.35k
    DATA_ALLOC(SRE(match_context), ctx);
614
6.35k
    ctx->last_ctx_pos = -1;
615
6.35k
    ctx->jump = JUMP_NONE;
616
6.35k
    ctx->toplevel = toplevel;
617
6.35k
    ctx_pos = alloc_pos;
618
619
6.35k
#if USE_COMPUTED_GOTOS
620
6.35k
#include "sre_targets.h"
621
6.35k
#endif
622
623
2.97M
entrance:
624
625
2.97M
    ;  // Fashion statement.
626
2.97M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
2.97M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
6.35k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2
                   end - ptr, (size_t) pattern[3]));
634
2
            RETURN_FAILURE;
635
2
        }
636
6.35k
        pattern += pattern[1] + 1;
637
6.35k
    }
638
639
2.97M
#if USE_COMPUTED_GOTOS
640
2.97M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
2.97M
    {
647
648
2.97M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
2.80M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
2.80M
                   ptr, pattern[0]));
653
2.80M
            {
654
2.80M
                int i = pattern[0];
655
2.80M
                if (i & 1)
656
1.39M
                    state->lastindex = i/2 + 1;
657
2.80M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
4.70k
                    int j = state->lastmark + 1;
663
4.94k
                    while (j < i)
664
238
                        state->mark[j++] = NULL;
665
4.70k
                    state->lastmark = i;
666
4.70k
                }
667
2.80M
                state->mark[i] = ptr;
668
2.80M
            }
669
2.80M
            pattern++;
670
2.80M
            DISPATCH;
671
672
2.80M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
52
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
52
                   ptr, *pattern));
677
52
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
7
                RETURN_FAILURE;
679
45
            pattern++;
680
45
            ptr++;
681
45
            DISPATCH;
682
683
45
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
4.83k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
4.83k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
4.83k
            if (ctx->toplevel &&
698
4.83k
                ((state->match_all && ptr != state->end) ||
699
4.83k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
4.83k
            state->ptr = ptr;
704
4.83k
            RETURN_SUCCESS;
705
706
368
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
368
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
368
            if (!SRE(at)(state, ptr, *pattern))
711
31
                RETURN_FAILURE;
712
337
            pattern++;
713
337
            DISPATCH;
714
715
337
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
2
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
2
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
2
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
1
                RETURN_FAILURE;
732
1
            ptr++;
733
1
            DISPATCH;
734
735
1
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
1.40M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
1.40M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
1.40M
            if (ptr >= end ||
749
1.40M
                !SRE(charset)(state, pattern + 1, *ptr))
750
4.29k
                RETURN_FAILURE;
751
1.39M
            pattern += pattern[0];
752
1.39M
            ptr++;
753
1.39M
            DISPATCH;
754
755
1.39M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
28
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
24
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
24
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
24
            if (ptr >= end
828
16
                || !SRE(charset)(state, pattern+1,
829
16
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
8
            pattern += pattern[0];
832
8
            ptr++;
833
8
            DISPATCH;
834
835
8
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
13
        TARGET(SRE_OP_JUMP):
845
13
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
13
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
13
                   ptr, pattern[0]));
850
13
            pattern += pattern[0];
851
13
            DISPATCH;
852
853
32
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
32
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
32
            LASTMARK_SAVE();
858
32
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
86
            for (; pattern[0]; pattern += pattern[0]) {
861
67
                if (pattern[1] == SRE_OP_LITERAL &&
862
27
                    (ptr >= end ||
863
27
                     (SRE_CODE) *ptr != pattern[2]))
864
25
                    continue;
865
42
                if (pattern[1] == SRE_OP_IN &&
866
12
                    (ptr >= end ||
867
12
                     !SRE(charset)(state, pattern + 3,
868
12
                                   (SRE_CODE) *ptr)))
869
12
                    continue;
870
30
                state->ptr = ptr;
871
30
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
30
                if (ret) {
873
13
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
13
                    RETURN_ON_ERROR(ret);
876
13
                    RETURN_SUCCESS;
877
13
                }
878
17
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
17
                LASTMARK_RESTORE();
881
17
            }
882
19
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
19
            RETURN_FAILURE;
885
886
1.55M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
1.55M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
1.55M
                   pattern[1], pattern[2]));
898
899
1.55M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
646
                RETURN_FAILURE; /* cannot match */
901
902
1.55M
            state->ptr = ptr;
903
904
1.55M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
1.55M
            RETURN_ON_ERROR(ret);
906
1.55M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
1.55M
            ctx->count = ret;
908
1.55M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
1.55M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
1.55M
                RETURN_FAILURE;
917
918
7.01k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.91k
                ptr == state->end &&
920
1.16k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
1.16k
            {
922
                /* tail is empty.  we're finished */
923
1.16k
                state->ptr = ptr;
924
1.16k
                RETURN_SUCCESS;
925
1.16k
            }
926
927
5.84k
            LASTMARK_SAVE();
928
5.84k
            if (state->repeat)
929
40
                MARK_PUSH(ctx->lastmark);
930
931
5.84k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
33
                ctx->u.chr = pattern[pattern[0]+1];
935
33
                for (;;) {
936
197k
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
197k
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
197k
                        ptr--;
939
197k
                        ctx->count--;
940
197k
                    }
941
33
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
32
                        break;
943
1
                    state->ptr = ptr;
944
1
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
1
                            pattern+pattern[0]);
946
1
                    if (ret) {
947
1
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
1
                        RETURN_ON_ERROR(ret);
950
1
                        RETURN_SUCCESS;
951
1
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
32
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
5.81k
            } else {
962
                /* general case */
963
189k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
188k
                    state->ptr = ptr;
965
188k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
188k
                            pattern+pattern[0]);
967
188k
                    if (ret) {
968
4.92k
                        if (state->repeat)
969
40
                            MARK_POP_DISCARD(ctx->lastmark);
970
4.92k
                        RETURN_ON_ERROR(ret);
971
4.92k
                        RETURN_SUCCESS;
972
4.92k
                    }
973
183k
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
183k
                    LASTMARK_RESTORE();
976
977
183k
                    ptr--;
978
183k
                    ctx->count--;
979
183k
                }
980
890
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
890
            }
983
922
            RETURN_FAILURE;
984
985
32
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
32
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
32
                   pattern[1], pattern[2]));
997
998
32
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
32
            state->ptr = ptr;
1002
1003
32
            if (pattern[1] == 0)
1004
10
                ctx->count = 0;
1005
22
            else {
1006
                /* count using pattern min as the maximum */
1007
22
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
22
                RETURN_ON_ERROR(ret);
1009
22
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
22
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
20
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
2
                ctx->count = ret;
1015
2
                ptr += ctx->count;
1016
2
            }
1017
1018
12
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
12
                !(ctx->toplevel &&
1020
12
                  ((state->match_all && ptr != state->end) ||
1021
12
                   (state->must_advance && ptr == state->start))))
1022
12
            {
1023
                /* tail is empty.  we're finished */
1024
12
                state->ptr = ptr;
1025
12
                RETURN_SUCCESS;
1026
1027
12
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
4.32k
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
4.32k
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
4.32k
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
4.32k
            ctx->u.rep = repeat_pool_malloc(state);
1127
4.32k
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
4.32k
            ctx->u.rep->count = -1;
1131
4.32k
            ctx->u.rep->pattern = pattern;
1132
4.32k
            ctx->u.rep->prev = state->repeat;
1133
4.32k
            ctx->u.rep->last_ptr = NULL;
1134
4.32k
            state->repeat = ctx->u.rep;
1135
1136
4.32k
            state->ptr = ptr;
1137
4.32k
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
4.32k
            state->repeat = ctx->u.rep->prev;
1139
4.32k
            repeat_pool_free(state, ctx->u.rep);
1140
1141
4.32k
            if (ret) {
1142
97
                RETURN_ON_ERROR(ret);
1143
97
                RETURN_SUCCESS;
1144
97
            }
1145
4.23k
            RETURN_FAILURE;
1146
1147
1.40M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
1.40M
            ctx->u.rep = state->repeat;
1155
1.40M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
1.40M
            state->ptr = ptr;
1159
1160
1.40M
            ctx->count = ctx->u.rep->count+1;
1161
1162
1.40M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
1.40M
                   ptr, ctx->count));
1164
1165
1.40M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
1.40M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
1.40M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
1.40M
                ctx->u.rep->count = ctx->count;
1185
1.40M
                LASTMARK_SAVE();
1186
1.40M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
1.40M
                LAST_PTR_PUSH();
1189
1.40M
                ctx->u.rep->last_ptr = state->ptr;
1190
1.40M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
1.40M
                        ctx->u.rep->pattern+3);
1192
1.40M
                LAST_PTR_POP();
1193
1.40M
                if (ret) {
1194
31.8k
                    MARK_POP_DISCARD(ctx->lastmark);
1195
31.8k
                    RETURN_ON_ERROR(ret);
1196
31.8k
                    RETURN_SUCCESS;
1197
31.8k
                }
1198
1.37M
                MARK_POP(ctx->lastmark);
1199
1.37M
                LASTMARK_RESTORE();
1200
1.37M
                ctx->u.rep->count = ctx->count-1;
1201
1.37M
                state->ptr = ptr;
1202
1.37M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
1.37M
            state->repeat = ctx->u.rep->prev;
1207
1.37M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
1.37M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
1.37M
            RETURN_ON_SUCCESS(ret);
1211
1.37M
            state->ptr = ptr;
1212
1.37M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
14
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
14
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
14
                   ptr, pattern[1]));
1565
14
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
14
            state->ptr = ptr - pattern[1];
1568
14
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
14
            RETURN_ON_FAILURE(ret);
1570
2
            pattern += pattern[0];
1571
2
            DISPATCH;
1572
1573
2
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
2.97M
exit:
1620
2.97M
    ctx_pos = ctx->last_ctx_pos;
1621
2.97M
    jump = ctx->jump;
1622
2.97M
    DATA_POP_DISCARD(ctx);
1623
2.97M
    if (ctx_pos == -1) {
1624
6.35k
        state->sigcount = sigcount;
1625
6.35k
        return ret;
1626
6.35k
    }
1627
2.96M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
2.96M
    switch (jump) {
1630
1.40M
        case JUMP_MAX_UNTIL_2:
1631
1.40M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
1.40M
            goto jump_max_until_2;
1633
1.37M
        case JUMP_MAX_UNTIL_3:
1634
1.37M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
1.37M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
30
        case JUMP_BRANCH:
1643
30
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
30
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
4.32k
        case JUMP_REPEAT:
1658
4.32k
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
4.32k
            goto jump_repeat;
1660
1
        case JUMP_REPEAT_ONE_1:
1661
1
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
1
            goto jump_repeat_one_1;
1663
188k
        case JUMP_REPEAT_ONE_2:
1664
188k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
188k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
14
        case JUMP_ASSERT:
1673
14
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
14
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
2.96M
    }
1683
1684
0
    return ret; /* should never get here */
1685
2.96M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
1.76k
{
601
1.76k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
1.76k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
1.76k
    Py_ssize_t ret = 0;
604
1.76k
    int jump;
605
1.76k
    unsigned int sigcount = state->sigcount;
606
607
1.76k
    SRE(match_context)* ctx;
608
1.76k
    SRE(match_context)* nextctx;
609
1.76k
    INIT_TRACE(state);
610
611
1.76k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
1.76k
    DATA_ALLOC(SRE(match_context), ctx);
614
1.76k
    ctx->last_ctx_pos = -1;
615
1.76k
    ctx->jump = JUMP_NONE;
616
1.76k
    ctx->toplevel = toplevel;
617
1.76k
    ctx_pos = alloc_pos;
618
619
1.76k
#if USE_COMPUTED_GOTOS
620
1.76k
#include "sre_targets.h"
621
1.76k
#endif
622
623
3.48k
entrance:
624
625
3.48k
    ;  // Fashion statement.
626
3.48k
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
3.48k
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
1.76k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
0
            TRACE(("reject (got %tu chars, need %zu)\n",
633
0
                   end - ptr, (size_t) pattern[3]));
634
0
            RETURN_FAILURE;
635
0
        }
636
1.76k
        pattern += pattern[1] + 1;
637
1.76k
    }
638
639
3.48k
#if USE_COMPUTED_GOTOS
640
3.48k
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
3.48k
    {
647
648
3.48k
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
0
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
0
                   ptr, pattern[0]));
653
0
            {
654
0
                int i = pattern[0];
655
0
                if (i & 1)
656
0
                    state->lastindex = i/2 + 1;
657
0
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
0
                    int j = state->lastmark + 1;
663
0
                    while (j < i)
664
0
                        state->mark[j++] = NULL;
665
0
                    state->lastmark = i;
666
0
                }
667
0
                state->mark[i] = ptr;
668
0
            }
669
0
            pattern++;
670
0
            DISPATCH;
671
672
0
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
0
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
0
                   ptr, *pattern));
677
0
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
0
                RETURN_FAILURE;
679
0
            pattern++;
680
0
            ptr++;
681
0
            DISPATCH;
682
683
0
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
1.72k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
1.72k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
1.72k
            if (ctx->toplevel &&
698
1.72k
                ((state->match_all && ptr != state->end) ||
699
1.72k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
1.72k
            state->ptr = ptr;
704
1.72k
            RETURN_SUCCESS;
705
706
0
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
0
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
0
            if (!SRE(at)(state, ptr, *pattern))
711
0
                RETURN_FAILURE;
712
0
            pattern++;
713
0
            DISPATCH;
714
715
0
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
0
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
0
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
0
            if (ptr >= end ||
749
0
                !SRE(charset)(state, pattern + 1, *ptr))
750
0
                RETURN_FAILURE;
751
0
            pattern += pattern[0];
752
0
            ptr++;
753
0
            DISPATCH;
754
755
0
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
0
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
0
        TARGET(SRE_OP_JUMP):
845
0
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
0
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
0
                   ptr, pattern[0]));
850
0
            pattern += pattern[0];
851
0
            DISPATCH;
852
853
0
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
0
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
0
            LASTMARK_SAVE();
858
0
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
0
            for (; pattern[0]; pattern += pattern[0]) {
861
0
                if (pattern[1] == SRE_OP_LITERAL &&
862
0
                    (ptr >= end ||
863
0
                     (SRE_CODE) *ptr != pattern[2]))
864
0
                    continue;
865
0
                if (pattern[1] == SRE_OP_IN &&
866
0
                    (ptr >= end ||
867
0
                     !SRE(charset)(state, pattern + 3,
868
0
                                   (SRE_CODE) *ptr)))
869
0
                    continue;
870
0
                state->ptr = ptr;
871
0
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
0
                if (ret) {
873
0
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
0
                    RETURN_ON_ERROR(ret);
876
0
                    RETURN_SUCCESS;
877
0
                }
878
0
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
0
                LASTMARK_RESTORE();
881
0
            }
882
0
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
0
            RETURN_FAILURE;
885
886
1.76k
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
1.76k
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
1.76k
                   pattern[1], pattern[2]));
898
899
1.76k
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
0
                RETURN_FAILURE; /* cannot match */
901
902
1.76k
            state->ptr = ptr;
903
904
1.76k
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
1.76k
            RETURN_ON_ERROR(ret);
906
1.76k
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
1.76k
            ctx->count = ret;
908
1.76k
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
1.76k
            if (ctx->count < (Py_ssize_t) pattern[1])
916
0
                RETURN_FAILURE;
917
918
1.76k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.76k
                ptr == state->end &&
920
46
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
46
            {
922
                /* tail is empty.  we're finished */
923
46
                state->ptr = ptr;
924
46
                RETURN_SUCCESS;
925
46
            }
926
927
1.72k
            LASTMARK_SAVE();
928
1.72k
            if (state->repeat)
929
0
                MARK_PUSH(ctx->lastmark);
930
931
1.72k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
0
                ctx->u.chr = pattern[pattern[0]+1];
935
0
                for (;;) {
936
0
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
0
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
0
                        ptr--;
939
0
                        ctx->count--;
940
0
                    }
941
0
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
0
                        break;
943
0
                    state->ptr = ptr;
944
0
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
0
                            pattern+pattern[0]);
946
0
                    if (ret) {
947
0
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
0
                        RETURN_ON_ERROR(ret);
950
0
                        RETURN_SUCCESS;
951
0
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
0
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
1.72k
            } else {
962
                /* general case */
963
1.72k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
1.72k
                    state->ptr = ptr;
965
1.72k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
1.72k
                            pattern+pattern[0]);
967
1.72k
                    if (ret) {
968
1.72k
                        if (state->repeat)
969
0
                            MARK_POP_DISCARD(ctx->lastmark);
970
1.72k
                        RETURN_ON_ERROR(ret);
971
1.72k
                        RETURN_SUCCESS;
972
1.72k
                    }
973
0
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
0
                    LASTMARK_RESTORE();
976
977
0
                    ptr--;
978
0
                    ctx->count--;
979
0
                }
980
0
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
0
            }
983
0
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
0
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
0
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
0
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
0
            ctx->u.rep = repeat_pool_malloc(state);
1127
0
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
0
            ctx->u.rep->count = -1;
1131
0
            ctx->u.rep->pattern = pattern;
1132
0
            ctx->u.rep->prev = state->repeat;
1133
0
            ctx->u.rep->last_ptr = NULL;
1134
0
            state->repeat = ctx->u.rep;
1135
1136
0
            state->ptr = ptr;
1137
0
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
0
            state->repeat = ctx->u.rep->prev;
1139
0
            repeat_pool_free(state, ctx->u.rep);
1140
1141
0
            if (ret) {
1142
0
                RETURN_ON_ERROR(ret);
1143
0
                RETURN_SUCCESS;
1144
0
            }
1145
0
            RETURN_FAILURE;
1146
1147
0
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
0
            ctx->u.rep = state->repeat;
1155
0
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
0
            state->ptr = ptr;
1159
1160
0
            ctx->count = ctx->u.rep->count+1;
1161
1162
0
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
0
                   ptr, ctx->count));
1164
1165
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
0
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
0
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
0
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
0
                ctx->u.rep->count = ctx->count;
1185
0
                LASTMARK_SAVE();
1186
0
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
0
                LAST_PTR_PUSH();
1189
0
                ctx->u.rep->last_ptr = state->ptr;
1190
0
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
0
                        ctx->u.rep->pattern+3);
1192
0
                LAST_PTR_POP();
1193
0
                if (ret) {
1194
0
                    MARK_POP_DISCARD(ctx->lastmark);
1195
0
                    RETURN_ON_ERROR(ret);
1196
0
                    RETURN_SUCCESS;
1197
0
                }
1198
0
                MARK_POP(ctx->lastmark);
1199
0
                LASTMARK_RESTORE();
1200
0
                ctx->u.rep->count = ctx->count-1;
1201
0
                state->ptr = ptr;
1202
0
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
0
            state->repeat = ctx->u.rep->prev;
1207
0
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
0
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
0
            RETURN_ON_SUCCESS(ret);
1211
0
            state->ptr = ptr;
1212
0
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
0
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
0
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
0
                   ptr, pattern[1]));
1565
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
0
            state->ptr = ptr - pattern[1];
1568
0
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
0
            RETURN_ON_FAILURE(ret);
1570
0
            pattern += pattern[0];
1571
0
            DISPATCH;
1572
1573
0
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
3.48k
exit:
1620
3.48k
    ctx_pos = ctx->last_ctx_pos;
1621
3.48k
    jump = ctx->jump;
1622
3.48k
    DATA_POP_DISCARD(ctx);
1623
3.48k
    if (ctx_pos == -1) {
1624
1.76k
        state->sigcount = sigcount;
1625
1.76k
        return ret;
1626
1.76k
    }
1627
1.72k
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.72k
    switch (jump) {
1630
0
        case JUMP_MAX_UNTIL_2:
1631
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
0
            goto jump_max_until_2;
1633
0
        case JUMP_MAX_UNTIL_3:
1634
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
0
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
0
        case JUMP_BRANCH:
1643
0
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
0
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
0
        case JUMP_REPEAT:
1658
0
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
0
            goto jump_repeat;
1660
0
        case JUMP_REPEAT_ONE_1:
1661
0
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
0
            goto jump_repeat_one_1;
1663
1.72k
        case JUMP_REPEAT_ONE_2:
1664
1.72k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
1.72k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
0
        case JUMP_ASSERT:
1673
0
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
0
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.72k
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.72k
}
sre.c:sre_ucs4_match
Line
Count
Source
600
2.24k
{
601
2.24k
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
2.24k
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
2.24k
    Py_ssize_t ret = 0;
604
2.24k
    int jump;
605
2.24k
    unsigned int sigcount = state->sigcount;
606
607
2.24k
    SRE(match_context)* ctx;
608
2.24k
    SRE(match_context)* nextctx;
609
2.24k
    INIT_TRACE(state);
610
611
2.24k
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
2.24k
    DATA_ALLOC(SRE(match_context), ctx);
614
2.24k
    ctx->last_ctx_pos = -1;
615
2.24k
    ctx->jump = JUMP_NONE;
616
2.24k
    ctx->toplevel = toplevel;
617
2.24k
    ctx_pos = alloc_pos;
618
619
2.24k
#if USE_COMPUTED_GOTOS
620
2.24k
#include "sre_targets.h"
621
2.24k
#endif
622
623
4.41k
entrance:
624
625
4.41k
    ;  // Fashion statement.
626
4.41k
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
4.41k
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
2.24k
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
0
            TRACE(("reject (got %tu chars, need %zu)\n",
633
0
                   end - ptr, (size_t) pattern[3]));
634
0
            RETURN_FAILURE;
635
0
        }
636
2.24k
        pattern += pattern[1] + 1;
637
2.24k
    }
638
639
4.41k
#if USE_COMPUTED_GOTOS
640
4.41k
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
4.41k
    {
647
648
4.41k
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
0
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
0
                   ptr, pattern[0]));
653
0
            {
654
0
                int i = pattern[0];
655
0
                if (i & 1)
656
0
                    state->lastindex = i/2 + 1;
657
0
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
0
                    int j = state->lastmark + 1;
663
0
                    while (j < i)
664
0
                        state->mark[j++] = NULL;
665
0
                    state->lastmark = i;
666
0
                }
667
0
                state->mark[i] = ptr;
668
0
            }
669
0
            pattern++;
670
0
            DISPATCH;
671
672
0
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
0
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
0
                   ptr, *pattern));
677
0
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
0
                RETURN_FAILURE;
679
0
            pattern++;
680
0
            ptr++;
681
0
            DISPATCH;
682
683
0
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
2.16k
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
2.16k
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
2.16k
            if (ctx->toplevel &&
698
2.16k
                ((state->match_all && ptr != state->end) ||
699
2.16k
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
2.16k
            state->ptr = ptr;
704
2.16k
            RETURN_SUCCESS;
705
706
0
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
0
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
0
            if (!SRE(at)(state, ptr, *pattern))
711
0
                RETURN_FAILURE;
712
0
            pattern++;
713
0
            DISPATCH;
714
715
0
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
0
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
0
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
0
            if (ptr >= end ||
749
0
                !SRE(charset)(state, pattern + 1, *ptr))
750
0
                RETURN_FAILURE;
751
0
            pattern += pattern[0];
752
0
            ptr++;
753
0
            DISPATCH;
754
755
0
        TARGET(SRE_OP_LITERAL_IGNORE):
756
0
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
0
                   pattern, ptr, pattern[0]));
758
0
            if (ptr >= end ||
759
0
                sre_lower_ascii(*ptr) != *pattern)
760
0
                RETURN_FAILURE;
761
0
            pattern++;
762
0
            ptr++;
763
0
            DISPATCH;
764
765
0
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
0
        TARGET(SRE_OP_JUMP):
845
0
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
0
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
0
                   ptr, pattern[0]));
850
0
            pattern += pattern[0];
851
0
            DISPATCH;
852
853
0
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
0
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
0
            LASTMARK_SAVE();
858
0
            if (state->repeat)
859
0
                MARK_PUSH(ctx->lastmark);
860
0
            for (; pattern[0]; pattern += pattern[0]) {
861
0
                if (pattern[1] == SRE_OP_LITERAL &&
862
0
                    (ptr >= end ||
863
0
                     (SRE_CODE) *ptr != pattern[2]))
864
0
                    continue;
865
0
                if (pattern[1] == SRE_OP_IN &&
866
0
                    (ptr >= end ||
867
0
                     !SRE(charset)(state, pattern + 3,
868
0
                                   (SRE_CODE) *ptr)))
869
0
                    continue;
870
0
                state->ptr = ptr;
871
0
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
0
                if (ret) {
873
0
                    if (state->repeat)
874
0
                        MARK_POP_DISCARD(ctx->lastmark);
875
0
                    RETURN_ON_ERROR(ret);
876
0
                    RETURN_SUCCESS;
877
0
                }
878
0
                if (state->repeat)
879
0
                    MARK_POP_KEEP(ctx->lastmark);
880
0
                LASTMARK_RESTORE();
881
0
            }
882
0
            if (state->repeat)
883
0
                MARK_POP_DISCARD(ctx->lastmark);
884
0
            RETURN_FAILURE;
885
886
2.24k
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
2.24k
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
2.24k
                   pattern[1], pattern[2]));
898
899
2.24k
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
0
                RETURN_FAILURE; /* cannot match */
901
902
2.24k
            state->ptr = ptr;
903
904
2.24k
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
2.24k
            RETURN_ON_ERROR(ret);
906
2.24k
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
2.24k
            ctx->count = ret;
908
2.24k
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
2.24k
            if (ctx->count < (Py_ssize_t) pattern[1])
916
0
                RETURN_FAILURE;
917
918
2.24k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
2.24k
                ptr == state->end &&
920
74
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
74
            {
922
                /* tail is empty.  we're finished */
923
74
                state->ptr = ptr;
924
74
                RETURN_SUCCESS;
925
74
            }
926
927
2.16k
            LASTMARK_SAVE();
928
2.16k
            if (state->repeat)
929
0
                MARK_PUSH(ctx->lastmark);
930
931
2.16k
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
0
                ctx->u.chr = pattern[pattern[0]+1];
935
0
                for (;;) {
936
0
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
0
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
0
                        ptr--;
939
0
                        ctx->count--;
940
0
                    }
941
0
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
0
                        break;
943
0
                    state->ptr = ptr;
944
0
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
0
                            pattern+pattern[0]);
946
0
                    if (ret) {
947
0
                        if (state->repeat)
948
0
                            MARK_POP_DISCARD(ctx->lastmark);
949
0
                        RETURN_ON_ERROR(ret);
950
0
                        RETURN_SUCCESS;
951
0
                    }
952
0
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
0
                    LASTMARK_RESTORE();
955
956
0
                    ptr--;
957
0
                    ctx->count--;
958
0
                }
959
0
                if (state->repeat)
960
0
                    MARK_POP_DISCARD(ctx->lastmark);
961
2.16k
            } else {
962
                /* general case */
963
2.16k
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
2.16k
                    state->ptr = ptr;
965
2.16k
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
2.16k
                            pattern+pattern[0]);
967
2.16k
                    if (ret) {
968
2.16k
                        if (state->repeat)
969
0
                            MARK_POP_DISCARD(ctx->lastmark);
970
2.16k
                        RETURN_ON_ERROR(ret);
971
2.16k
                        RETURN_SUCCESS;
972
2.16k
                    }
973
0
                    if (state->repeat)
974
0
                        MARK_POP_KEEP(ctx->lastmark);
975
0
                    LASTMARK_RESTORE();
976
977
0
                    ptr--;
978
0
                    ctx->count--;
979
0
                }
980
0
                if (state->repeat)
981
0
                    MARK_POP_DISCARD(ctx->lastmark);
982
0
            }
983
0
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
0
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
0
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
0
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
0
            ctx->u.rep = repeat_pool_malloc(state);
1127
0
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
0
            ctx->u.rep->count = -1;
1131
0
            ctx->u.rep->pattern = pattern;
1132
0
            ctx->u.rep->prev = state->repeat;
1133
0
            ctx->u.rep->last_ptr = NULL;
1134
0
            state->repeat = ctx->u.rep;
1135
1136
0
            state->ptr = ptr;
1137
0
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
0
            state->repeat = ctx->u.rep->prev;
1139
0
            repeat_pool_free(state, ctx->u.rep);
1140
1141
0
            if (ret) {
1142
0
                RETURN_ON_ERROR(ret);
1143
0
                RETURN_SUCCESS;
1144
0
            }
1145
0
            RETURN_FAILURE;
1146
1147
0
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
0
            ctx->u.rep = state->repeat;
1155
0
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
0
            state->ptr = ptr;
1159
1160
0
            ctx->count = ctx->u.rep->count+1;
1161
1162
0
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
0
                   ptr, ctx->count));
1164
1165
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
0
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
0
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
0
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
0
                ctx->u.rep->count = ctx->count;
1185
0
                LASTMARK_SAVE();
1186
0
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
0
                LAST_PTR_PUSH();
1189
0
                ctx->u.rep->last_ptr = state->ptr;
1190
0
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
0
                        ctx->u.rep->pattern+3);
1192
0
                LAST_PTR_POP();
1193
0
                if (ret) {
1194
0
                    MARK_POP_DISCARD(ctx->lastmark);
1195
0
                    RETURN_ON_ERROR(ret);
1196
0
                    RETURN_SUCCESS;
1197
0
                }
1198
0
                MARK_POP(ctx->lastmark);
1199
0
                LASTMARK_RESTORE();
1200
0
                ctx->u.rep->count = ctx->count-1;
1201
0
                state->ptr = ptr;
1202
0
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
0
            state->repeat = ctx->u.rep->prev;
1207
0
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
0
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
0
            RETURN_ON_SUCCESS(ret);
1211
0
            state->ptr = ptr;
1212
0
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
0
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
0
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
0
                   ptr, pattern[1]));
1565
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
0
            state->ptr = ptr - pattern[1];
1568
0
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
0
            RETURN_ON_FAILURE(ret);
1570
0
            pattern += pattern[0];
1571
0
            DISPATCH;
1572
1573
0
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
0
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
0
                   ptr, pattern[1]));
1578
0
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
0
                state->ptr = ptr - pattern[1];
1580
0
                LASTMARK_SAVE();
1581
0
                if (state->repeat)
1582
0
                    MARK_PUSH(ctx->lastmark);
1583
1584
0
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
0
                if (ret) {
1586
0
                    if (state->repeat)
1587
0
                        MARK_POP_DISCARD(ctx->lastmark);
1588
0
                    RETURN_ON_ERROR(ret);
1589
0
                    RETURN_FAILURE;
1590
0
                }
1591
0
                if (state->repeat)
1592
0
                    MARK_POP(ctx->lastmark);
1593
0
                LASTMARK_RESTORE();
1594
0
            }
1595
0
            pattern += pattern[0];
1596
0
            DISPATCH;
1597
1598
0
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
4.41k
exit:
1620
4.41k
    ctx_pos = ctx->last_ctx_pos;
1621
4.41k
    jump = ctx->jump;
1622
4.41k
    DATA_POP_DISCARD(ctx);
1623
4.41k
    if (ctx_pos == -1) {
1624
2.24k
        state->sigcount = sigcount;
1625
2.24k
        return ret;
1626
2.24k
    }
1627
2.16k
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
2.16k
    switch (jump) {
1630
0
        case JUMP_MAX_UNTIL_2:
1631
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
0
            goto jump_max_until_2;
1633
0
        case JUMP_MAX_UNTIL_3:
1634
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
0
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
0
        case JUMP_BRANCH:
1643
0
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
0
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
0
        case JUMP_REPEAT:
1658
0
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
0
            goto jump_repeat;
1660
0
        case JUMP_REPEAT_ONE_1:
1661
0
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
0
            goto jump_repeat_one_1;
1663
2.16k
        case JUMP_REPEAT_ONE_2:
1664
2.16k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
2.16k
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
0
        case JUMP_ASSERT:
1673
0
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
0
            goto jump_assert;
1675
0
        case JUMP_ASSERT_NOT:
1676
0
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
0
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
2.16k
    }
1683
1684
0
    return ret; /* should never get here */
1685
2.16k
}
1686
1687
/* Capture-group bookkeeping must be cleared between two successive
   SRE(match) calls made from a search loop: both state->lastmark and
   state->lastindex are set back to -1.  Expects a pointer named `state`
   to be in scope at the point of use. */
#define RESET_CAPTURE_GROUP() \
    do { \
        state->lastindex = -1; \
        state->lastmark = -1; \
    } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
0
{
1694
0
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
0
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
0
    Py_ssize_t status = 0;
1697
0
    Py_ssize_t prefix_len = 0;
1698
0
    Py_ssize_t prefix_skip = 0;
1699
0
    SRE_CODE* prefix = NULL;
1700
0
    SRE_CODE* charset = NULL;
1701
0
    SRE_CODE* overlap = NULL;
1702
0
    int flags = 0;
1703
0
    INIT_TRACE(state);
1704
1705
0
    if (ptr > end)
1706
0
        return 0;
1707
1708
0
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
0
        flags = pattern[2];
1713
1714
0
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
0
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
0
                   end - ptr, (size_t) pattern[3]));
1717
0
            return 0;
1718
0
        }
1719
0
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
0
            end -= pattern[3] - 1;
1723
0
            if (end <= ptr)
1724
0
                end = ptr;
1725
0
        }
1726
1727
0
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
0
            prefix_len = pattern[5];
1731
0
            prefix_skip = pattern[6];
1732
0
            prefix = pattern + 7;
1733
0
            overlap = prefix + prefix_len - 1;
1734
0
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
0
            charset = pattern + 5;
1738
1739
0
        pattern += 1 + pattern[1];
1740
0
    }
1741
1742
0
    TRACE(("prefix = %p %zd %zd\n",
1743
0
           prefix, prefix_len, prefix_skip));
1744
0
    TRACE(("charset = %p\n", charset));
1745
1746
0
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
0
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
0
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
0
#endif
1753
0
        end = (SRE_CHAR *)state->end;
1754
0
        state->must_advance = 0;
1755
0
        while (ptr < end) {
1756
0
            while (*ptr != c) {
1757
0
                if (++ptr >= end)
1758
0
                    return 0;
1759
0
            }
1760
0
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
0
            state->start = ptr;
1762
0
            state->ptr = ptr + prefix_skip;
1763
0
            if (flags & SRE_INFO_LITERAL)
1764
0
                return 1; /* we got all of it */
1765
0
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
0
            if (status != 0)
1767
0
                return status;
1768
0
            ++ptr;
1769
0
            RESET_CAPTURE_GROUP();
1770
0
        }
1771
0
        return 0;
1772
0
    }
1773
1774
0
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
0
        Py_ssize_t i = 0;
1778
1779
0
        end = (SRE_CHAR *)state->end;
1780
0
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
0
        for (i = 0; i < prefix_len; i++)
1784
0
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
0
#endif
1787
0
        while (ptr < end) {
1788
0
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
0
            while (*ptr++ != c) {
1790
0
                if (ptr >= end)
1791
0
                    return 0;
1792
0
            }
1793
0
            if (ptr >= end)
1794
0
                return 0;
1795
1796
0
            i = 1;
1797
0
            state->must_advance = 0;
1798
0
            do {
1799
0
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
0
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
0
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
0
                    state->start = ptr - (prefix_len - 1);
1808
0
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
0
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
0
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
0
                    if (status != 0)
1813
0
                        return status;
1814
                    /* close but no cigar -- try again */
1815
0
                    if (++ptr >= end)
1816
0
                        return 0;
1817
0
                    RESET_CAPTURE_GROUP();
1818
0
                }
1819
0
                i = overlap[i];
1820
0
            } while (i != 0);
1821
0
        }
1822
0
        return 0;
1823
0
    }
1824
1825
0
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
0
        end = (SRE_CHAR *)state->end;
1828
0
        state->must_advance = 0;
1829
0
        for (;;) {
1830
0
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
0
                ptr++;
1832
0
            if (ptr >= end)
1833
0
                return 0;
1834
0
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
0
            state->start = ptr;
1836
0
            state->ptr = ptr;
1837
0
            status = SRE(match)(state, pattern, 0);
1838
0
            if (status != 0)
1839
0
                break;
1840
0
            ptr++;
1841
0
            RESET_CAPTURE_GROUP();
1842
0
        }
1843
0
    } else {
1844
        /* general case */
1845
0
        assert(ptr <= end);
1846
0
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
0
        state->start = state->ptr = ptr;
1848
0
        status = SRE(match)(state, pattern, 1);
1849
0
        state->must_advance = 0;
1850
0
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
0
        while (status == 0 && ptr < end) {
1858
0
            ptr++;
1859
0
            RESET_CAPTURE_GROUP();
1860
0
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
0
            state->start = state->ptr = ptr;
1862
0
            status = SRE(match)(state, pattern, 0);
1863
0
        }
1864
0
    }
1865
1866
0
    return status;
1867
0
}
Unexecuted instantiation: sre.c:sre_ucs1_search
Unexecuted instantiation: sre.c:sre_ucs2_search
Unexecuted instantiation: sre.c:sre_ucs4_search
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/