Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
22.4M
{
18
    /* check if pointer is at given position */
19
20
22.4M
    Py_ssize_t thisp, thatp;
21
22
22.4M
    switch (at) {
23
24
10.3M
    case SRE_AT_BEGINNING:
25
10.3M
    case SRE_AT_BEGINNING_STRING:
26
10.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.16M
    case SRE_AT_END:
33
7.16M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
23.2k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.16M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
5.01M
    case SRE_AT_END_STRING:
42
5.01M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
22.4M
    }
87
88
0
    return 0;
89
22.4M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
17.2M
{
18
    /* check if pointer is at given position */
19
20
17.2M
    Py_ssize_t thisp, thatp;
21
22
17.2M
    switch (at) {
23
24
8.85M
    case SRE_AT_BEGINNING:
25
8.85M
    case SRE_AT_BEGINNING_STRING:
26
8.85M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
5.78M
    case SRE_AT_END:
33
5.78M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
23.1k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
5.78M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.60M
    case SRE_AT_END_STRING:
42
2.60M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
17.2M
    }
87
88
0
    return 0;
89
17.2M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
4.15M
{
18
    /* check if pointer is at given position */
19
20
4.15M
    Py_ssize_t thisp, thatp;
21
22
4.15M
    switch (at) {
23
24
1.43M
    case SRE_AT_BEGINNING:
25
1.43M
    case SRE_AT_BEGINNING_STRING:
26
1.43M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
1.33M
    case SRE_AT_END:
33
1.33M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
49
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
1.33M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.38M
    case SRE_AT_END_STRING:
42
1.38M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
4.15M
    }
87
88
0
    return 0;
89
4.15M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
1.08M
{
18
    /* check if pointer is at given position */
19
20
1.08M
    Py_ssize_t thisp, thatp;
21
22
1.08M
    switch (at) {
23
24
17.3k
    case SRE_AT_BEGINNING:
25
17.3k
    case SRE_AT_BEGINNING_STRING:
26
17.3k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
45.3k
    case SRE_AT_END:
33
45.3k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
102
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
45.3k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.02M
    case SRE_AT_END_STRING:
42
1.02M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.08M
    }
87
88
0
    return 0;
89
1.08M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.51G
{
94
    /* check if character is a member of the given set */
95
96
1.51G
    int ok = 1;
97
98
3.41G
    for (;;) {
99
3.41G
        switch (*set++) {
100
101
974M
        case SRE_OP_FAILURE:
102
974M
            return !ok;
103
104
1.17G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.17G
            if (ch == set[0])
107
9.26M
                return ok;
108
1.16G
            set++;
109
1.16G
            break;
110
111
87.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
87.0M
            if (sre_category(set[0], (int) ch))
114
76.0M
                return ok;
115
10.9M
            set++;
116
10.9M
            break;
117
118
498M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
498M
            if (ch < 256 &&
121
475M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
187M
                return ok;
123
310M
            set += 256/SRE_CODE_BITS;
124
310M
            break;
125
126
401M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
401M
            if (set[0] <= ch && ch <= set[1])
129
265M
                return ok;
130
135M
            set += 2;
131
135M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
273M
        case SRE_OP_NEGATE:
148
273M
            ok = !ok;
149
273M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.41G
        }
175
3.41G
    }
176
1.51G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
508M
{
94
    /* check if character is a member of the given set */
95
96
508M
    int ok = 1;
97
98
1.05G
    for (;;) {
99
1.05G
        switch (*set++) {
100
101
284M
        case SRE_OP_FAILURE:
102
284M
            return !ok;
103
104
334M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
334M
            if (ch == set[0])
107
6.96M
                return ok;
108
327M
            set++;
109
327M
            break;
110
111
32.3M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.3M
            if (sre_category(set[0], (int) ch))
114
22.6M
                return ok;
115
9.65M
            set++;
116
9.65M
            break;
117
118
128M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
128M
            if (ch < 256 &&
121
128M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
53.5M
                return ok;
123
74.6M
            set += 256/SRE_CODE_BITS;
124
74.6M
            break;
125
126
212M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
212M
            if (set[0] <= ch && ch <= set[1])
129
140M
                return ok;
130
71.9M
            set += 2;
131
71.9M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
66.8M
        case SRE_OP_NEGATE:
148
66.8M
            ok = !ok;
149
66.8M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.05G
        }
175
1.05G
    }
176
508M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
675M
{
94
    /* check if character is a member of the given set */
95
96
675M
    int ok = 1;
97
98
1.59G
    for (;;) {
99
1.59G
        switch (*set++) {
100
101
468M
        case SRE_OP_FAILURE:
102
468M
            return !ok;
103
104
642M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
642M
            if (ch == set[0])
107
1.62M
                return ok;
108
640M
            set++;
109
640M
            break;
110
111
48.8M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
48.8M
            if (sre_category(set[0], (int) ch))
114
47.9M
                return ok;
115
976k
            set++;
116
976k
            break;
117
118
172M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
172M
            if (ch < 256 &&
121
162M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
53.0M
                return ok;
123
119M
            set += 256/SRE_CODE_BITS;
124
119M
            break;
125
126
157M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
157M
            if (set[0] <= ch && ch <= set[1])
129
104M
                return ok;
130
53.7M
            set += 2;
131
53.7M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
108M
        case SRE_OP_NEGATE:
148
108M
            ok = !ok;
149
108M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.59G
        }
175
1.59G
    }
176
675M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
329M
{
94
    /* check if character is a member of the given set */
95
96
329M
    int ok = 1;
97
98
753M
    for (;;) {
99
753M
        switch (*set++) {
100
101
221M
        case SRE_OP_FAILURE:
102
221M
            return !ok;
103
104
199M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
199M
            if (ch == set[0])
107
666k
                return ok;
108
198M
            set++;
109
198M
            break;
110
111
5.77M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
5.77M
            if (sre_category(set[0], (int) ch))
114
5.46M
                return ok;
115
307k
            set++;
116
307k
            break;
117
118
197M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
197M
            if (ch < 256 &&
121
185M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
81.2M
                return ok;
123
116M
            set += 256/SRE_CODE_BITS;
124
116M
            break;
125
126
31.1M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
31.1M
            if (set[0] <= ch && ch <= set[1])
129
20.9M
                return ok;
130
10.2M
            set += 2;
131
10.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
98.2M
        case SRE_OP_NEGATE:
148
98.2M
            ok = !ok;
149
98.2M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
753M
        }
175
753M
    }
176
329M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
467M
{
195
467M
    SRE_CODE chr;
196
467M
    SRE_CHAR c;
197
467M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
467M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
467M
    Py_ssize_t i;
200
467M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
467M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
50.8M
        end = ptr + maxcount;
205
206
467M
    switch (pattern[0]) {
207
208
412M
    case SRE_OP_IN:
209
        /* repeated set */
210
412M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
775M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
363M
            ptr++;
213
412M
        break;
214
215
2.61M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.61M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
38.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
36.0M
            ptr++;
220
2.61M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
50.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
50.6M
        chr = pattern[1];
232
50.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
50.6M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
39.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
39.2M
        else
238
39.2M
#endif
239
54.0M
        while (ptr < end && *ptr == c)
240
3.35M
            ptr++;
241
50.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.14M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.14M
        chr = pattern[1];
270
1.14M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.14M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
511k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
511k
        else
276
511k
#endif
277
53.2M
        while (ptr < end && *ptr != c)
278
52.1M
            ptr++;
279
1.14M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
467M
    }
319
320
467M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
467M
           ptr - (SRE_CHAR*) state->ptr));
322
467M
    return ptr - (SRE_CHAR*) state->ptr;
323
467M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
187M
{
195
187M
    SRE_CODE chr;
196
187M
    SRE_CHAR c;
197
187M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
187M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
187M
    Py_ssize_t i;
200
187M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
187M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
22.0M
        end = ptr + maxcount;
205
206
187M
    switch (pattern[0]) {
207
208
159M
    case SRE_OP_IN:
209
        /* repeated set */
210
159M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
302M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
143M
            ptr++;
213
159M
        break;
214
215
2.37M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.37M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
11.9M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
9.54M
            ptr++;
220
2.37M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
25.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
25.4M
        chr = pattern[1];
232
25.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
25.4M
        c = (SRE_CHAR) chr;
234
25.4M
#if SIZEOF_SRE_CHAR < 4
235
25.4M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
25.4M
        else
238
25.4M
#endif
239
25.7M
        while (ptr < end && *ptr == c)
240
339k
            ptr++;
241
25.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
291k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
291k
        chr = pattern[1];
270
291k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
291k
        c = (SRE_CHAR) chr;
272
291k
#if SIZEOF_SRE_CHAR < 4
273
291k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
291k
        else
276
291k
#endif
277
13.2M
        while (ptr < end && *ptr != c)
278
12.9M
            ptr++;
279
291k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
187M
    }
319
320
187M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
187M
           ptr - (SRE_CHAR*) state->ptr));
322
187M
    return ptr - (SRE_CHAR*) state->ptr;
323
187M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
190M
{
195
190M
    SRE_CODE chr;
196
190M
    SRE_CHAR c;
197
190M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
190M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
190M
    Py_ssize_t i;
200
190M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
190M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.0M
        end = ptr + maxcount;
205
206
190M
    switch (pattern[0]) {
207
208
176M
    case SRE_OP_IN:
209
        /* repeated set */
210
176M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
295M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
118M
            ptr++;
213
176M
        break;
214
215
232k
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
232k
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
12.2M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
12.0M
            ptr++;
220
232k
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
13.8M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
13.8M
        chr = pattern[1];
232
13.8M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
13.8M
        c = (SRE_CHAR) chr;
234
13.8M
#if SIZEOF_SRE_CHAR < 4
235
13.8M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
13.8M
        else
238
13.8M
#endif
239
16.0M
        while (ptr < end && *ptr == c)
240
2.22M
            ptr++;
241
13.8M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
220k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
220k
        chr = pattern[1];
270
220k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
220k
        c = (SRE_CHAR) chr;
272
220k
#if SIZEOF_SRE_CHAR < 4
273
220k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
220k
        else
276
220k
#endif
277
11.6M
        while (ptr < end && *ptr != c)
278
11.4M
            ptr++;
279
220k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
190M
    }
319
320
190M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
190M
           ptr - (SRE_CHAR*) state->ptr));
322
190M
    return ptr - (SRE_CHAR*) state->ptr;
323
190M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
89.2M
{
195
89.2M
    SRE_CODE chr;
196
89.2M
    SRE_CHAR c;
197
89.2M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
89.2M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
89.2M
    Py_ssize_t i;
200
89.2M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
89.2M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
13.6M
        end = ptr + maxcount;
205
206
89.2M
    switch (pattern[0]) {
207
208
77.2M
    case SRE_OP_IN:
209
        /* repeated set */
210
77.2M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
178M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
101M
            ptr++;
213
77.2M
        break;
214
215
8.42k
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
8.42k
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
14.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
14.4M
            ptr++;
220
8.42k
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.4M
        chr = pattern[1];
232
11.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.4M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.2M
        while (ptr < end && *ptr == c)
240
792k
            ptr++;
241
11.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
634k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
634k
        chr = pattern[1];
270
634k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
634k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
28.3M
        while (ptr < end && *ptr != c)
278
27.7M
            ptr++;
279
634k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
89.2M
    }
319
320
89.2M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
89.2M
           ptr - (SRE_CHAR*) state->ptr));
322
89.2M
    return ptr - (SRE_CHAR*) state->ptr;
323
89.2M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Copy the engine's current lastmark/lastindex into the active match
   context so they can be put back after a failed sub-match. */
#define LASTMARK_SAVE()                       \
    do {                                      \
        ctx->lastindex = state->lastindex;    \
        ctx->lastmark = state->lastmark;      \
    } while (0)

/* Write the lastmark/lastindex held in the active match context back
   into the engine state. */
#define LASTMARK_RESTORE()                    \
    do {                                      \
        state->lastindex = ctx->lastindex;    \
        state->lastmark = ctx->lastmark;      \
    } while (0)
363
364
/* Trace, then push, the last_ptr of the repeat attached to the current
   context (ctx->u.rep) onto the data stack. */
#define LAST_PTR_PUSH()                                   \
    do {                                                  \
        TRACE(("push last_ptr: %zd",                      \
               PTR_TO_INDEX(ctx->u.rep->last_ptr)));      \
        DATA_PUSH(&ctx->u.rep->last_ptr);                 \
    } while (0)

/* Pop the saved value back into ctx->u.rep->last_ptr, then trace it. */
#define LAST_PTR_POP()                                    \
    do {                                                  \
        DATA_POP(&ctx->u.rep->last_ptr);                  \
        TRACE(("pop last_ptr: %zd",                       \
               PTR_TO_INDEX(ctx->u.rep->last_ptr)));      \
    } while (0)
376
377
0
/* Early-exit helpers for the matcher.
 *
 * RETURN_ERROR(i)   - return `i` from the enclosing function immediately.
 * RETURN_FAILURE    - set `ret` to 0 and jump to the `exit` label.
 * RETURN_SUCCESS    - set `ret` to 1 and jump to the `exit` label.
 * RETURN_ON_ERROR   - RETURN_ERROR(i) when i is negative.
 * RETURN_ON_SUCCESS - as RETURN_ON_ERROR, then RETURN_SUCCESS when i > 0.
 * RETURN_ON_FAILURE - as RETURN_ON_ERROR, then RETURN_FAILURE when i == 0.
 *
 * The argument is parenthesized at every use so that expressions such as
 * `a & b` keep their meaning next to the comparison operators.  It may be
 * evaluated more than once, so pass an expression without side effects.
 * These macros rely on a local `ret` and an `exit` label in the caller.
 */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
387
388
1.15G
/* Reserve sizeof(type) bytes at the current top of `state`'s data stack
 * and point `ptr` at them.
 *
 * Side effects on the caller's locals: `alloc_pos` receives the offset of
 * the new slot.  When the remaining space is too small the stack is grown
 * with data_stack_grow(); a negative result is returned from the enclosing
 * function.  After a grow, `ctx` is looked up again from `ctx_pos` (unless
 * ctx_pos is -1) -- presumably because growing may move the buffer.
 *
 * sizeof() yields a size_t, so the trace uses %zu for it.
 */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = (state)->data_stack_base; \
    TRACE(("allocating %s in %zd (%zu)\n", \
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
    if (sizeof(type) > (state)->data_stack_size - alloc_pos) { \
        int j = data_stack_grow((state), sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE(match_context), ctx, ctx_pos); \
    } \
    (ptr) = (type*)((state)->data_stack + alloc_pos); \
    (state)->data_stack_base += sizeof(type); \
} while (0)
402
403
1.15G
/* Point `ptr` (as a `type*`) at byte offset `pos` inside `state`'s data
 * stack.  Nothing is allocated or copied.  `state` and `pos` are
 * parenthesized so that compound argument expressions address the
 * intended location.
 */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), (pos))); \
    (ptr) = (type*)((state)->data_stack + (pos)); \
} while (0)
408
409
539M
/* Copy `size` bytes from `data` onto the top of `state`'s data stack and
 * advance data_stack_base by `size`.
 *
 * When the remaining space is too small the stack is grown with
 * data_stack_grow(); a negative result is returned from the enclosing
 * function.  After a grow, `ctx` is looked up again from `ctx_pos`
 * (unless ctx_pos is -1).  Every macro argument is parenthesized so that
 * compound expressions are evaluated as a unit.
 */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %zd (%zd)\n", \
           (data), (state)->data_stack_base, (size))); \
    if ((size) > (state)->data_stack_size - (state)->data_stack_base) { \
        int j = data_stack_grow((state), (size)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE(match_context), ctx, ctx_pos); \
    } \
    memcpy((state)->data_stack + (state)->data_stack_base, (data), (size)); \
    (state)->data_stack_base += (size); \
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
298M
/* Copy the topmost `size` bytes of `state`'s data stack into `data`; when
 * `discard` is true, also lower data_stack_base by `size`.
 *
 * `size` is parenthesized wherever it is subtracted from the stack base:
 * without that, an argument such as `a + b` would expand to
 * `base - a + b` and read from the wrong address.
 */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %zd (%zd)\n", \
           (data), (state)->data_stack_base - (size), (size))); \
    memcpy((void*) (data), \
           (state)->data_stack + (state)->data_stack_base - (size), \
           (size)); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)
434
435
1.39G
/* Drop the topmost `size` bytes of `state`'s data stack without copying
 * them anywhere.  `state` and `size` are parenthesized so that compound
 * argument expressions (e.g. `&s`, `a + b`) expand correctly, including
 * in the traced offset.
 */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %zd (%zd)\n", \
           (state)->data_stack_base - (size), (size))); \
    (state)->data_stack_base -= (size); \
} while(0)
441
442
/* Convenience wrappers that apply the DATA_STACK_* macros to the local
   `state`.  For the push/pop forms `x` is a pointer and the byte count is
   the size of the object it points to. */
#define DATA_PUSH(x)            DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x)             DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x)     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p)         DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Character index of `ptr` relative to state->beginning (byte distance
   divided by state->charsize), or -1 when `ptr` is null. */
#define PTR_TO_INDEX(ptr)                                             \
    (!(ptr) ? -1                                                      \
            : ((char*)(ptr) - (char*)state->beginning) / state->charsize)
455
456
#if VERBOSE
/* When tracing is active, print `label`, the number of marks, and the
   index of every entry in state->mark[0..lastmark]; an extra space is
   emitted before each even-numbered entry after the first.  Expands to
   nothing when VERBOSE is off. */
#  define MARK_TRACE(label, lastmark)                               \
    do {                                                            \
        if (DO_TRACE) {                                             \
            TRACE(("%s %d marks:", (label), (lastmark)+1));         \
            for (int j = 0; j <= (lastmark); j++) {                 \
                if (j > 0 && j % 2 == 0) {                          \
                    TRACE((" "));                                   \
                }                                                   \
                TRACE((" %zd", PTR_TO_INDEX(state->mark[j])));      \
            }                                                       \
            TRACE(("\n"));                                          \
        }                                                           \
    } while (0)
#else
#  define MARK_TRACE(label, lastmark)
#endif
471
/* Save/restore helpers for state->mark[0..lastmark] on the data stack.
 * Each macro does nothing when `lastmark` is negative; otherwise it works
 * on (lastmark + 1) pointer-sized entries.
 *
 * MARK_PUSH         - copy the marks onto the data stack.
 * MARK_POP          - copy them back into state->mark and drop them.
 * MARK_POP_KEEP     - copy them back but leave them on the stack.
 * MARK_POP_DISCARD  - drop them without copying.
 *
 * `lastmark` is parenthesized at every use so that compound argument
 * expressions (e.g. a conditional) are compared and incremented as a unit.
 */
#define MARK_PUSH(lastmark) \
    do if ((lastmark) >= 0) { \
        MARK_TRACE("push", (lastmark)); \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
    } while (0)
#define MARK_POP(lastmark) \
    do if ((lastmark) >= 0) { \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
        MARK_TRACE("pop", (lastmark)); \
    } while (0)
#define MARK_POP_KEEP(lastmark) \
    do if ((lastmark) >= 0) { \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
        MARK_TRACE("pop keep", (lastmark)); \
    } while (0)
#define MARK_POP_DISCARD(lastmark) \
    do if ((lastmark) >= 0) { \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_POP_DISCARD(state, _marks_size); \
        MARK_TRACE("pop discard", (lastmark)); \
    } while (0)
495
496
471M
/* Identifiers recorded in a match context's `jump` field.  DO_JUMPX stores
   one of these in every context it pushes, and SRE(match) stores JUMP_NONE
   in the context it allocates on entry.
   NOTE(review): presumably each value selects the jumplabel to resume at
   when the pushed context finishes -- the code that consumes them is
   outside this chunk; confirm there before renumbering. */
#define JUMP_NONE            0
497
145k
#define JUMP_MAX_UNTIL_1     1
498
193M
#define JUMP_MAX_UNTIL_2     2
499
48.3M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
48.1M
#define JUMP_REPEAT          7
504
12.4M
#define JUMP_REPEAT_ONE_1    8
505
158M
#define JUMP_REPEAT_ONE_2    9
506
832
#define JUMP_MIN_REPEAT_ONE  10
507
108M
#define JUMP_BRANCH          11
508
25.7M
#define JUMP_ASSERT          12
509
89.1M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Enter a nested match without a C-level call.
 *
 *   jumpvalue   - JUMP_* identifier stored in the new context's `jump`.
 *   jumplabel   - label defined by this expansion; the statements after it
 *                 reload `pattern` and `ptr` from the current context.
 *   nextpattern - pattern code the nested match starts at.
 *   toplevel_   - value stored in the new context's `toplevel`.
 *
 * Steps, in order: save `pattern`/`ptr` into the current context, allocate
 * a new match context on the data stack (DATA_ALLOC also sets alloc_pos),
 * fill it in and link it to the current one through last_ctx_pos, make it
 * current (ctx, ctx_pos, pattern), then `goto entrance`.
 *
 * The expansion is a bare sequence of statements plus a label (there is no
 * do/while wrapper), so it must only be used where a statement sequence is
 * valid.  NOTE(review): the code that jumps back to `jumplabel` is outside
 * this chunk.
 */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
683M
    ctx->pattern = pattern; \
516
683M
    ctx->ptr = ptr; \
517
683M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
683M
    nextctx->pattern = nextpattern; \
519
683M
    nextctx->toplevel = toplevel_; \
520
683M
    nextctx->jump = jumpvalue; \
521
683M
    nextctx->last_ctx_pos = ctx_pos; \
522
683M
    pattern = nextpattern; \
523
683M
    ctx_pos = alloc_pos; \
524
683M
    ctx = nextctx; \
525
683M
    goto entrance; \
526
683M
    jumplabel: \
527
683M
    pattern = ctx->pattern; \
528
683M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX where the nested context inherits the current context's
   `toplevel` flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
568M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX where the nested context's `toplevel` flag is forced to 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
114M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* One frame of the matcher's explicit stack.  Frames are allocated on the
   state's data stack with DATA_ALLOC, both on entry to SRE(match) and by
   every DO_JUMPX. */
typedef struct {
537
    Py_ssize_t count;           /* NOTE(review): presumably a repeat counter; its use is outside this chunk */
538
    union {
539
        SRE_CODE chr;           /* NOTE(review): use not visible in this chunk */
540
        SRE_REPEAT* rep;        /* repeat record; LAST_PTR_PUSH/POP save and restore its last_ptr */
541
    } u;
542
    int lastmark;               /* copy of state->lastmark taken by LASTMARK_SAVE() */
543
    int lastindex;              /* copy of state->lastindex taken by LASTMARK_SAVE() */
544
    const SRE_CODE* pattern;    /* pattern position saved by DO_JUMPX, reloaded after its jumplabel */
545
    const SRE_CHAR* ptr;        /* string position saved by DO_JUMPX, reloaded after its jumplabel */
546
    int toplevel;               /* when set, SRE_OP_SUCCESS also checks match_all / must_advance */
547
    int jump;                   /* JUMP_* identifier of the DO_JUMPX site that pushed this frame */
548
    Py_ssize_t last_ctx_pos;    /* data-stack offset of the frame that was current when this one was pushed; -1 for the first */
549
} SRE(match_context);
550
551
/* Increment the local `sigcount`; each time its low 12 bits are all zero,
   call PyErr_CheckSignals() and, if that reports nonzero, leave the
   matcher with SRE_ERROR_INTERRUPTED. */
#define _MAYBE_CHECK_SIGNALS                                        \
    do {                                                            \
        ++sigcount;                                                 \
        if ((sigcount & 0xfff) == 0 && PyErr_CheckSignals()) {      \
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                    \
        }                                                           \
    } while (0)

#ifdef Py_DEBUG
/* Debug builds add a countdown: while state->fail_after_count is
   non-negative it is decremented on every check, and the check that finds
   it at zero raises state->fail_after_exc and leaves the matcher with
   SRE_ERROR_INTERRUPTED. */
# define MAYBE_CHECK_SIGNALS                                        \
    do {                                                            \
        _MAYBE_CHECK_SIGNALS;                                       \
        if (state->fail_after_count >= 0 &&                         \
            state->fail_after_count-- == 0) {                       \
            PyErr_SetNone(state->fail_after_exc);                   \
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                    \
        }                                                           \
    } while (0)
#else
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
#endif /* Py_DEBUG */
572
573
/* Normalize USE_COMPUTED_GOTOS to a defined 0 or 1:
   - HAVE_COMPUTED_GOTOS defined: default to 1 unless the build already
     set a value;
   - not available but explicitly requested (non-zero): stop with #error;
   - otherwise: force 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch primitives used inside SRE(match).
   TARGET(OP) introduces the handler for opcode OP: a TARGET_<OP> label
   when computed gotos are used, a `case OP` label otherwise.
   DISPATCH moves on to the next opcode: with computed gotos it runs
   MAYBE_CHECK_SIGNALS and jumps through sre_targets[] indexed by
   *pattern++; otherwise it jumps to the `dispatch` label. */
#if USE_COMPUTED_GOTOS
585
2.21G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.13G
        do {                               \
588
2.13G
            MAYBE_CHECK_SIGNALS;           \
589
2.13G
            goto *sre_targets[*pattern++]; \
590
2.13G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
471M
{
601
471M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
471M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
471M
    Py_ssize_t ret = 0;
604
471M
    int jump;
605
471M
    unsigned int sigcount = state->sigcount;
606
607
471M
    SRE(match_context)* ctx;
608
471M
    SRE(match_context)* nextctx;
609
471M
    INIT_TRACE(state);
610
611
471M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
471M
    DATA_ALLOC(SRE(match_context), ctx);
614
471M
    ctx->last_ctx_pos = -1;
615
471M
    ctx->jump = JUMP_NONE;
616
471M
    ctx->toplevel = toplevel;
617
471M
    ctx_pos = alloc_pos;
618
619
471M
#if USE_COMPUTED_GOTOS
620
471M
#include "sre_targets.h"
621
471M
#endif
622
623
1.15G
entrance:
624
625
1.15G
    ;  // Fashion statement.
626
1.15G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.15G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
61.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.62M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.62M
                   end - ptr, (size_t) pattern[3]));
634
3.62M
            RETURN_FAILURE;
635
3.62M
        }
636
58.2M
        pattern += pattern[1] + 1;
637
58.2M
    }
638
639
1.15G
#if USE_COMPUTED_GOTOS
640
1.15G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.15G
    {
647
648
1.15G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
436M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
436M
                   ptr, pattern[0]));
653
436M
            {
654
436M
                int i = pattern[0];
655
436M
                if (i & 1)
656
68.6M
                    state->lastindex = i/2 + 1;
657
436M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
430M
                    int j = state->lastmark + 1;
663
444M
                    while (j < i)
664
14.0M
                        state->mark[j++] = NULL;
665
430M
                    state->lastmark = i;
666
430M
                }
667
436M
                state->mark[i] = ptr;
668
436M
            }
669
436M
            pattern++;
670
436M
            DISPATCH;
671
672
436M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
147M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
147M
                   ptr, *pattern));
677
147M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
65.5M
                RETURN_FAILURE;
679
81.6M
            pattern++;
680
81.6M
            ptr++;
681
81.6M
            DISPATCH;
682
683
81.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
162M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
162M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
162M
            if (ctx->toplevel &&
698
42.6M
                ((state->match_all && ptr != state->end) ||
699
42.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
162M
            state->ptr = ptr;
704
162M
            RETURN_SUCCESS;
705
706
22.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
22.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
22.4M
            if (!SRE(at)(state, ptr, *pattern))
711
5.39M
                RETURN_FAILURE;
712
17.0M
            pattern++;
713
17.0M
            DISPATCH;
714
715
17.0M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
340M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
340M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
340M
            if (ptr >= end ||
749
336M
                !SRE(charset)(state, pattern + 1, *ptr))
750
84.8M
                RETURN_FAILURE;
751
255M
            pattern += pattern[0];
752
255M
            ptr++;
753
255M
            DISPATCH;
754
755
255M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.65M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.65M
                   pattern, ptr, pattern[0]));
758
7.65M
            if (ptr >= end ||
759
7.65M
                sre_lower_ascii(*ptr) != *pattern)
760
47.7k
                RETURN_FAILURE;
761
7.60M
            pattern++;
762
7.60M
            ptr++;
763
7.60M
            DISPATCH;
764
765
7.60M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
77.2M
        TARGET(SRE_OP_JUMP):
845
77.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
77.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
77.2M
                   ptr, pattern[0]));
850
77.2M
            pattern += pattern[0];
851
77.2M
            DISPATCH;
852
853
98.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
98.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
98.3M
            LASTMARK_SAVE();
858
98.3M
            if (state->repeat)
859
59.0M
                MARK_PUSH(ctx->lastmark);
860
218M
            for (; pattern[0]; pattern += pattern[0]) {
861
194M
                if (pattern[1] == SRE_OP_LITERAL &&
862
114M
                    (ptr >= end ||
863
114M
                     (SRE_CODE) *ptr != pattern[2]))
864
58.5M
                    continue;
865
136M
                if (pattern[1] == SRE_OP_IN &&
866
51.9M
                    (ptr >= end ||
867
51.8M
                     !SRE(charset)(state, pattern + 3,
868
51.8M
                                   (SRE_CODE) *ptr)))
869
28.1M
                    continue;
870
108M
                state->ptr = ptr;
871
108M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
108M
                if (ret) {
873
74.8M
                    if (state->repeat)
874
49.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
74.8M
                    RETURN_ON_ERROR(ret);
876
74.8M
                    RETURN_SUCCESS;
877
74.8M
                }
878
33.2M
                if (state->repeat)
879
15.3k
                    MARK_POP_KEEP(ctx->lastmark);
880
33.2M
                LASTMARK_RESTORE();
881
33.2M
            }
882
23.5M
            if (state->repeat)
883
9.09M
                MARK_POP_DISCARD(ctx->lastmark);
884
23.5M
            RETURN_FAILURE;
885
886
469M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
469M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
469M
                   pattern[1], pattern[2]));
898
899
469M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
2.41M
                RETURN_FAILURE; /* cannot match */
901
902
467M
            state->ptr = ptr;
903
904
467M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
467M
            RETURN_ON_ERROR(ret);
906
467M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
467M
            ctx->count = ret;
908
467M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
467M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
306M
                RETURN_FAILURE;
917
918
160M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.20M
                ptr == state->end &&
920
84.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
84.8k
            {
922
                /* tail is empty.  we're finished */
923
84.8k
                state->ptr = ptr;
924
84.8k
                RETURN_SUCCESS;
925
84.8k
            }
926
927
160M
            LASTMARK_SAVE();
928
160M
            if (state->repeat)
929
110M
                MARK_PUSH(ctx->lastmark);
930
931
160M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
22.8M
                ctx->u.chr = pattern[pattern[0]+1];
935
22.8M
                for (;;) {
936
61.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
51.4M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
38.9M
                        ptr--;
939
38.9M
                        ctx->count--;
940
38.9M
                    }
941
22.8M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
10.3M
                        break;
943
12.4M
                    state->ptr = ptr;
944
12.4M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.4M
                            pattern+pattern[0]);
946
12.4M
                    if (ret) {
947
12.4M
                        if (state->repeat)
948
11.1M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.4M
                        RETURN_ON_ERROR(ret);
950
12.4M
                        RETURN_SUCCESS;
951
12.4M
                    }
952
835
                    if (state->repeat)
953
819
                        MARK_POP_KEEP(ctx->lastmark);
954
835
                    LASTMARK_RESTORE();
955
956
835
                    ptr--;
957
835
                    ctx->count--;
958
835
                }
959
10.3M
                if (state->repeat)
960
9.11M
                    MARK_POP_DISCARD(ctx->lastmark);
961
137M
            } else {
962
                /* general case */
963
159M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
158M
                    state->ptr = ptr;
965
158M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
158M
                            pattern+pattern[0]);
967
158M
                    if (ret) {
968
135M
                        if (state->repeat)
969
88.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
135M
                        RETURN_ON_ERROR(ret);
971
135M
                        RETURN_SUCCESS;
972
135M
                    }
973
22.5M
                    if (state->repeat)
974
1.67M
                        MARK_POP_KEEP(ctx->lastmark);
975
22.5M
                    LASTMARK_RESTORE();
976
977
22.5M
                    ptr--;
978
22.5M
                    ctx->count--;
979
22.5M
                }
980
1.96M
                if (state->repeat)
981
1.40M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.96M
            }
983
12.2M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
48.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
48.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
48.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
48.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
48.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
48.1M
            ctx->u.rep->count = -1;
1131
48.1M
            ctx->u.rep->pattern = pattern;
1132
48.1M
            ctx->u.rep->prev = state->repeat;
1133
48.1M
            ctx->u.rep->last_ptr = NULL;
1134
48.1M
            state->repeat = ctx->u.rep;
1135
1136
48.1M
            state->ptr = ptr;
1137
48.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
48.1M
            state->repeat = ctx->u.rep->prev;
1139
48.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
48.1M
            if (ret) {
1142
47.9M
                RETURN_ON_ERROR(ret);
1143
47.9M
                RETURN_SUCCESS;
1144
47.9M
            }
1145
109k
            RETURN_FAILURE;
1146
1147
209M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
209M
            ctx->u.rep = state->repeat;
1155
209M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
209M
            state->ptr = ptr;
1159
1160
209M
            ctx->count = ctx->u.rep->count+1;
1161
1162
209M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
209M
                   ptr, ctx->count));
1164
1165
209M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
145k
                ctx->u.rep->count = ctx->count;
1168
145k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
145k
                        ctx->u.rep->pattern+3);
1170
145k
                if (ret) {
1171
129k
                    RETURN_ON_ERROR(ret);
1172
129k
                    RETURN_SUCCESS;
1173
129k
                }
1174
15.9k
                ctx->u.rep->count = ctx->count-1;
1175
15.9k
                state->ptr = ptr;
1176
15.9k
                RETURN_FAILURE;
1177
15.9k
            }
1178
1179
209M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.5M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
193M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
193M
                ctx->u.rep->count = ctx->count;
1185
193M
                LASTMARK_SAVE();
1186
193M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
193M
                LAST_PTR_PUSH();
1189
193M
                ctx->u.rep->last_ptr = state->ptr;
1190
193M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
193M
                        ctx->u.rep->pattern+3);
1192
193M
                LAST_PTR_POP();
1193
193M
                if (ret) {
1194
160M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
160M
                    RETURN_ON_ERROR(ret);
1196
160M
                    RETURN_SUCCESS;
1197
160M
                }
1198
32.8M
                MARK_POP(ctx->lastmark);
1199
32.8M
                LASTMARK_RESTORE();
1200
32.8M
                ctx->u.rep->count = ctx->count-1;
1201
32.8M
                state->ptr = ptr;
1202
32.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
48.3M
            state->repeat = ctx->u.rep->prev;
1207
48.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
48.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
48.3M
            RETURN_ON_SUCCESS(ret);
1211
392k
            state->ptr = ptr;
1212
392k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
25.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
25.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
25.7M
                   ptr, pattern[1]));
1565
25.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
25.7M
            state->ptr = ptr - pattern[1];
1568
25.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
25.7M
            RETURN_ON_FAILURE(ret);
1570
20.1M
            pattern += pattern[0];
1571
20.1M
            DISPATCH;
1572
1573
89.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
89.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
89.1M
                   ptr, pattern[1]));
1578
89.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
89.1M
                state->ptr = ptr - pattern[1];
1580
89.1M
                LASTMARK_SAVE();
1581
89.1M
                if (state->repeat)
1582
89.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
178M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
178M
                if (ret) {
1586
2.25M
                    if (state->repeat)
1587
2.25M
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.25M
                    RETURN_ON_ERROR(ret);
1589
2.25M
                    RETURN_FAILURE;
1590
2.25M
                }
1591
86.8M
                if (state->repeat)
1592
86.8M
                    MARK_POP(ctx->lastmark);
1593
86.8M
                LASTMARK_RESTORE();
1594
86.8M
            }
1595
86.8M
            pattern += pattern[0];
1596
86.8M
            DISPATCH;
1597
1598
86.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.15G
exit:
1620
1.15G
    ctx_pos = ctx->last_ctx_pos;
1621
1.15G
    jump = ctx->jump;
1622
1.15G
    DATA_POP_DISCARD(ctx);
1623
1.15G
    if (ctx_pos == -1) {
1624
471M
        state->sigcount = sigcount;
1625
471M
        return ret;
1626
471M
    }
1627
683M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
683M
    switch (jump) {
1630
193M
        case JUMP_MAX_UNTIL_2:
1631
193M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
193M
            goto jump_max_until_2;
1633
48.3M
        case JUMP_MAX_UNTIL_3:
1634
48.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
48.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
108M
        case JUMP_BRANCH:
1643
108M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
108M
            goto jump_branch;
1645
145k
        case JUMP_MAX_UNTIL_1:
1646
145k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
145k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
48.1M
        case JUMP_REPEAT:
1658
48.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
48.1M
            goto jump_repeat;
1660
12.4M
        case JUMP_REPEAT_ONE_1:
1661
12.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.4M
            goto jump_repeat_one_1;
1663
158M
        case JUMP_REPEAT_ONE_2:
1664
158M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
158M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
25.7M
        case JUMP_ASSERT:
1673
25.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
25.7M
            goto jump_assert;
1675
89.1M
        case JUMP_ASSERT_NOT:
1676
89.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
89.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
683M
    }
1683
1684
0
    return ret; /* should never get here */
1685
683M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
207M
{
601
207M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
207M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
207M
    Py_ssize_t ret = 0;
604
207M
    int jump;
605
207M
    unsigned int sigcount = state->sigcount;
606
607
207M
    SRE(match_context)* ctx;
608
207M
    SRE(match_context)* nextctx;
609
207M
    INIT_TRACE(state);
610
611
207M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
207M
    DATA_ALLOC(SRE(match_context), ctx);
614
207M
    ctx->last_ctx_pos = -1;
615
207M
    ctx->jump = JUMP_NONE;
616
207M
    ctx->toplevel = toplevel;
617
207M
    ctx_pos = alloc_pos;
618
619
207M
#if USE_COMPUTED_GOTOS
620
207M
#include "sre_targets.h"
621
207M
#endif
622
623
449M
entrance:
624
625
449M
    ;  // Fashion statement.
626
449M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
449M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
36.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.50M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.50M
                   end - ptr, (size_t) pattern[3]));
634
3.50M
            RETURN_FAILURE;
635
3.50M
        }
636
32.9M
        pattern += pattern[1] + 1;
637
32.9M
    }
638
639
445M
#if USE_COMPUTED_GOTOS
640
445M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
445M
    {
647
648
445M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
190M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
190M
                   ptr, pattern[0]));
653
190M
            {
654
190M
                int i = pattern[0];
655
190M
                if (i & 1)
656
32.4M
                    state->lastindex = i/2 + 1;
657
190M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
186M
                    int j = state->lastmark + 1;
663
195M
                    while (j < i)
664
8.91M
                        state->mark[j++] = NULL;
665
186M
                    state->lastmark = i;
666
186M
                }
667
190M
                state->mark[i] = ptr;
668
190M
            }
669
190M
            pattern++;
670
190M
            DISPATCH;
671
672
190M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
83.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
83.2M
                   ptr, *pattern));
677
83.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
34.7M
                RETURN_FAILURE;
679
48.5M
            pattern++;
680
48.5M
            ptr++;
681
48.5M
            DISPATCH;
682
683
48.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
72.8M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
72.8M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
72.8M
            if (ctx->toplevel &&
698
24.8M
                ((state->match_all && ptr != state->end) ||
699
24.8M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
72.8M
            state->ptr = ptr;
704
72.8M
            RETURN_SUCCESS;
705
706
17.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
17.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
17.2M
            if (!SRE(at)(state, ptr, *pattern))
711
2.88M
                RETURN_FAILURE;
712
14.3M
            pattern++;
713
14.3M
            DISPATCH;
714
715
14.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
100M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
100M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
100M
            if (ptr >= end ||
749
99.9M
                !SRE(charset)(state, pattern + 1, *ptr))
750
15.7M
                RETURN_FAILURE;
751
85.0M
            pattern += pattern[0];
752
85.0M
            ptr++;
753
85.0M
            DISPATCH;
754
755
85.0M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
562k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
562k
                   pattern, ptr, pattern[0]));
758
562k
            if (ptr >= end ||
759
562k
                sre_lower_ascii(*ptr) != *pattern)
760
5.11k
                RETURN_FAILURE;
761
557k
            pattern++;
762
557k
            ptr++;
763
557k
            DISPATCH;
764
765
557k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
35.1M
        TARGET(SRE_OP_JUMP):
845
35.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
35.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
35.1M
                   ptr, pattern[0]));
850
35.1M
            pattern += pattern[0];
851
35.1M
            DISPATCH;
852
853
45.7M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
45.7M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
45.7M
            LASTMARK_SAVE();
858
45.7M
            if (state->repeat)
859
14.9M
                MARK_PUSH(ctx->lastmark);
860
108M
            for (; pattern[0]; pattern += pattern[0]) {
861
96.3M
                if (pattern[1] == SRE_OP_LITERAL &&
862
66.6M
                    (ptr >= end ||
863
66.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
27.4M
                    continue;
865
68.8M
                if (pattern[1] == SRE_OP_IN &&
866
14.7M
                    (ptr >= end ||
867
14.6M
                     !SRE(charset)(state, pattern + 3,
868
14.6M
                                   (SRE_CODE) *ptr)))
869
7.26M
                    continue;
870
61.6M
                state->ptr = ptr;
871
61.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
61.6M
                if (ret) {
873
33.5M
                    if (state->repeat)
874
14.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
33.5M
                    RETURN_ON_ERROR(ret);
876
33.5M
                    RETURN_SUCCESS;
877
33.5M
                }
878
28.0M
                if (state->repeat)
879
5.67k
                    MARK_POP_KEEP(ctx->lastmark);
880
28.0M
                LASTMARK_RESTORE();
881
28.0M
            }
882
12.2M
            if (state->repeat)
883
655k
                MARK_POP_DISCARD(ctx->lastmark);
884
12.2M
            RETURN_FAILURE;
885
886
188M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
188M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
188M
                   pattern[1], pattern[2]));
898
899
188M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.07M
                RETURN_FAILURE; /* cannot match */
901
902
187M
            state->ptr = ptr;
903
904
187M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
187M
            RETURN_ON_ERROR(ret);
906
187M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
187M
            ctx->count = ret;
908
187M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
187M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
127M
                RETURN_FAILURE;
917
918
59.8M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
601k
                ptr == state->end &&
920
59.6k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
59.6k
            {
922
                /* tail is empty.  we're finished */
923
59.6k
                state->ptr = ptr;
924
59.6k
                RETURN_SUCCESS;
925
59.6k
            }
926
927
59.8M
            LASTMARK_SAVE();
928
59.8M
            if (state->repeat)
929
42.4M
                MARK_PUSH(ctx->lastmark);
930
931
59.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.23M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.23M
                for (;;) {
936
16.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
14.1M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.86M
                        ptr--;
939
9.86M
                        ctx->count--;
940
9.86M
                    }
941
6.23M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.97M
                        break;
943
4.26M
                    state->ptr = ptr;
944
4.26M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.26M
                            pattern+pattern[0]);
946
4.26M
                    if (ret) {
947
4.26M
                        if (state->repeat)
948
2.98M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.26M
                        RETURN_ON_ERROR(ret);
950
4.26M
                        RETURN_SUCCESS;
951
4.26M
                    }
952
225
                    if (state->repeat)
953
209
                        MARK_POP_KEEP(ctx->lastmark);
954
225
                    LASTMARK_RESTORE();
955
956
225
                    ptr--;
957
225
                    ctx->count--;
958
225
                }
959
1.97M
                if (state->repeat)
960
777k
                    MARK_POP_DISCARD(ctx->lastmark);
961
53.5M
            } else {
962
                /* general case */
963
61.7M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
60.0M
                    state->ptr = ptr;
965
60.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
60.0M
                            pattern+pattern[0]);
967
60.0M
                    if (ret) {
968
51.8M
                        if (state->repeat)
969
37.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
51.8M
                        RETURN_ON_ERROR(ret);
971
51.8M
                        RETURN_SUCCESS;
972
51.8M
                    }
973
8.17M
                    if (state->repeat)
974
1.33M
                        MARK_POP_KEEP(ctx->lastmark);
975
8.17M
                    LASTMARK_RESTORE();
976
977
8.17M
                    ptr--;
978
8.17M
                    ctx->count--;
979
8.17M
                }
980
1.69M
                if (state->repeat)
981
1.18M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.69M
            }
983
3.66M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
16.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
16.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
16.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
16.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
16.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
16.8M
            ctx->u.rep->count = -1;
1131
16.8M
            ctx->u.rep->pattern = pattern;
1132
16.8M
            ctx->u.rep->prev = state->repeat;
1133
16.8M
            ctx->u.rep->last_ptr = NULL;
1134
16.8M
            state->repeat = ctx->u.rep;
1135
1136
16.8M
            state->ptr = ptr;
1137
16.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
16.8M
            state->repeat = ctx->u.rep->prev;
1139
16.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
16.8M
            if (ret) {
1142
16.7M
                RETURN_ON_ERROR(ret);
1143
16.7M
                RETURN_SUCCESS;
1144
16.7M
            }
1145
105k
            RETURN_FAILURE;
1146
1147
66.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
66.2M
            ctx->u.rep = state->repeat;
1155
66.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
66.2M
            state->ptr = ptr;
1159
1160
66.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
66.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
66.2M
                   ptr, ctx->count));
1164
1165
66.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
16.7k
                ctx->u.rep->count = ctx->count;
1168
16.7k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
16.7k
                        ctx->u.rep->pattern+3);
1170
16.7k
                if (ret) {
1171
3.61k
                    RETURN_ON_ERROR(ret);
1172
3.61k
                    RETURN_SUCCESS;
1173
3.61k
                }
1174
13.1k
                ctx->u.rep->count = ctx->count-1;
1175
13.1k
                state->ptr = ptr;
1176
13.1k
                RETURN_FAILURE;
1177
13.1k
            }
1178
1179
66.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.95M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
58.3M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
58.3M
                ctx->u.rep->count = ctx->count;
1185
58.3M
                LASTMARK_SAVE();
1186
58.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
58.3M
                LAST_PTR_PUSH();
1189
58.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
58.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
58.3M
                        ctx->u.rep->pattern+3);
1192
58.3M
                LAST_PTR_POP();
1193
58.3M
                if (ret) {
1194
49.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
49.2M
                    RETURN_ON_ERROR(ret);
1196
49.2M
                    RETURN_SUCCESS;
1197
49.2M
                }
1198
9.03M
                MARK_POP(ctx->lastmark);
1199
9.03M
                LASTMARK_RESTORE();
1200
9.03M
                ctx->u.rep->count = ctx->count-1;
1201
9.03M
                state->ptr = ptr;
1202
9.03M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
16.9M
            state->repeat = ctx->u.rep->prev;
1207
16.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
16.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
16.9M
            RETURN_ON_SUCCESS(ret);
1211
284k
            state->ptr = ptr;
1212
284k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.75M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.75M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.75M
                   ptr, pattern[1]));
1565
3.75M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.75M
            state->ptr = ptr - pattern[1];
1568
3.75M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.75M
            RETURN_ON_FAILURE(ret);
1570
3.52M
            pattern += pattern[0];
1571
3.52M
            DISPATCH;
1572
1573
19.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
19.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
19.9M
                   ptr, pattern[1]));
1578
19.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
19.9M
                state->ptr = ptr - pattern[1];
1580
19.9M
                LASTMARK_SAVE();
1581
19.9M
                if (state->repeat)
1582
19.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
39.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
39.8M
                if (ret) {
1586
2.16M
                    if (state->repeat)
1587
2.16M
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.16M
                    RETURN_ON_ERROR(ret);
1589
2.16M
                    RETURN_FAILURE;
1590
2.16M
                }
1591
17.7M
                if (state->repeat)
1592
17.7M
                    MARK_POP(ctx->lastmark);
1593
17.7M
                LASTMARK_RESTORE();
1594
17.7M
            }
1595
17.7M
            pattern += pattern[0];
1596
17.7M
            DISPATCH;
1597
1598
17.7M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
449M
exit:
1620
449M
    ctx_pos = ctx->last_ctx_pos;
1621
449M
    jump = ctx->jump;
1622
449M
    DATA_POP_DISCARD(ctx);
1623
449M
    if (ctx_pos == -1) {
1624
207M
        state->sigcount = sigcount;
1625
207M
        return ret;
1626
207M
    }
1627
241M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
241M
    switch (jump) {
1630
58.3M
        case JUMP_MAX_UNTIL_2:
1631
58.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
58.3M
            goto jump_max_until_2;
1633
16.9M
        case JUMP_MAX_UNTIL_3:
1634
16.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
16.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
61.6M
        case JUMP_BRANCH:
1643
61.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
61.6M
            goto jump_branch;
1645
16.7k
        case JUMP_MAX_UNTIL_1:
1646
16.7k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
16.7k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
16.8M
        case JUMP_REPEAT:
1658
16.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
16.8M
            goto jump_repeat;
1660
4.26M
        case JUMP_REPEAT_ONE_1:
1661
4.26M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.26M
            goto jump_repeat_one_1;
1663
60.0M
        case JUMP_REPEAT_ONE_2:
1664
60.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
60.0M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.75M
        case JUMP_ASSERT:
1673
3.75M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.75M
            goto jump_assert;
1675
19.9M
        case JUMP_ASSERT_NOT:
1676
19.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
19.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
241M
    }
1683
1684
0
    return ret; /* should never get here */
1685
241M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
211M
{
601
211M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
211M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
211M
    Py_ssize_t ret = 0;
604
211M
    int jump;
605
211M
    unsigned int sigcount = state->sigcount;
606
607
211M
    SRE(match_context)* ctx;
608
211M
    SRE(match_context)* nextctx;
609
211M
    INIT_TRACE(state);
610
611
211M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
211M
    DATA_ALLOC(SRE(match_context), ctx);
614
211M
    ctx->last_ctx_pos = -1;
615
211M
    ctx->jump = JUMP_NONE;
616
211M
    ctx->toplevel = toplevel;
617
211M
    ctx_pos = alloc_pos;
618
619
211M
#if USE_COMPUTED_GOTOS
620
211M
#include "sre_targets.h"
621
211M
#endif
622
623
455M
entrance:
624
625
455M
    ;  // Fashion statement.
626
455M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
455M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
15.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
110k
                   end - ptr, (size_t) pattern[3]));
634
110k
            RETURN_FAILURE;
635
110k
        }
636
15.6M
        pattern += pattern[1] + 1;
637
15.6M
    }
638
639
455M
#if USE_COMPUTED_GOTOS
640
455M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
455M
    {
647
648
455M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
178M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
178M
                   ptr, pattern[0]));
653
178M
            {
654
178M
                int i = pattern[0];
655
178M
                if (i & 1)
656
17.4M
                    state->lastindex = i/2 + 1;
657
178M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
177M
                    int j = state->lastmark + 1;
663
181M
                    while (j < i)
664
3.22M
                        state->mark[j++] = NULL;
665
177M
                    state->lastmark = i;
666
177M
                }
667
178M
                state->mark[i] = ptr;
668
178M
            }
669
178M
            pattern++;
670
178M
            DISPATCH;
671
672
178M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
32.6M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
32.6M
                   ptr, *pattern));
677
32.6M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
16.7M
                RETURN_FAILURE;
679
15.9M
            pattern++;
680
15.9M
            ptr++;
681
15.9M
            DISPATCH;
682
683
15.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
63.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
63.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
63.9M
            if (ctx->toplevel &&
698
10.6M
                ((state->match_all && ptr != state->end) ||
699
10.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
63.9M
            state->ptr = ptr;
704
63.9M
            RETURN_SUCCESS;
705
706
4.15M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
4.15M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
4.15M
            if (!SRE(at)(state, ptr, *pattern))
711
1.45M
                RETURN_FAILURE;
712
2.70M
            pattern++;
713
2.70M
            DISPATCH;
714
715
2.70M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
174M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
174M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
174M
            if (ptr >= end ||
749
171M
                !SRE(charset)(state, pattern + 1, *ptr))
750
56.1M
                RETURN_FAILURE;
751
118M
            pattern += pattern[0];
752
118M
            ptr++;
753
118M
            DISPATCH;
754
755
118M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.73M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.73M
                   pattern, ptr, pattern[0]));
758
4.73M
            if (ptr >= end ||
759
4.73M
                sre_lower_ascii(*ptr) != *pattern)
760
25.6k
                RETURN_FAILURE;
761
4.70M
            pattern++;
762
4.70M
            ptr++;
763
4.70M
            DISPATCH;
764
765
4.70M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
17.5M
        TARGET(SRE_OP_JUMP):
845
17.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
17.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
17.5M
                   ptr, pattern[0]));
850
17.5M
            pattern += pattern[0];
851
17.5M
            DISPATCH;
852
853
22.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
22.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
22.6M
            LASTMARK_SAVE();
858
22.6M
            if (state->repeat)
859
17.1M
                MARK_PUSH(ctx->lastmark);
860
47.0M
            for (; pattern[0]; pattern += pattern[0]) {
861
41.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
19.4M
                    (ptr >= end ||
863
19.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
11.4M
                    continue;
865
30.1M
                if (pattern[1] == SRE_OP_IN &&
866
15.0M
                    (ptr >= end ||
867
15.0M
                     !SRE(charset)(state, pattern + 3,
868
15.0M
                                   (SRE_CODE) *ptr)))
869
8.53M
                    continue;
870
21.6M
                state->ptr = ptr;
871
21.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
21.6M
                if (ret) {
873
17.2M
                    if (state->repeat)
874
13.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
17.2M
                    RETURN_ON_ERROR(ret);
876
17.2M
                    RETURN_SUCCESS;
877
17.2M
                }
878
4.43M
                if (state->repeat)
879
5.14k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.43M
                LASTMARK_RESTORE();
881
4.43M
            }
882
5.45M
            if (state->repeat)
883
3.21M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.45M
            RETURN_FAILURE;
885
886
191M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
191M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
191M
                   pattern[1], pattern[2]));
898
899
191M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.32M
                RETURN_FAILURE; /* cannot match */
901
902
190M
            state->ptr = ptr;
903
904
190M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
190M
            RETURN_ON_ERROR(ret);
906
190M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
190M
            ctx->count = ret;
908
190M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
190M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
145M
                RETURN_FAILURE;
917
918
45.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.51M
                ptr == state->end &&
920
19.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
19.7k
            {
922
                /* tail is empty.  we're finished */
923
19.7k
                state->ptr = ptr;
924
19.7k
                RETURN_SUCCESS;
925
19.7k
            }
926
927
45.5M
            LASTMARK_SAVE();
928
45.5M
            if (state->repeat)
929
25.4M
                MARK_PUSH(ctx->lastmark);
930
931
45.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.93M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.93M
                for (;;) {
936
11.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
9.72M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
7.06M
                        ptr--;
939
7.06M
                        ctx->count--;
940
7.06M
                    }
941
4.93M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.27M
                        break;
943
2.66M
                    state->ptr = ptr;
944
2.66M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.66M
                            pattern+pattern[0]);
946
2.66M
                    if (ret) {
947
2.66M
                        if (state->repeat)
948
2.62M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.66M
                        RETURN_ON_ERROR(ret);
950
2.66M
                        RETURN_SUCCESS;
951
2.66M
                    }
952
322
                    if (state->repeat)
953
322
                        MARK_POP_KEEP(ctx->lastmark);
954
322
                    LASTMARK_RESTORE();
955
956
322
                    ptr--;
957
322
                    ctx->count--;
958
322
                }
959
2.27M
                if (state->repeat)
960
2.26M
                    MARK_POP_DISCARD(ctx->lastmark);
961
40.5M
            } else {
962
                /* general case */
963
48.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
47.8M
                    state->ptr = ptr;
965
47.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
47.8M
                            pattern+pattern[0]);
967
47.8M
                    if (ret) {
968
40.3M
                        if (state->repeat)
969
20.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
40.3M
                        RETURN_ON_ERROR(ret);
971
40.3M
                        RETURN_SUCCESS;
972
40.3M
                    }
973
7.44M
                    if (state->repeat)
974
232k
                        MARK_POP_KEEP(ctx->lastmark);
975
7.44M
                    LASTMARK_RESTORE();
976
977
7.44M
                    ptr--;
978
7.44M
                    ctx->count--;
979
7.44M
                }
980
197k
                if (state->repeat)
981
158k
                    MARK_POP_DISCARD(ctx->lastmark);
982
197k
            }
983
2.47M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
14.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
14.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
14.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
14.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
14.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
14.1M
            ctx->u.rep->count = -1;
1131
14.1M
            ctx->u.rep->pattern = pattern;
1132
14.1M
            ctx->u.rep->prev = state->repeat;
1133
14.1M
            ctx->u.rep->last_ptr = NULL;
1134
14.1M
            state->repeat = ctx->u.rep;
1135
1136
14.1M
            state->ptr = ptr;
1137
14.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
14.1M
            state->repeat = ctx->u.rep->prev;
1139
14.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
14.1M
            if (ret) {
1142
14.1M
                RETURN_ON_ERROR(ret);
1143
14.1M
                RETURN_SUCCESS;
1144
14.1M
            }
1145
3.74k
            RETURN_FAILURE;
1146
1147
84.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
84.1M
            ctx->u.rep = state->repeat;
1155
84.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
84.1M
            state->ptr = ptr;
1159
1160
84.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
84.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
84.1M
                   ptr, ctx->count));
1164
1165
84.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
125k
                ctx->u.rep->count = ctx->count;
1168
125k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
125k
                        ctx->u.rep->pattern+3);
1170
125k
                if (ret) {
1171
123k
                    RETURN_ON_ERROR(ret);
1172
123k
                    RETURN_SUCCESS;
1173
123k
                }
1174
2.72k
                ctx->u.rep->count = ctx->count-1;
1175
2.72k
                state->ptr = ptr;
1176
2.72k
                RETURN_FAILURE;
1177
2.72k
            }
1178
1179
84.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.59M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
81.4M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
81.4M
                ctx->u.rep->count = ctx->count;
1185
81.4M
                LASTMARK_SAVE();
1186
81.4M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
81.4M
                LAST_PTR_PUSH();
1189
81.4M
                ctx->u.rep->last_ptr = state->ptr;
1190
81.4M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
81.4M
                        ctx->u.rep->pattern+3);
1192
81.4M
                LAST_PTR_POP();
1193
81.4M
                if (ret) {
1194
69.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
69.8M
                    RETURN_ON_ERROR(ret);
1196
69.8M
                    RETURN_SUCCESS;
1197
69.8M
                }
1198
11.6M
                MARK_POP(ctx->lastmark);
1199
11.6M
                LASTMARK_RESTORE();
1200
11.6M
                ctx->u.rep->count = ctx->count-1;
1201
11.6M
                state->ptr = ptr;
1202
11.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
14.2M
            state->repeat = ctx->u.rep->prev;
1207
14.2M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
14.2M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
14.2M
            RETURN_ON_SUCCESS(ret);
1211
73.1k
            state->ptr = ptr;
1212
73.1k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
9.24M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
9.24M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
9.24M
                   ptr, pattern[1]));
1565
9.24M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
9.24M
            state->ptr = ptr - pattern[1];
1568
9.24M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
9.24M
            RETURN_ON_FAILURE(ret);
1570
5.33M
            pattern += pattern[0];
1571
5.33M
            DISPATCH;
1572
1573
52.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
52.7M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
52.7M
                   ptr, pattern[1]));
1578
52.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
52.7M
                state->ptr = ptr - pattern[1];
1580
52.7M
                LASTMARK_SAVE();
1581
52.7M
                if (state->repeat)
1582
52.7M
                    MARK_PUSH(ctx->lastmark);
1583
1584
105M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
105M
                if (ret) {
1586
86.6k
                    if (state->repeat)
1587
86.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
86.6k
                    RETURN_ON_ERROR(ret);
1589
86.6k
                    RETURN_FAILURE;
1590
86.6k
                }
1591
52.6M
                if (state->repeat)
1592
52.6M
                    MARK_POP(ctx->lastmark);
1593
52.6M
                LASTMARK_RESTORE();
1594
52.6M
            }
1595
52.6M
            pattern += pattern[0];
1596
52.6M
            DISPATCH;
1597
1598
52.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
455M
exit:
1620
455M
    ctx_pos = ctx->last_ctx_pos;
1621
455M
    jump = ctx->jump;
1622
455M
    DATA_POP_DISCARD(ctx);
1623
455M
    if (ctx_pos == -1) {
1624
211M
        state->sigcount = sigcount;
1625
211M
        return ret;
1626
211M
    }
1627
244M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
244M
    switch (jump) {
1630
81.4M
        case JUMP_MAX_UNTIL_2:
1631
81.4M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
81.4M
            goto jump_max_until_2;
1633
14.2M
        case JUMP_MAX_UNTIL_3:
1634
14.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
14.2M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
21.6M
        case JUMP_BRANCH:
1643
21.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
21.6M
            goto jump_branch;
1645
125k
        case JUMP_MAX_UNTIL_1:
1646
125k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
125k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
14.1M
        case JUMP_REPEAT:
1658
14.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
14.1M
            goto jump_repeat;
1660
2.66M
        case JUMP_REPEAT_ONE_1:
1661
2.66M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.66M
            goto jump_repeat_one_1;
1663
47.8M
        case JUMP_REPEAT_ONE_2:
1664
47.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
47.8M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
9.24M
        case JUMP_ASSERT:
1673
9.24M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
9.24M
            goto jump_assert;
1675
52.7M
        case JUMP_ASSERT_NOT:
1676
52.7M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
52.7M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
244M
    }
1683
1684
0
    return ret; /* should never get here */
1685
244M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
52.2M
{
601
52.2M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
52.2M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
52.2M
    Py_ssize_t ret = 0;
604
52.2M
    int jump;
605
52.2M
    unsigned int sigcount = state->sigcount;
606
607
52.2M
    SRE(match_context)* ctx;
608
52.2M
    SRE(match_context)* nextctx;
609
52.2M
    INIT_TRACE(state);
610
611
52.2M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
52.2M
    DATA_ALLOC(SRE(match_context), ctx);
614
52.2M
    ctx->last_ctx_pos = -1;
615
52.2M
    ctx->jump = JUMP_NONE;
616
52.2M
    ctx->toplevel = toplevel;
617
52.2M
    ctx_pos = alloc_pos;
618
619
52.2M
#if USE_COMPUTED_GOTOS
620
52.2M
#include "sre_targets.h"
621
52.2M
#endif
622
623
250M
entrance:
624
625
250M
    ;  // Fashion statement.
626
250M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
250M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
9.66M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.81k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.81k
                   end - ptr, (size_t) pattern[3]));
634
3.81k
            RETURN_FAILURE;
635
3.81k
        }
636
9.65M
        pattern += pattern[1] + 1;
637
9.65M
    }
638
639
250M
#if USE_COMPUTED_GOTOS
640
250M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
250M
    {
647
648
250M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
68.0M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
68.0M
                   ptr, pattern[0]));
653
68.0M
            {
654
68.0M
                int i = pattern[0];
655
68.0M
                if (i & 1)
656
18.7M
                    state->lastindex = i/2 + 1;
657
68.0M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
66.1M
                    int j = state->lastmark + 1;
663
68.0M
                    while (j < i)
664
1.89M
                        state->mark[j++] = NULL;
665
66.1M
                    state->lastmark = i;
666
66.1M
                }
667
68.0M
                state->mark[i] = ptr;
668
68.0M
            }
669
68.0M
            pattern++;
670
68.0M
            DISPATCH;
671
672
68.0M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
31.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
31.2M
                   ptr, *pattern));
677
31.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
14.0M
                RETURN_FAILURE;
679
17.2M
            pattern++;
680
17.2M
            ptr++;
681
17.2M
            DISPATCH;
682
683
17.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
25.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
25.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
25.4M
            if (ctx->toplevel &&
698
7.07M
                ((state->match_all && ptr != state->end) ||
699
7.07M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
25.4M
            state->ptr = ptr;
704
25.4M
            RETURN_SUCCESS;
705
706
1.08M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.08M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.08M
            if (!SRE(at)(state, ptr, *pattern))
711
1.05M
                RETURN_FAILURE;
712
28.3k
            pattern++;
713
28.3k
            DISPATCH;
714
715
28.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
65.2M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
65.2M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
65.2M
            if (ptr >= end ||
749
65.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
12.9M
                RETURN_FAILURE;
751
52.3M
            pattern += pattern[0];
752
52.3M
            ptr++;
753
52.3M
            DISPATCH;
754
755
52.3M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.35M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.35M
                   pattern, ptr, pattern[0]));
758
2.35M
            if (ptr >= end ||
759
2.35M
                sre_lower_ascii(*ptr) != *pattern)
760
16.9k
                RETURN_FAILURE;
761
2.34M
            pattern++;
762
2.34M
            ptr++;
763
2.34M
            DISPATCH;
764
765
2.34M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
24.5M
        TARGET(SRE_OP_JUMP):
845
24.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
24.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
24.5M
                   ptr, pattern[0]));
850
24.5M
            pattern += pattern[0];
851
24.5M
            DISPATCH;
852
853
29.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
29.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
29.8M
            LASTMARK_SAVE();
858
29.8M
            if (state->repeat)
859
26.9M
                MARK_PUSH(ctx->lastmark);
860
62.6M
            for (; pattern[0]; pattern += pattern[0]) {
861
56.8M
                if (pattern[1] == SRE_OP_LITERAL &&
862
28.2M
                    (ptr >= end ||
863
28.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
19.6M
                    continue;
865
37.1M
                if (pattern[1] == SRE_OP_IN &&
866
22.1M
                    (ptr >= end ||
867
22.1M
                     !SRE(charset)(state, pattern + 3,
868
22.1M
                                   (SRE_CODE) *ptr)))
869
12.3M
                    continue;
870
24.7M
                state->ptr = ptr;
871
24.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
24.7M
                if (ret) {
873
24.0M
                    if (state->repeat)
874
21.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.0M
                    RETURN_ON_ERROR(ret);
876
24.0M
                    RETURN_SUCCESS;
877
24.0M
                }
878
717k
                if (state->repeat)
879
4.52k
                    MARK_POP_KEEP(ctx->lastmark);
880
717k
                LASTMARK_RESTORE();
881
717k
            }
882
5.83M
            if (state->repeat)
883
5.22M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.83M
            RETURN_FAILURE;
885
886
89.2M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
89.2M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
89.2M
                   pattern[1], pattern[2]));
898
899
89.2M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
16.9k
                RETURN_FAILURE; /* cannot match */
901
902
89.2M
            state->ptr = ptr;
903
904
89.2M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
89.2M
            RETURN_ON_ERROR(ret);
906
89.2M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
89.2M
            ctx->count = ret;
908
89.2M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
89.2M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
34.3M
                RETURN_FAILURE;
917
918
54.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.08M
                ptr == state->end &&
920
5.44k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
5.44k
            {
922
                /* tail is empty.  we're finished */
923
5.44k
                state->ptr = ptr;
924
5.44k
                RETURN_SUCCESS;
925
5.44k
            }
926
927
54.9M
            LASTMARK_SAVE();
928
54.9M
            if (state->repeat)
929
42.4M
                MARK_PUSH(ctx->lastmark);
930
931
54.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
11.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
11.6M
                for (;;) {
936
33.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
27.5M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
21.9M
                        ptr--;
939
21.9M
                        ctx->count--;
940
21.9M
                    }
941
11.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
6.07M
                        break;
943
5.57M
                    state->ptr = ptr;
944
5.57M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.57M
                            pattern+pattern[0]);
946
5.57M
                    if (ret) {
947
5.56M
                        if (state->repeat)
948
5.56M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.56M
                        RETURN_ON_ERROR(ret);
950
5.56M
                        RETURN_SUCCESS;
951
5.56M
                    }
952
288
                    if (state->repeat)
953
288
                        MARK_POP_KEEP(ctx->lastmark);
954
288
                    LASTMARK_RESTORE();
955
956
288
                    ptr--;
957
288
                    ctx->count--;
958
288
                }
959
6.07M
                if (state->repeat)
960
6.06M
                    MARK_POP_DISCARD(ctx->lastmark);
961
43.2M
            } else {
962
                /* general case */
963
50.2M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
50.1M
                    state->ptr = ptr;
965
50.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
50.1M
                            pattern+pattern[0]);
967
50.1M
                    if (ret) {
968
43.2M
                        if (state->repeat)
969
30.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
43.2M
                        RETURN_ON_ERROR(ret);
971
43.2M
                        RETURN_SUCCESS;
972
43.2M
                    }
973
6.91M
                    if (state->repeat)
974
100k
                        MARK_POP_KEEP(ctx->lastmark);
975
6.91M
                    LASTMARK_RESTORE();
976
977
6.91M
                    ptr--;
978
6.91M
                    ctx->count--;
979
6.91M
                }
980
77.2k
                if (state->repeat)
981
66.2k
                    MARK_POP_DISCARD(ctx->lastmark);
982
77.2k
            }
983
6.14M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
17.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
17.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
17.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
17.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
17.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
17.1M
            ctx->u.rep->count = -1;
1131
17.1M
            ctx->u.rep->pattern = pattern;
1132
17.1M
            ctx->u.rep->prev = state->repeat;
1133
17.1M
            ctx->u.rep->last_ptr = NULL;
1134
17.1M
            state->repeat = ctx->u.rep;
1135
1136
17.1M
            state->ptr = ptr;
1137
17.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
17.1M
            state->repeat = ctx->u.rep->prev;
1139
17.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
17.1M
            if (ret) {
1142
17.1M
                RETURN_ON_ERROR(ret);
1143
17.1M
                RETURN_SUCCESS;
1144
17.1M
            }
1145
464
            RETURN_FAILURE;
1146
1147
58.9M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
58.9M
            ctx->u.rep = state->repeat;
1155
58.9M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
58.9M
            state->ptr = ptr;
1159
1160
58.9M
            ctx->count = ctx->u.rep->count+1;
1161
1162
58.9M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
58.9M
                   ptr, ctx->count));
1164
1165
58.9M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
2.99k
                ctx->u.rep->count = ctx->count;
1168
2.99k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
2.99k
                        ctx->u.rep->pattern+3);
1170
2.99k
                if (ret) {
1171
2.94k
                    RETURN_ON_ERROR(ret);
1172
2.94k
                    RETURN_SUCCESS;
1173
2.94k
                }
1174
49
                ctx->u.rep->count = ctx->count-1;
1175
49
                state->ptr = ptr;
1176
49
                RETURN_FAILURE;
1177
49
            }
1178
1179
58.9M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.98M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
53.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
53.9M
                ctx->u.rep->count = ctx->count;
1185
53.9M
                LASTMARK_SAVE();
1186
53.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
53.9M
                LAST_PTR_PUSH();
1189
53.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
53.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
53.9M
                        ctx->u.rep->pattern+3);
1192
53.9M
                LAST_PTR_POP();
1193
53.9M
                if (ret) {
1194
41.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
41.7M
                    RETURN_ON_ERROR(ret);
1196
41.7M
                    RETURN_SUCCESS;
1197
41.7M
                }
1198
12.1M
                MARK_POP(ctx->lastmark);
1199
12.1M
                LASTMARK_RESTORE();
1200
12.1M
                ctx->u.rep->count = ctx->count-1;
1201
12.1M
                state->ptr = ptr;
1202
12.1M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
17.1M
            state->repeat = ctx->u.rep->prev;
1207
17.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
17.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
17.1M
            RETURN_ON_SUCCESS(ret);
1211
34.9k
            state->ptr = ptr;
1212
34.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.7M
                   ptr, pattern[1]));
1565
12.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.7M
            state->ptr = ptr - pattern[1];
1568
12.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.7M
            RETURN_ON_FAILURE(ret);
1570
11.2M
            pattern += pattern[0];
1571
11.2M
            DISPATCH;
1572
1573
16.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
16.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
16.4M
                   ptr, pattern[1]));
1578
16.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
16.4M
                state->ptr = ptr - pattern[1];
1580
16.4M
                LASTMARK_SAVE();
1581
16.4M
                if (state->repeat)
1582
16.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
32.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
32.9M
                if (ret) {
1586
9.84k
                    if (state->repeat)
1587
9.84k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
9.84k
                    RETURN_ON_ERROR(ret);
1589
9.84k
                    RETURN_FAILURE;
1590
9.84k
                }
1591
16.4M
                if (state->repeat)
1592
16.4M
                    MARK_POP(ctx->lastmark);
1593
16.4M
                LASTMARK_RESTORE();
1594
16.4M
            }
1595
16.4M
            pattern += pattern[0];
1596
16.4M
            DISPATCH;
1597
1598
16.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
250M
exit:
1620
250M
    ctx_pos = ctx->last_ctx_pos;
1621
250M
    jump = ctx->jump;
1622
250M
    DATA_POP_DISCARD(ctx);
1623
250M
    if (ctx_pos == -1) {
1624
52.2M
        state->sigcount = sigcount;
1625
52.2M
        return ret;
1626
52.2M
    }
1627
197M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
197M
    switch (jump) {
1630
53.9M
        case JUMP_MAX_UNTIL_2:
1631
53.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
53.9M
            goto jump_max_until_2;
1633
17.1M
        case JUMP_MAX_UNTIL_3:
1634
17.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
17.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
24.7M
        case JUMP_BRANCH:
1643
24.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
24.7M
            goto jump_branch;
1645
2.99k
        case JUMP_MAX_UNTIL_1:
1646
2.99k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
2.99k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
17.1M
        case JUMP_REPEAT:
1658
17.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
17.1M
            goto jump_repeat;
1660
5.57M
        case JUMP_REPEAT_ONE_1:
1661
5.57M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.57M
            goto jump_repeat_one_1;
1663
50.1M
        case JUMP_REPEAT_ONE_2:
1664
50.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
50.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.7M
        case JUMP_ASSERT:
1673
12.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.7M
            goto jump_assert;
1675
16.4M
        case JUMP_ASSERT_NOT:
1676
16.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
16.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
197M
    }
1683
1684
0
    return ret; /* should never get here */
1685
197M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
307M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
111M
{
1694
111M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
111M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
111M
    Py_ssize_t status = 0;
1697
111M
    Py_ssize_t prefix_len = 0;
1698
111M
    Py_ssize_t prefix_skip = 0;
1699
111M
    SRE_CODE* prefix = NULL;
1700
111M
    SRE_CODE* charset = NULL;
1701
111M
    SRE_CODE* overlap = NULL;
1702
111M
    int flags = 0;
1703
111M
    INIT_TRACE(state);
1704
1705
111M
    if (ptr > end)
1706
0
        return 0;
1707
1708
111M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
111M
        flags = pattern[2];
1713
1714
111M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.48M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.48M
                   end - ptr, (size_t) pattern[3]));
1717
5.48M
            return 0;
1718
5.48M
        }
1719
105M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.64M
            end -= pattern[3] - 1;
1723
8.64M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.64M
        }
1726
1727
105M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.64M
            prefix_len = pattern[5];
1731
8.64M
            prefix_skip = pattern[6];
1732
8.64M
            prefix = pattern + 7;
1733
8.64M
            overlap = prefix + prefix_len - 1;
1734
97.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
88.9M
            charset = pattern + 5;
1738
1739
105M
        pattern += 1 + pattern[1];
1740
105M
    }
1741
1742
105M
    TRACE(("prefix = %p %zd %zd\n",
1743
105M
           prefix, prefix_len, prefix_skip));
1744
105M
    TRACE(("charset = %p\n", charset));
1745
1746
105M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.61M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.36M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.36M
#endif
1753
5.36M
        end = (SRE_CHAR *)state->end;
1754
5.36M
        state->must_advance = 0;
1755
8.20M
        while (ptr < end) {
1756
116M
            while (*ptr != c) {
1757
109M
                if (++ptr >= end)
1758
433k
                    return 0;
1759
109M
            }
1760
7.64M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.64M
            state->start = ptr;
1762
7.64M
            state->ptr = ptr + prefix_skip;
1763
7.64M
            if (flags & SRE_INFO_LITERAL)
1764
7.57k
                return 1; /* we got all of it */
1765
7.64M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.64M
            if (status != 0)
1767
7.04M
                return status;
1768
591k
            ++ptr;
1769
591k
            RESET_CAPTURE_GROUP();
1770
591k
        }
1771
121k
        return 0;
1772
5.36M
    }
1773
1774
98.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
1.03M
        Py_ssize_t i = 0;
1778
1779
1.03M
        end = (SRE_CHAR *)state->end;
1780
1.03M
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
2.31M
        for (i = 0; i < prefix_len; i++)
1784
1.54M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
772k
#endif
1787
1.70M
        while (ptr < end) {
1788
1.70M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
20.9M
            while (*ptr++ != c) {
1790
19.2M
                if (ptr >= end)
1791
372
                    return 0;
1792
19.2M
            }
1793
1.70M
            if (ptr >= end)
1794
61
                return 0;
1795
1796
1.70M
            i = 1;
1797
1.70M
            state->must_advance = 0;
1798
1.70M
            do {
1799
1.70M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.62M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.62M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.62M
                    state->start = ptr - (prefix_len - 1);
1808
1.62M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.62M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.62M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.62M
                    if (status != 0)
1813
1.03M
                        return status;
1814
                    /* close but no cigar -- try again */
1815
592k
                    if (++ptr >= end)
1816
68
                        return 0;
1817
592k
                    RESET_CAPTURE_GROUP();
1818
592k
                }
1819
671k
                i = overlap[i];
1820
671k
            } while (i != 0);
1821
1.70M
        }
1822
0
        return 0;
1823
1.03M
    }
1824
1825
97.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
88.9M
        end = (SRE_CHAR *)state->end;
1828
88.9M
        state->must_advance = 0;
1829
91.3M
        for (;;) {
1830
360M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
268M
                ptr++;
1832
91.3M
            if (ptr >= end)
1833
3.35M
                return 0;
1834
87.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
87.9M
            state->start = ptr;
1836
87.9M
            state->ptr = ptr;
1837
87.9M
            status = SRE(match)(state, pattern, 0);
1838
87.9M
            if (status != 0)
1839
85.6M
                break;
1840
2.37M
            ptr++;
1841
2.37M
            RESET_CAPTURE_GROUP();
1842
2.37M
        }
1843
88.9M
    } else {
1844
        /* general case */
1845
8.34M
        assert(ptr <= end);
1846
8.34M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.34M
        state->start = state->ptr = ptr;
1848
8.34M
        status = SRE(match)(state, pattern, 1);
1849
8.34M
        state->must_advance = 0;
1850
8.34M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.05M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
90
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.05M
        {
1854
4.05M
            state->start = state->ptr = ptr = end;
1855
4.05M
            return 0;
1856
4.05M
        }
1857
307M
        while (status == 0 && ptr < end) {
1858
303M
            ptr++;
1859
303M
            RESET_CAPTURE_GROUP();
1860
303M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
303M
            state->start = state->ptr = ptr;
1862
303M
            status = SRE(match)(state, pattern, 0);
1863
303M
        }
1864
4.29M
    }
1865
1866
89.8M
    return status;
1867
97.3M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
53.7M
{
1694
53.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
53.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
53.7M
    Py_ssize_t status = 0;
1697
53.7M
    Py_ssize_t prefix_len = 0;
1698
53.7M
    Py_ssize_t prefix_skip = 0;
1699
53.7M
    SRE_CODE* prefix = NULL;
1700
53.7M
    SRE_CODE* charset = NULL;
1701
53.7M
    SRE_CODE* overlap = NULL;
1702
53.7M
    int flags = 0;
1703
53.7M
    INIT_TRACE(state);
1704
1705
53.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
53.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
53.7M
        flags = pattern[2];
1713
1714
53.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.38M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.38M
                   end - ptr, (size_t) pattern[3]));
1717
5.38M
            return 0;
1718
5.38M
        }
1719
48.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.26M
            end -= pattern[3] - 1;
1723
2.26M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.26M
        }
1726
1727
48.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.26M
            prefix_len = pattern[5];
1731
2.26M
            prefix_skip = pattern[6];
1732
2.26M
            prefix = pattern + 7;
1733
2.26M
            overlap = prefix + prefix_len - 1;
1734
46.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
40.3M
            charset = pattern + 5;
1738
1739
48.4M
        pattern += 1 + pattern[1];
1740
48.4M
    }
1741
1742
48.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
48.4M
           prefix, prefix_len, prefix_skip));
1744
48.4M
    TRACE(("charset = %p\n", charset));
1745
1746
48.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.16M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.16M
#if SIZEOF_SRE_CHAR < 4
1750
2.16M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.16M
#endif
1753
2.16M
        end = (SRE_CHAR *)state->end;
1754
2.16M
        state->must_advance = 0;
1755
2.53M
        while (ptr < end) {
1756
30.7M
            while (*ptr != c) {
1757
28.6M
                if (++ptr >= end)
1758
351k
                    return 0;
1759
28.6M
            }
1760
2.06M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.06M
            state->start = ptr;
1762
2.06M
            state->ptr = ptr + prefix_skip;
1763
2.06M
            if (flags & SRE_INFO_LITERAL)
1764
612
                return 1; /* we got all of it */
1765
2.06M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.06M
            if (status != 0)
1767
1.69M
                return status;
1768
375k
            ++ptr;
1769
375k
            RESET_CAPTURE_GROUP();
1770
375k
        }
1771
116k
        return 0;
1772
2.16M
    }
1773
1774
46.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
100k
        Py_ssize_t i = 0;
1778
1779
100k
        end = (SRE_CHAR *)state->end;
1780
100k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
100k
#if SIZEOF_SRE_CHAR < 4
1783
300k
        for (i = 0; i < prefix_len; i++)
1784
200k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
100k
#endif
1787
176k
        while (ptr < end) {
1788
176k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.21M
            while (*ptr++ != c) {
1790
3.03M
                if (ptr >= end)
1791
74
                    return 0;
1792
3.03M
            }
1793
176k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
175k
            i = 1;
1797
175k
            state->must_advance = 0;
1798
176k
            do {
1799
176k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
166k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
166k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
166k
                    state->start = ptr - (prefix_len - 1);
1808
166k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
166k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
166k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
166k
                    if (status != 0)
1813
99.9k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
66.5k
                    if (++ptr >= end)
1816
28
                        return 0;
1817
66.5k
                    RESET_CAPTURE_GROUP();
1818
66.5k
                }
1819
76.7k
                i = overlap[i];
1820
76.7k
            } while (i != 0);
1821
175k
        }
1822
0
        return 0;
1823
100k
    }
1824
1825
46.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
40.3M
        end = (SRE_CHAR *)state->end;
1828
40.3M
        state->must_advance = 0;
1829
41.9M
        for (;;) {
1830
100M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
58.1M
                ptr++;
1832
41.9M
            if (ptr >= end)
1833
2.35M
                return 0;
1834
39.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
39.5M
            state->start = ptr;
1836
39.5M
            state->ptr = ptr;
1837
39.5M
            status = SRE(match)(state, pattern, 0);
1838
39.5M
            if (status != 0)
1839
38.0M
                break;
1840
1.53M
            ptr++;
1841
1.53M
            RESET_CAPTURE_GROUP();
1842
1.53M
        }
1843
40.3M
    } else {
1844
        /* general case */
1845
5.75M
        assert(ptr <= end);
1846
5.75M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.75M
        state->start = state->ptr = ptr;
1848
5.75M
        status = SRE(match)(state, pattern, 1);
1849
5.75M
        state->must_advance = 0;
1850
5.75M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.63M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
26
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.63M
        {
1854
2.63M
            state->start = state->ptr = ptr = end;
1855
2.63M
            return 0;
1856
2.63M
        }
1857
126M
        while (status == 0 && ptr < end) {
1858
123M
            ptr++;
1859
123M
            RESET_CAPTURE_GROUP();
1860
123M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
123M
            state->start = state->ptr = ptr;
1862
123M
            status = SRE(match)(state, pattern, 0);
1863
123M
        }
1864
3.11M
    }
1865
1866
41.1M
    return status;
1867
46.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
50.5M
{
1694
50.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
50.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
50.5M
    Py_ssize_t status = 0;
1697
50.5M
    Py_ssize_t prefix_len = 0;
1698
50.5M
    Py_ssize_t prefix_skip = 0;
1699
50.5M
    SRE_CODE* prefix = NULL;
1700
50.5M
    SRE_CODE* charset = NULL;
1701
50.5M
    SRE_CODE* overlap = NULL;
1702
50.5M
    int flags = 0;
1703
50.5M
    INIT_TRACE(state);
1704
1705
50.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
50.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
50.5M
        flags = pattern[2];
1713
1714
50.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
92.5k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
92.5k
                   end - ptr, (size_t) pattern[3]));
1717
92.5k
            return 0;
1718
92.5k
        }
1719
50.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.87M
            end -= pattern[3] - 1;
1723
3.87M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.87M
        }
1726
1727
50.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.87M
            prefix_len = pattern[5];
1731
3.87M
            prefix_skip = pattern[6];
1732
3.87M
            prefix = pattern + 7;
1733
3.87M
            overlap = prefix + prefix_len - 1;
1734
46.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
44.0M
            charset = pattern + 5;
1738
1739
50.4M
        pattern += 1 + pattern[1];
1740
50.4M
    }
1741
1742
50.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
50.4M
           prefix, prefix_len, prefix_skip));
1744
50.4M
    TRACE(("charset = %p\n", charset));
1745
1746
50.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.20M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.20M
#if SIZEOF_SRE_CHAR < 4
1750
3.20M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.20M
#endif
1753
3.20M
        end = (SRE_CHAR *)state->end;
1754
3.20M
        state->must_advance = 0;
1755
3.33M
        while (ptr < end) {
1756
59.5M
            while (*ptr != c) {
1757
56.2M
                if (++ptr >= end)
1758
77.0k
                    return 0;
1759
56.2M
            }
1760
3.25M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.25M
            state->start = ptr;
1762
3.25M
            state->ptr = ptr + prefix_skip;
1763
3.25M
            if (flags & SRE_INFO_LITERAL)
1764
4.53k
                return 1; /* we got all of it */
1765
3.24M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.24M
            if (status != 0)
1767
3.11M
                return status;
1768
130k
            ++ptr;
1769
130k
            RESET_CAPTURE_GROUP();
1770
130k
        }
1771
3.97k
        return 0;
1772
3.20M
    }
1773
1774
47.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
672k
        Py_ssize_t i = 0;
1778
1779
672k
        end = (SRE_CHAR *)state->end;
1780
672k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
672k
#if SIZEOF_SRE_CHAR < 4
1783
2.01M
        for (i = 0; i < prefix_len; i++)
1784
1.34M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
672k
#endif
1787
1.01M
        while (ptr < end) {
1788
1.01M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.17M
            while (*ptr++ != c) {
1790
7.16M
                if (ptr >= end)
1791
144
                    return 0;
1792
7.16M
            }
1793
1.01M
            if (ptr >= end)
1794
20
                return 0;
1795
1796
1.01M
            i = 1;
1797
1.01M
            state->must_advance = 0;
1798
1.01M
            do {
1799
1.01M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
982k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
982k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
982k
                    state->start = ptr - (prefix_len - 1);
1808
982k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
982k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
982k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
982k
                    if (status != 0)
1813
672k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
309k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
309k
                    RESET_CAPTURE_GROUP();
1818
309k
                }
1819
338k
                i = overlap[i];
1820
338k
            } while (i != 0);
1821
1.01M
        }
1822
0
        return 0;
1823
672k
    }
1824
1825
46.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
44.0M
        end = (SRE_CHAR *)state->end;
1828
44.0M
        state->must_advance = 0;
1829
44.4M
        for (;;) {
1830
196M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
151M
                ptr++;
1832
44.4M
            if (ptr >= end)
1833
954k
                return 0;
1834
43.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
43.4M
            state->start = ptr;
1836
43.4M
            state->ptr = ptr;
1837
43.4M
            status = SRE(match)(state, pattern, 0);
1838
43.4M
            if (status != 0)
1839
43.1M
                break;
1840
355k
            ptr++;
1841
355k
            RESET_CAPTURE_GROUP();
1842
355k
        }
1843
44.0M
    } else {
1844
        /* general case */
1845
2.45M
        assert(ptr <= end);
1846
2.45M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.45M
        state->start = state->ptr = ptr;
1848
2.45M
        status = SRE(match)(state, pattern, 1);
1849
2.45M
        state->must_advance = 0;
1850
2.45M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
33
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.40M
        {
1854
1.40M
            state->start = state->ptr = ptr = end;
1855
1.40M
            return 0;
1856
1.40M
        }
1857
146M
        while (status == 0 && ptr < end) {
1858
145M
            ptr++;
1859
145M
            RESET_CAPTURE_GROUP();
1860
145M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
145M
            state->start = state->ptr = ptr;
1862
145M
            status = SRE(match)(state, pattern, 0);
1863
145M
        }
1864
1.04M
    }
1865
1866
44.1M
    return status;
1867
46.5M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.13M
{
1694
7.13M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.13M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.13M
    Py_ssize_t status = 0;
1697
7.13M
    Py_ssize_t prefix_len = 0;
1698
7.13M
    Py_ssize_t prefix_skip = 0;
1699
7.13M
    SRE_CODE* prefix = NULL;
1700
7.13M
    SRE_CODE* charset = NULL;
1701
7.13M
    SRE_CODE* overlap = NULL;
1702
7.13M
    int flags = 0;
1703
7.13M
    INIT_TRACE(state);
1704
1705
7.13M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.13M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.13M
        flags = pattern[2];
1713
1714
7.13M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.57k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.57k
                   end - ptr, (size_t) pattern[3]));
1717
4.57k
            return 0;
1718
4.57k
        }
1719
7.12M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.50M
            end -= pattern[3] - 1;
1723
2.50M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.50M
        }
1726
1727
7.12M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.50M
            prefix_len = pattern[5];
1731
2.50M
            prefix_skip = pattern[6];
1732
2.50M
            prefix = pattern + 7;
1733
2.50M
            overlap = prefix + prefix_len - 1;
1734
4.61M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.47M
            charset = pattern + 5;
1738
1739
7.12M
        pattern += 1 + pattern[1];
1740
7.12M
    }
1741
1742
7.12M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.12M
           prefix, prefix_len, prefix_skip));
1744
7.12M
    TRACE(("charset = %p\n", charset));
1745
1746
7.12M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.24M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.24M
        end = (SRE_CHAR *)state->end;
1754
2.24M
        state->must_advance = 0;
1755
2.33M
        while (ptr < end) {
1756
26.6M
            while (*ptr != c) {
1757
24.3M
                if (++ptr >= end)
1758
4.66k
                    return 0;
1759
24.3M
            }
1760
2.32M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.32M
            state->start = ptr;
1762
2.32M
            state->ptr = ptr + prefix_skip;
1763
2.32M
            if (flags & SRE_INFO_LITERAL)
1764
2.42k
                return 1; /* we got all of it */
1765
2.32M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.32M
            if (status != 0)
1767
2.23M
                return status;
1768
84.9k
            ++ptr;
1769
84.9k
            RESET_CAPTURE_GROUP();
1770
84.9k
        }
1771
892
        return 0;
1772
2.24M
    }
1773
1774
4.88M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
263k
        Py_ssize_t i = 0;
1778
1779
263k
        end = (SRE_CHAR *)state->end;
1780
263k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
519k
        while (ptr < end) {
1788
519k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
9.53M
            while (*ptr++ != c) {
1790
9.01M
                if (ptr >= end)
1791
154
                    return 0;
1792
9.01M
            }
1793
519k
            if (ptr >= end)
1794
18
                return 0;
1795
1796
519k
            i = 1;
1797
519k
            state->must_advance = 0;
1798
519k
            do {
1799
519k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
480k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
480k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
480k
                    state->start = ptr - (prefix_len - 1);
1808
480k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
480k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
480k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
480k
                    if (status != 0)
1813
263k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
216k
                    if (++ptr >= end)
1816
21
                        return 0;
1817
216k
                    RESET_CAPTURE_GROUP();
1818
216k
                }
1819
256k
                i = overlap[i];
1820
256k
            } while (i != 0);
1821
519k
        }
1822
0
        return 0;
1823
263k
    }
1824
1825
4.61M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.47M
        end = (SRE_CHAR *)state->end;
1828
4.47M
        state->must_advance = 0;
1829
4.95M
        for (;;) {
1830
63.7M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
58.8M
                ptr++;
1832
4.95M
            if (ptr >= end)
1833
47.6k
                return 0;
1834
4.90M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.90M
            state->start = ptr;
1836
4.90M
            state->ptr = ptr;
1837
4.90M
            status = SRE(match)(state, pattern, 0);
1838
4.90M
            if (status != 0)
1839
4.42M
                break;
1840
482k
            ptr++;
1841
482k
            RESET_CAPTURE_GROUP();
1842
482k
        }
1843
4.47M
    } else {
1844
        /* general case */
1845
144k
        assert(ptr <= end);
1846
144k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
144k
        state->start = state->ptr = ptr;
1848
144k
        status = SRE(match)(state, pattern, 1);
1849
144k
        state->must_advance = 0;
1850
144k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
31
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.8k
        {
1854
13.8k
            state->start = state->ptr = ptr = end;
1855
13.8k
            return 0;
1856
13.8k
        }
1857
34.8M
        while (status == 0 && ptr < end) {
1858
34.7M
            ptr++;
1859
34.7M
            RESET_CAPTURE_GROUP();
1860
34.7M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
34.7M
            state->start = state->ptr = ptr;
1862
34.7M
            status = SRE(match)(state, pattern, 0);
1863
34.7M
        }
1864
130k
    }
1865
1866
4.55M
    return status;
1867
4.61M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/