Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.3M
{
18
    /* check if pointer is at given position */
19
20
14.3M
    Py_ssize_t thisp, thatp;
21
22
14.3M
    switch (at) {
23
24
6.89M
    case SRE_AT_BEGINNING:
25
6.89M
    case SRE_AT_BEGINNING_STRING:
26
6.89M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.46M
    case SRE_AT_END:
33
4.46M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.46M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.46M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.03M
    case SRE_AT_END_STRING:
42
3.03M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.3M
    }
87
88
0
    return 0;
89
14.3M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
13.0M
{
18
    /* check if pointer is at given position */
19
20
13.0M
    Py_ssize_t thisp, thatp;
21
22
13.0M
    switch (at) {
23
24
6.83M
    case SRE_AT_BEGINNING:
25
6.83M
    case SRE_AT_BEGINNING_STRING:
26
6.83M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.05M
    case SRE_AT_END:
33
4.05M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.05M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.05M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.11M
    case SRE_AT_END_STRING:
42
2.11M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.0M
    }
87
88
0
    return 0;
89
13.0M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
791k
{
18
    /* check if pointer is at given position */
19
20
791k
    Py_ssize_t thisp, thatp;
21
22
791k
    switch (at) {
23
24
46.9k
    case SRE_AT_BEGINNING:
25
46.9k
    case SRE_AT_BEGINNING_STRING:
26
46.9k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
320k
    case SRE_AT_END:
33
320k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
320k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
320k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
424k
    case SRE_AT_END_STRING:
42
424k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
791k
    }
87
88
0
    return 0;
89
791k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
593k
{
18
    /* check if pointer is at given position */
19
20
593k
    Py_ssize_t thisp, thatp;
21
22
593k
    switch (at) {
23
24
8.47k
    case SRE_AT_BEGINNING:
25
8.47k
    case SRE_AT_BEGINNING_STRING:
26
8.47k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
85.8k
    case SRE_AT_END:
33
85.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
85.8k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
85.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
499k
    case SRE_AT_END_STRING:
42
499k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
593k
    }
87
88
0
    return 0;
89
593k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.73G
{
94
    /* check if character is a member of the given set */
95
96
1.73G
    int ok = 1;
97
98
3.82G
    for (;;) {
99
3.82G
        switch (*set++) {
100
101
1.11G
        case SRE_OP_FAILURE:
102
1.11G
            return !ok;
103
104
1.07G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.07G
            if (ch == set[0])
107
5.28M
                return ok;
108
1.06G
            set++;
109
1.06G
            break;
110
111
11.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.2M
            if (sre_category(set[0], (int) ch))
114
7.71M
                return ok;
115
3.55M
            set++;
116
3.55M
            break;
117
118
910M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
910M
            if (ch < 256 &&
121
910M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
396M
                return ok;
123
514M
            set += 256/SRE_CODE_BITS;
124
514M
            break;
125
126
336M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
336M
            if (set[0] <= ch && ch <= set[1])
129
202M
                return ok;
130
134M
            set += 2;
131
134M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
376M
        case SRE_OP_NEGATE:
148
376M
            ok = !ok;
149
376M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.82G
        }
175
3.82G
    }
176
1.73G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
341M
{
94
    /* check if character is a member of the given set */
95
96
341M
    int ok = 1;
97
98
690M
    for (;;) {
99
690M
        switch (*set++) {
100
101
179M
        case SRE_OP_FAILURE:
102
179M
            return !ok;
103
104
205M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
205M
            if (ch == set[0])
107
2.78M
                return ok;
108
202M
            set++;
109
202M
            break;
110
111
10.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
10.4M
            if (sre_category(set[0], (int) ch))
114
6.90M
                return ok;
115
3.54M
            set++;
116
3.54M
            break;
117
118
90.9M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
90.9M
            if (ch < 256 &&
121
90.9M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
48.6M
                return ok;
123
42.2M
            set += 256/SRE_CODE_BITS;
124
42.2M
            break;
125
126
167M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
167M
            if (set[0] <= ch && ch <= set[1])
129
103M
                return ok;
130
64.1M
            set += 2;
131
64.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
37.1M
        case SRE_OP_NEGATE:
148
37.1M
            ok = !ok;
149
37.1M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
690M
        }
175
690M
    }
176
341M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
776M
{
94
    /* check if character is a member of the given set */
95
96
776M
    int ok = 1;
97
98
1.82G
    for (;;) {
99
1.82G
        switch (*set++) {
100
101
551M
        case SRE_OP_FAILURE:
102
551M
            return !ok;
103
104
597M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
597M
            if (ch == set[0])
107
1.50M
                return ok;
108
595M
            set++;
109
595M
            break;
110
111
171k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
171k
            if (sre_category(set[0], (int) ch))
114
165k
                return ok;
115
6.53k
            set++;
116
6.53k
            break;
117
118
357M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
357M
            if (ch < 256 &&
121
357M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
134M
                return ok;
123
223M
            set += 256/SRE_CODE_BITS;
124
223M
            break;
125
126
150M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
150M
            if (set[0] <= ch && ch <= set[1])
129
89.3M
                return ok;
130
61.3M
            set += 2;
131
61.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
164M
        case SRE_OP_NEGATE:
148
164M
            ok = !ok;
149
164M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.82G
        }
175
1.82G
    }
176
776M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
613M
{
94
    /* check if character is a member of the given set */
95
96
613M
    int ok = 1;
97
98
1.31G
    for (;;) {
99
1.31G
        switch (*set++) {
100
101
388M
        case SRE_OP_FAILURE:
102
388M
            return !ok;
103
104
271M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
271M
            if (ch == set[0])
107
1.00M
                return ok;
108
270M
            set++;
109
270M
            break;
110
111
651k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
651k
            if (sre_category(set[0], (int) ch))
114
650k
                return ok;
115
880
            set++;
116
880
            break;
117
118
462M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
462M
            if (ch < 256 &&
121
462M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
213M
                return ok;
123
249M
            set += 256/SRE_CODE_BITS;
124
249M
            break;
125
126
18.6M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
18.6M
            if (set[0] <= ch && ch <= set[1])
129
9.95M
                return ok;
130
8.72M
            set += 2;
131
8.72M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
174M
        case SRE_OP_NEGATE:
148
174M
            ok = !ok;
149
174M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.31G
        }
175
1.31G
    }
176
613M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
665M
{
195
665M
    SRE_CODE chr;
196
665M
    SRE_CHAR c;
197
665M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
665M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
665M
    Py_ssize_t i;
200
665M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
665M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.5M
        end = ptr + maxcount;
205
206
665M
    switch (pattern[0]) {
207
208
586M
    case SRE_OP_IN:
209
        /* repeated set */
210
586M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
965M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
379M
            ptr++;
213
586M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
72.5M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
72.5M
        chr = pattern[1];
232
72.5M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
72.5M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
69.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
69.6M
        else
238
69.6M
#endif
239
77.3M
        while (ptr < end && *ptr == c)
240
4.83M
            ptr++;
241
72.5M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
7.18M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
7.18M
        chr = pattern[1];
270
7.18M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
7.18M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
3.60M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.60M
        else
276
3.60M
#endif
277
43.6M
        while (ptr < end && *ptr != c)
278
36.4M
            ptr++;
279
7.18M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
665M
    }
319
320
665M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
665M
           ptr - (SRE_CHAR*) state->ptr));
322
665M
    return ptr - (SRE_CHAR*) state->ptr;
323
665M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
159M
{
195
159M
    SRE_CODE chr;
196
159M
    SRE_CHAR c;
197
159M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
159M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
159M
    Py_ssize_t i;
200
159M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
159M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.14M
        end = ptr + maxcount;
205
206
159M
    switch (pattern[0]) {
207
208
95.8M
    case SRE_OP_IN:
209
        /* repeated set */
210
95.8M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
210M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
115M
            ptr++;
213
95.8M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
63.1M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
63.1M
        chr = pattern[1];
232
63.1M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
63.1M
        c = (SRE_CHAR) chr;
234
63.1M
#if SIZEOF_SRE_CHAR < 4
235
63.1M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
63.1M
        else
238
63.1M
#endif
239
65.2M
        while (ptr < end && *ptr == c)
240
2.13M
            ptr++;
241
63.1M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
206k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
206k
        chr = pattern[1];
270
206k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
206k
        c = (SRE_CHAR) chr;
272
206k
#if SIZEOF_SRE_CHAR < 4
273
206k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
206k
        else
276
206k
#endif
277
6.76M
        while (ptr < end && *ptr != c)
278
6.56M
            ptr++;
279
206k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
159M
    }
319
320
159M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
159M
           ptr - (SRE_CHAR*) state->ptr));
322
159M
    return ptr - (SRE_CHAR*) state->ptr;
323
159M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
286M
{
195
286M
    SRE_CODE chr;
196
286M
    SRE_CHAR c;
197
286M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
286M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
286M
    Py_ssize_t i;
200
286M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
286M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.43M
        end = ptr + maxcount;
205
206
286M
    switch (pattern[0]) {
207
208
276M
    case SRE_OP_IN:
209
        /* repeated set */
210
276M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
408M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
131M
            ptr++;
213
276M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.51M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.51M
        chr = pattern[1];
232
6.51M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.51M
        c = (SRE_CHAR) chr;
234
6.51M
#if SIZEOF_SRE_CHAR < 4
235
6.51M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
6.51M
        else
238
6.51M
#endif
239
8.97M
        while (ptr < end && *ptr == c)
240
2.45M
            ptr++;
241
6.51M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.39M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.39M
        chr = pattern[1];
270
3.39M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.39M
        c = (SRE_CHAR) chr;
272
3.39M
#if SIZEOF_SRE_CHAR < 4
273
3.39M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.39M
        else
276
3.39M
#endif
277
12.2M
        while (ptr < end && *ptr != c)
278
8.83M
            ptr++;
279
3.39M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
286M
    }
319
320
286M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
286M
           ptr - (SRE_CHAR*) state->ptr));
322
286M
    return ptr - (SRE_CHAR*) state->ptr;
323
286M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
219M
{
195
219M
    SRE_CODE chr;
196
219M
    SRE_CHAR c;
197
219M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
219M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
219M
    Py_ssize_t i;
200
219M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
219M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.94M
        end = ptr + maxcount;
205
206
219M
    switch (pattern[0]) {
207
208
213M
    case SRE_OP_IN:
209
        /* repeated set */
210
213M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
346M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
133M
            ptr++;
213
213M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.87M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.87M
        chr = pattern[1];
232
2.87M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.87M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
3.12M
        while (ptr < end && *ptr == c)
240
243k
            ptr++;
241
2.87M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.58M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.58M
        chr = pattern[1];
270
3.58M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.58M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
24.6M
        while (ptr < end && *ptr != c)
278
21.0M
            ptr++;
279
3.58M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
219M
    }
319
320
219M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
219M
           ptr - (SRE_CHAR*) state->ptr));
322
219M
    return ptr - (SRE_CHAR*) state->ptr;
323
219M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Snapshot state->lastmark and state->lastindex into the current match
   context.  Relies on `ctx` and `state` being in scope at the use site. */
#define LASTMARK_SAVE()     \
354
776M
    do { \
355
776M
        ctx->lastmark = state->lastmark; \
356
776M
        ctx->lastindex = state->lastindex; \
357
776M
    } while (0)
358
/* Copy the lastmark/lastindex values stored in the current match context
   back into `state`.  Relies on `ctx` and `state` being in scope. */
#define LASTMARK_RESTORE()  \
359
257M
    do { \
360
257M
        state->lastmark = ctx->lastmark; \
361
257M
        state->lastindex = ctx->lastindex; \
362
257M
    } while (0)
363
364
/* Trace the index of ctx->u.rep->last_ptr, then push that pointer value
   onto the data stack with DATA_PUSH().
   NOTE(review): unlike most TRACE calls in this file, the format string
   here has no trailing "\n" -- confirm whether that is intentional. */
#define LAST_PTR_PUSH()     \
365
264M
    do { \
366
264M
        TRACE(("push last_ptr: %zd", \
367
264M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
264M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
264M
    } while (0)
370
/* Pop the top of the data stack into ctx->u.rep->last_ptr with DATA_POP(),
   then trace the index of the restored pointer.
   NOTE(review): the TRACE format string has no trailing "\n" -- confirm
   whether that is intentional. */
#define LAST_PTR_POP()  \
371
264M
    do { \
372
264M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
264M
        TRACE(("pop last_ptr: %zd", \
374
264M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
264M
    } while (0)
376
377
0
/* Exit helpers for the matching function.
 *
 * RETURN_ERROR(i)      - return `i` from the enclosing function at once.
 * RETURN_FAILURE       - set `ret` to 0 and jump to the `exit` label.
 * RETURN_SUCCESS       - set `ret` to 1 and jump to the `exit` label.
 * RETURN_ON_ERROR(i)   - RETURN_ERROR(i) when `i` is negative.
 * RETURN_ON_SUCCESS(i) - as RETURN_ON_ERROR, then RETURN_SUCCESS if i > 0.
 * RETURN_ON_FAILURE(i) - as RETURN_ON_ERROR, then RETURN_FAILURE if i == 0.
 *
 * The macros rely on a local `ret` and an `exit:` label at the use site.
 * The argument is parenthesized in every expansion so that compound
 * expressions (e.g. `a & b`) are compared as a whole; note that it may
 * still be evaluated more than once.
 */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
387
388
1.58G
/* Reserve sizeof(type) bytes at the current top of state's data stack and
   point `ptr` at them.

   Side effects visible at the use site:
   - `alloc_pos` is set to the offset of the new allocation;
   - if the remaining space is too small, data_stack_grow() is called and a
     negative result is returned directly from the enclosing function;
   - after growing, `ctx` is looked up again at `ctx_pos` (unless ctx_pos
     is -1) -- presumably because growing can move the stack; confirm
     against data_stack_grow();
   - state->data_stack_base is advanced by sizeof(type).

   Relies on `alloc_pos`, `ctx_pos` and `ctx` being in scope. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.58G
do { \
390
1.58G
    alloc_pos = state->data_stack_base; \
391
1.58G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.58G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.58G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
170M
        int j = data_stack_grow(state, sizeof(type)); \
395
170M
        if (j < 0) return j; \
396
170M
        if (ctx_pos != -1) \
397
170M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
170M
    } \
399
1.58G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.58G
    state->data_stack_base += sizeof(type); \
401
1.58G
} while (0)
402
403
1.74G
/* Point `ptr` at the object of the given `type` located `pos` bytes from
   the start of state's data stack.  Nothing is copied or popped. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.74G
do { \
405
1.74G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.74G
    ptr = (type*)(state->data_stack+pos); \
407
1.74G
} while (0)
408
409
659M
/* Copy `size` bytes from `data` onto the top of state's data stack and
   advance state->data_stack_base by `size`.

   If the remaining space is too small, data_stack_grow() is called; a
   negative result is returned directly from the enclosing function, and
   `ctx` is looked up again at `ctx_pos` (unless ctx_pos is -1).

   Relies on `ctx_pos` and `ctx` being in scope.  `size` and `data` are
   expanded several times without parentheses, so pass simple expressions. */
#define DATA_STACK_PUSH(state, data, size) \
410
659M
do { \
411
659M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
659M
           data, state->data_stack_base, size)); \
413
659M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
86.6k
        int j = data_stack_grow(state, size); \
415
86.6k
        if (j < 0) return j; \
416
86.6k
        if (ctx_pos != -1) \
417
86.6k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
86.6k
    } \
419
659M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
659M
    state->data_stack_base += size; \
421
659M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`, see bpo-39943 for details. */
426
396M
/* Copy the top `size` bytes of state's data stack into `data`.  When
   `discard` is non-zero the bytes are also removed from the stack by
   lowering state->data_stack_base; otherwise the stack is left unchanged.

   Every macro parameter is parenthesized in the expansion so that a
   compound `size` such as `a + b` yields the correct source address
   (base - (a + b), not base - a + b).  `size` and `data` are still
   evaluated more than once. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %zd (%zd)\n", \
           (data), (state)->data_stack_base - (size), (size))); \
    memcpy((void*) (data), \
           (state)->data_stack + (state)->data_stack_base - (size), \
           (size)); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)
434
435
1.85G
/* Drop the top `size` bytes of state's data stack without copying them
   anywhere: only state->data_stack_base is lowered. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.85G
do { \
437
1.85G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.85G
           state->data_stack_base-size, size)); \
439
1.85G
    state->data_stack_base -= size; \
440
1.85G
} while(0)
441
442
/* Convenience wrappers around the DATA_STACK_* macros that always operate
   on the local `state`.  For the push/pop forms `x` is a pointer and the
   size is taken from the pointed-to object, sizeof(*(x)). */

/* Push the object `x` points to. */
#define DATA_PUSH(x) \
443
264M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop into the object `x` points to, removing it from the stack. */
#define DATA_POP(x) \
445
264M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Remove sizeof(*(x)) bytes from the stack without copying them. */
#define DATA_POP_DISCARD(x) \
447
1.58G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Allocate an object of type `t` on the stack and point `p` at it. */
#define DATA_ALLOC(t,p) \
449
1.58G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point `p` at the object of type `t` at stack offset `pos`. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.74G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into a character index relative to state->beginning:
   the byte distance divided by state->charsize.  A NULL pointer maps
   to -1.  Relies on `state` being in scope. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): debugging aid.
   When VERBOSE is non-zero and DO_TRACE is true, prints `label`, the number
   of marks (lastmark + 1) and the index of each of state->mark[0..lastmark],
   with an extra space before every even-numbered entry after the first,
   followed by a newline.
   When VERBOSE is zero the macro expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Push state->mark[0..lastmark] onto the data stack.  Does nothing when
   `lastmark` is negative.  Relies on `state` (plus whatever
   DATA_STACK_PUSH needs) being in scope.

   `lastmark` is parenthesized wherever it is expanded so that a compound
   argument is compared and incremented as a whole. */
#define MARK_PUSH(lastmark) \
    do if ((lastmark) >= 0) { \
        MARK_TRACE("push", (lastmark)); \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
    } while (0)
477
/* Pop (lastmark + 1) saved mark pointers from the data stack back into
   state->mark, removing them from the stack.  Does nothing when
   `lastmark` is negative.  Relies on `state` being in scope.

   `lastmark` is parenthesized wherever it is expanded so that a compound
   argument is compared and incremented as a whole. */
#define MARK_POP(lastmark) \
    do if ((lastmark) >= 0) { \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
        MARK_TRACE("pop", (lastmark)); \
    } while (0)
483
/* Copy (lastmark + 1) saved mark pointers from the top of the data stack
   back into state->mark but leave them on the stack (discard == 0).
   Does nothing when `lastmark` is negative.  Relies on `state` being in
   scope.

   `lastmark` is parenthesized wherever it is expanded so that a compound
   argument is compared and incremented as a whole. */
#define MARK_POP_KEEP(lastmark) \
    do if ((lastmark) >= 0) { \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
        MARK_TRACE("pop keep", (lastmark)); \
    } while (0)
489
/* Remove (lastmark + 1) saved mark pointers from the data stack without
   copying them back into state->mark.  Does nothing when `lastmark` is
   negative.  Relies on `state` being in scope.

   `lastmark` is parenthesized wherever it is expanded so that a compound
   argument is compared and incremented as a whole. */
#define MARK_POP_DISCARD(lastmark) \
    do if ((lastmark) >= 0) { \
        size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
        DATA_STACK_POP_DISCARD(state, _marks_size); \
        MARK_TRACE("pop discard", (lastmark)); \
    } while (0)
495
496
511M
/* Identifiers stored in a match context's `jump` field.  JUMP_NONE marks
   the initial context created on entry to the matching function; the other
   values are passed as the `jumpvalue` argument of DO_JUMP()/DO_JUMP0()
   (e.g. JUMP_BRANCH for the BRANCH opcode).
   Presumably each value selects the label to resume at once the nested
   match finishes -- confirm against the exit code of SRE(match). */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
264M
#define JUMP_MAX_UNTIL_2     2
499
142M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
141M
#define JUMP_REPEAT          7
504
17.6M
#define JUMP_REPEAT_ONE_1    8
505
189M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
181M
#define JUMP_BRANCH          11
508
95.2M
#define JUMP_ASSERT          12
509
45.9M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without a C-level
   recursive call.

   Steps, in order:
   1. save the current `pattern` and `ptr` in `ctx`;
   2. allocate a new context (`nextctx`) on the data stack and fill in its
      pattern, toplevel flag (`toplevel_`), jump id (`jumpvalue`) and the
      position of the current context (`last_ctx_pos`);
   3. make the new context current (`pattern`, `ctx_pos`, `ctx`) and jump
      to the `entrance` label;
   4. define `jumplabel:`; code reaching that label reloads `pattern` and
      `ptr` from `ctx`.

   How control gets back to `jumplabel` is not visible here -- presumably
   the exit path dispatches on the finished context's `jump` value; confirm.

   The expansion is a plain statement sequence containing a label (not a
   do/while block), so it must be used as a statement on its own.  Relies on
   `ctx`, `nextctx`, `pattern`, `ptr`, `ctx_pos` and `alloc_pos` in scope. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.07G
    ctx->pattern = pattern; \
516
1.07G
    ctx->ptr = ptr; \
517
1.07G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.07G
    nextctx->pattern = nextpattern; \
519
1.07G
    nextctx->toplevel = toplevel_; \
520
1.07G
    nextctx->jump = jumpvalue; \
521
1.07G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.07G
    pattern = nextpattern; \
523
1.07G
    ctx_pos = alloc_pos; \
524
1.07G
    ctx = nextctx; \
525
1.07G
    goto entrance; \
526
1.07G
    jumplabel: \
527
1.07G
    pattern = ctx->pattern; \
528
1.07G
    ptr = ctx->ptr;
529
530
/* DO_JUMPX() variant in which the new context inherits the current
   context's `toplevel` flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
936M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX() variant in which the new context's `toplevel` flag is
   always 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
141M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-invocation bookkeeping for the matching function.  Contexts are
   allocated on state's data stack and linked through `last_ctx_pos`. */
typedef struct {
537
    Py_ssize_t count;           /* NOTE(review): use not visible here; presumably a repeat counter */
538
    union {
539
        SRE_CODE chr;           /* NOTE(review): use not visible here */
540
        SRE_REPEAT* rep;        /* repeat record; its last_ptr is saved by LAST_PTR_PUSH() */
541
    } u;
542
    int lastmark;               /* state->lastmark as saved by LASTMARK_SAVE() */
543
    int lastindex;              /* state->lastindex as saved by LASTMARK_SAVE() */
544
    const SRE_CODE* pattern;    /* pattern position saved/restored by DO_JUMPX() */
545
    const SRE_CHAR* ptr;        /* string position saved/restored by DO_JUMPX() */
546
    int toplevel;               /* toplevel flag; checked by the SUCCESS opcode */
547
    int jump;                   /* one of the JUMP_* ids */
548
    Py_ssize_t last_ctx_pos;    /* data-stack offset of the previous context, -1 for the first */
549
} SRE(match_context);
550
551
/* Increment the local `sigcount`; each time its low 12 bits wrap to zero
   (i.e. once every 4096 increments) call PyErr_CheckSignals() and, if it
   reports a non-zero result, leave the enclosing function with
   SRE_ERROR_INTERRUPTED. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.78G
    do {                                                           \
553
2.78G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.78G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS: the signal check used by the dispatch loop.
   In Py_DEBUG builds it additionally supports fault injection: while
   state->fail_after_count is non-negative it is decremented on every use,
   and when it was 0 before the decrement the exception
   state->fail_after_exc is set and the enclosing function returns
   SRE_ERROR_INTERRUPTED.
   In non-debug builds it is just _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.78G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalize USE_COMPUTED_GOTOS to a defined 0/1 value:
   - HAVE_COMPUTED_GOTOS defined: default USE_COMPUTED_GOTOS to 1 unless the
     builder already defined it;
   - not available but explicitly requested (non-zero): compile-time error;
   - otherwise: force USE_COMPUTED_GOTOS to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch primitives.
   With computed gotos, TARGET(OP) names the label TARGET_<OP> and DISPATCH
   runs the signal check, then jumps through sre_targets[] indexed by the
   next opcode (advancing `pattern`).
   Without them, TARGET(OP) is a `case` label and DISPATCH jumps to the
   `dispatch` label. */
#if USE_COMPUTED_GOTOS
585
2.88G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.78G
        do {                               \
588
2.78G
            MAYBE_CHECK_SIGNALS;           \
589
2.78G
            goto *sre_targets[*pattern++]; \
590
2.78G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
511M
{
601
511M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
511M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
511M
    Py_ssize_t ret = 0;
604
511M
    int jump;
605
511M
    unsigned int sigcount = state->sigcount;
606
607
511M
    SRE(match_context)* ctx;
608
511M
    SRE(match_context)* nextctx;
609
511M
    INIT_TRACE(state);
610
611
511M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
511M
    DATA_ALLOC(SRE(match_context), ctx);
614
511M
    ctx->last_ctx_pos = -1;
615
511M
    ctx->jump = JUMP_NONE;
616
511M
    ctx->toplevel = toplevel;
617
511M
    ctx_pos = alloc_pos;
618
619
511M
#if USE_COMPUTED_GOTOS
620
511M
#include "sre_targets.h"
621
511M
#endif
622
623
1.58G
entrance:
624
625
1.58G
    ;  // Fashion statement.
626
1.58G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.58G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
87.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.49M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.49M
                   end - ptr, (size_t) pattern[3]));
634
6.49M
            RETURN_FAILURE;
635
6.49M
        }
636
80.7M
        pattern += pattern[1] + 1;
637
80.7M
    }
638
639
1.58G
#if USE_COMPUTED_GOTOS
640
1.58G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.58G
    {
647
648
1.58G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
541M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
541M
                   ptr, pattern[0]));
653
541M
            {
654
541M
                int i = pattern[0];
655
541M
                if (i & 1)
656
81.4M
                    state->lastindex = i/2 + 1;
657
541M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
533M
                    int j = state->lastmark + 1;
663
540M
                    while (j < i)
664
7.48M
                        state->mark[j++] = NULL;
665
533M
                    state->lastmark = i;
666
533M
                }
667
541M
                state->mark[i] = ptr;
668
541M
            }
669
541M
            pattern++;
670
541M
            DISPATCH;
671
672
541M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
164M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
164M
                   ptr, *pattern));
677
164M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
58.6M
                RETURN_FAILURE;
679
105M
            pattern++;
680
105M
            ptr++;
681
105M
            DISPATCH;
682
683
105M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
242M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
242M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
242M
            if (ctx->toplevel &&
698
242M
                ((state->match_all && ptr != state->end) ||
699
69.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
242M
            state->ptr = ptr;
704
242M
            RETURN_SUCCESS;
705
706
14.3M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.3M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.3M
            if (!SRE(at)(state, ptr, *pattern))
711
4.12M
                RETURN_FAILURE;
712
10.2M
            pattern++;
713
10.2M
            DISPATCH;
714
715
10.2M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
300M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
300M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
300M
            if (ptr >= end ||
749
300M
                !SRE(charset)(state, pattern + 1, *ptr))
750
8.33M
                RETURN_FAILURE;
751
291M
            pattern += pattern[0];
752
291M
            ptr++;
753
291M
            DISPATCH;
754
755
291M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.38M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.38M
                   pattern, ptr, pattern[0]));
758
6.38M
            if (ptr >= end ||
759
6.38M
                sre_lower_ascii(*ptr) != *pattern)
760
421k
                RETURN_FAILURE;
761
5.96M
            pattern++;
762
5.96M
            ptr++;
763
5.96M
            DISPATCH;
764
765
5.96M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
106M
        TARGET(SRE_OP_JUMP):
845
106M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
106M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
106M
                   ptr, pattern[0]));
850
106M
            pattern += pattern[0];
851
106M
            DISPATCH;
852
853
179M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
179M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
179M
            LASTMARK_SAVE();
858
179M
            if (state->repeat)
859
129M
                MARK_PUSH(ctx->lastmark);
860
437M
            for (; pattern[0]; pattern += pattern[0]) {
861
361M
                if (pattern[1] == SRE_OP_LITERAL &&
862
361M
                    (ptr >= end ||
863
167M
                     (SRE_CODE) *ptr != pattern[2]))
864
88.3M
                    continue;
865
272M
                if (pattern[1] == SRE_OP_IN &&
866
272M
                    (ptr >= end ||
867
122M
                     !SRE(charset)(state, pattern + 3,
868
122M
                                   (SRE_CODE) *ptr)))
869
91.4M
                    continue;
870
181M
                state->ptr = ptr;
871
181M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
181M
                if (ret) {
873
103M
                    if (state->repeat)
874
84.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
103M
                    RETURN_ON_ERROR(ret);
876
103M
                    RETURN_SUCCESS;
877
103M
                }
878
78.2M
                if (state->repeat)
879
29.7k
                    MARK_POP_KEEP(ctx->lastmark);
880
78.2M
                LASTMARK_RESTORE();
881
78.2M
            }
882
76.1M
            if (state->repeat)
883
45.4M
                MARK_POP_DISCARD(ctx->lastmark);
884
76.1M
            RETURN_FAILURE;
885
886
666M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
666M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
666M
                   pattern[1], pattern[2]));
898
899
666M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.13M
                RETURN_FAILURE; /* cannot match */
901
902
665M
            state->ptr = ptr;
903
904
665M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
665M
            RETURN_ON_ERROR(ret);
906
665M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
665M
            ctx->count = ret;
908
665M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
665M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
378M
                RETURN_FAILURE;
917
918
287M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
287M
                ptr == state->end &&
920
287M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
77.3k
            {
922
                /* tail is empty.  we're finished */
923
77.3k
                state->ptr = ptr;
924
77.3k
                RETURN_SUCCESS;
925
77.3k
            }
926
927
287M
            LASTMARK_SAVE();
928
287M
            if (state->repeat)
929
207M
                MARK_PUSH(ctx->lastmark);
930
931
287M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
100M
                ctx->u.chr = pattern[pattern[0]+1];
935
100M
                for (;;) {
936
227M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
227M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
127M
                        ptr--;
939
127M
                        ctx->count--;
940
127M
                    }
941
100M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
82.7M
                        break;
943
17.6M
                    state->ptr = ptr;
944
17.6M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
17.6M
                            pattern+pattern[0]);
946
17.6M
                    if (ret) {
947
17.6M
                        if (state->repeat)
948
15.6M
                            MARK_POP_DISCARD(ctx->lastmark);
949
17.6M
                        RETURN_ON_ERROR(ret);
950
17.6M
                        RETURN_SUCCESS;
951
17.6M
                    }
952
559
                    if (state->repeat)
953
559
                        MARK_POP_KEEP(ctx->lastmark);
954
559
                    LASTMARK_RESTORE();
955
956
559
                    ptr--;
957
559
                    ctx->count--;
958
559
                }
959
82.7M
                if (state->repeat)
960
81.4M
                    MARK_POP_DISCARD(ctx->lastmark);
961
186M
            } else {
962
                /* general case */
963
190M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
189M
                    state->ptr = ptr;
965
189M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
189M
                            pattern+pattern[0]);
967
189M
                    if (ret) {
968
185M
                        if (state->repeat)
969
109M
                            MARK_POP_DISCARD(ctx->lastmark);
970
185M
                        RETURN_ON_ERROR(ret);
971
185M
                        RETURN_SUCCESS;
972
185M
                    }
973
3.54M
                    if (state->repeat)
974
1.98M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.54M
                    LASTMARK_RESTORE();
976
977
3.54M
                    ptr--;
978
3.54M
                    ctx->count--;
979
3.54M
                }
980
1.23M
                if (state->repeat)
981
1.04M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.23M
            }
983
83.9M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
141M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
141M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
141M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
141M
            ctx->u.rep = repeat_pool_malloc(state);
1127
141M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
141M
            ctx->u.rep->count = -1;
1131
141M
            ctx->u.rep->pattern = pattern;
1132
141M
            ctx->u.rep->prev = state->repeat;
1133
141M
            ctx->u.rep->last_ptr = NULL;
1134
141M
            state->repeat = ctx->u.rep;
1135
1136
141M
            state->ptr = ptr;
1137
141M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
141M
            state->repeat = ctx->u.rep->prev;
1139
141M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
141M
            if (ret) {
1142
141M
                RETURN_ON_ERROR(ret);
1143
141M
                RETURN_SUCCESS;
1144
141M
            }
1145
99.7k
            RETURN_FAILURE;
1146
1147
276M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
276M
            ctx->u.rep = state->repeat;
1155
276M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
276M
            state->ptr = ptr;
1159
1160
276M
            ctx->count = ctx->u.rep->count+1;
1161
1162
276M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
276M
                   ptr, ctx->count));
1164
1165
276M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
276M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
276M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
276M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
264M
                ctx->u.rep->count = ctx->count;
1185
264M
                LASTMARK_SAVE();
1186
264M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
264M
                LAST_PTR_PUSH();
1189
264M
                ctx->u.rep->last_ptr = state->ptr;
1190
264M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
264M
                        ctx->u.rep->pattern+3);
1192
264M
                LAST_PTR_POP();
1193
264M
                if (ret) {
1194
134M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
134M
                    RETURN_ON_ERROR(ret);
1196
134M
                    RETURN_SUCCESS;
1197
134M
                }
1198
130M
                MARK_POP(ctx->lastmark);
1199
130M
                LASTMARK_RESTORE();
1200
130M
                ctx->u.rep->count = ctx->count-1;
1201
130M
                state->ptr = ptr;
1202
130M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
142M
            state->repeat = ctx->u.rep->prev;
1207
142M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
142M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
142M
            RETURN_ON_SUCCESS(ret);
1211
1.09M
            state->ptr = ptr;
1212
1.09M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
95.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
95.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
95.2M
                   ptr, pattern[1]));
1565
95.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
95.2M
            state->ptr = ptr - pattern[1];
1568
95.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
95.2M
            RETURN_ON_FAILURE(ret);
1570
90.4M
            pattern += pattern[0];
1571
90.4M
            DISPATCH;
1572
1573
90.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
45.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
45.9M
                   ptr, pattern[1]));
1578
45.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
45.9M
                state->ptr = ptr - pattern[1];
1580
45.9M
                LASTMARK_SAVE();
1581
45.9M
                if (state->repeat)
1582
45.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
91.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
91.8M
                if (ret) {
1586
23.3k
                    if (state->repeat)
1587
23.3k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
23.3k
                    RETURN_ON_ERROR(ret);
1589
23.3k
                    RETURN_FAILURE;
1590
23.3k
                }
1591
45.9M
                if (state->repeat)
1592
45.9M
                    MARK_POP(ctx->lastmark);
1593
45.9M
                LASTMARK_RESTORE();
1594
45.9M
            }
1595
45.9M
            pattern += pattern[0];
1596
45.9M
            DISPATCH;
1597
1598
45.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.58G
exit:
1620
1.58G
    ctx_pos = ctx->last_ctx_pos;
1621
1.58G
    jump = ctx->jump;
1622
1.58G
    DATA_POP_DISCARD(ctx);
1623
1.58G
    if (ctx_pos == -1) {
1624
511M
        state->sigcount = sigcount;
1625
511M
        return ret;
1626
511M
    }
1627
1.07G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.07G
    switch (jump) {
1630
264M
        case JUMP_MAX_UNTIL_2:
1631
264M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
264M
            goto jump_max_until_2;
1633
142M
        case JUMP_MAX_UNTIL_3:
1634
142M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
142M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
181M
        case JUMP_BRANCH:
1643
181M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
181M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
141M
        case JUMP_REPEAT:
1658
141M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
141M
            goto jump_repeat;
1660
17.6M
        case JUMP_REPEAT_ONE_1:
1661
17.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
17.6M
            goto jump_repeat_one_1;
1663
189M
        case JUMP_REPEAT_ONE_2:
1664
189M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
189M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
95.2M
        case JUMP_ASSERT:
1673
95.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
95.2M
            goto jump_assert;
1675
45.9M
        case JUMP_ASSERT_NOT:
1676
45.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
45.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.07G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.07G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
160M
{
601
160M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
160M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
160M
    Py_ssize_t ret = 0;
604
160M
    int jump;
605
160M
    unsigned int sigcount = state->sigcount;
606
607
160M
    SRE(match_context)* ctx;
608
160M
    SRE(match_context)* nextctx;
609
160M
    INIT_TRACE(state);
610
611
160M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
160M
    DATA_ALLOC(SRE(match_context), ctx);
614
160M
    ctx->last_ctx_pos = -1;
615
160M
    ctx->jump = JUMP_NONE;
616
160M
    ctx->toplevel = toplevel;
617
160M
    ctx_pos = alloc_pos;
618
619
160M
#if USE_COMPUTED_GOTOS
620
160M
#include "sre_targets.h"
621
160M
#endif
622
623
322M
entrance:
624
625
322M
    ;  // Fashion statement.
626
322M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
322M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
30.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.49M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.49M
                   end - ptr, (size_t) pattern[3]));
634
6.49M
            RETURN_FAILURE;
635
6.49M
        }
636
24.0M
        pattern += pattern[1] + 1;
637
24.0M
    }
638
639
315M
#if USE_COMPUTED_GOTOS
640
315M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
315M
    {
647
648
315M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
135M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
135M
                   ptr, pattern[0]));
653
135M
            {
654
135M
                int i = pattern[0];
655
135M
                if (i & 1)
656
20.4M
                    state->lastindex = i/2 + 1;
657
135M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
132M
                    int j = state->lastmark + 1;
663
135M
                    while (j < i)
664
3.54M
                        state->mark[j++] = NULL;
665
132M
                    state->lastmark = i;
666
132M
                }
667
135M
                state->mark[i] = ptr;
668
135M
            }
669
135M
            pattern++;
670
135M
            DISPATCH;
671
672
135M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
54.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
54.7M
                   ptr, *pattern));
677
54.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
18.0M
                RETURN_FAILURE;
679
36.7M
            pattern++;
680
36.7M
            ptr++;
681
36.7M
            DISPATCH;
682
683
36.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
51.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
51.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
51.9M
            if (ctx->toplevel &&
698
51.9M
                ((state->match_all && ptr != state->end) ||
699
16.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
51.9M
            state->ptr = ptr;
704
51.9M
            RETURN_SUCCESS;
705
706
13.0M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.0M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.0M
            if (!SRE(at)(state, ptr, *pattern))
711
2.79M
                RETURN_FAILURE;
712
10.2M
            pattern++;
713
10.2M
            DISPATCH;
714
715
10.2M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
38.9M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
38.9M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
38.9M
            if (ptr >= end ||
749
38.9M
                !SRE(charset)(state, pattern + 1, *ptr))
750
346k
                RETURN_FAILURE;
751
38.6M
            pattern += pattern[0];
752
38.6M
            ptr++;
753
38.6M
            DISPATCH;
754
755
38.6M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.36M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.36M
                   pattern, ptr, pattern[0]));
758
1.36M
            if (ptr >= end ||
759
1.36M
                sre_lower_ascii(*ptr) != *pattern)
760
244k
                RETURN_FAILURE;
761
1.11M
            pattern++;
762
1.11M
            ptr++;
763
1.11M
            DISPATCH;
764
765
1.11M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
27.9M
        TARGET(SRE_OP_JUMP):
845
27.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
27.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
27.9M
                   ptr, pattern[0]));
850
27.9M
            pattern += pattern[0];
851
27.9M
            DISPATCH;
852
853
54.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
54.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
54.3M
            LASTMARK_SAVE();
858
54.3M
            if (state->repeat)
859
11.4M
                MARK_PUSH(ctx->lastmark);
860
162M
            for (; pattern[0]; pattern += pattern[0]) {
861
133M
                if (pattern[1] == SRE_OP_LITERAL &&
862
133M
                    (ptr >= end ||
863
57.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
27.4M
                    continue;
865
106M
                if (pattern[1] == SRE_OP_IN &&
866
106M
                    (ptr >= end ||
867
12.5M
                     !SRE(charset)(state, pattern + 3,
868
12.5M
                                   (SRE_CODE) *ptr)))
869
6.69M
                    continue;
870
99.4M
                state->ptr = ptr;
871
99.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
99.4M
                if (ret) {
873
25.8M
                    if (state->repeat)
874
11.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
25.8M
                    RETURN_ON_ERROR(ret);
876
25.8M
                    RETURN_SUCCESS;
877
25.8M
                }
878
73.5M
                if (state->repeat)
879
7.04k
                    MARK_POP_KEEP(ctx->lastmark);
880
73.5M
                LASTMARK_RESTORE();
881
73.5M
            }
882
28.5M
            if (state->repeat)
883
241k
                MARK_POP_DISCARD(ctx->lastmark);
884
28.5M
            RETURN_FAILURE;
885
886
160M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
160M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
160M
                   pattern[1], pattern[2]));
898
899
160M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
931k
                RETURN_FAILURE; /* cannot match */
901
902
159M
            state->ptr = ptr;
903
904
159M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
159M
            RETURN_ON_ERROR(ret);
906
159M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
159M
            ctx->count = ret;
908
159M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
159M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
132M
                RETURN_FAILURE;
917
918
27.0M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
27.0M
                ptr == state->end &&
920
27.0M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
57.0k
            {
922
                /* tail is empty.  we're finished */
923
57.0k
                state->ptr = ptr;
924
57.0k
                RETURN_SUCCESS;
925
57.0k
            }
926
927
26.9M
            LASTMARK_SAVE();
928
26.9M
            if (state->repeat)
929
12.1M
                MARK_PUSH(ctx->lastmark);
930
931
26.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.38M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.38M
                for (;;) {
936
17.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
17.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
12.5M
                        ptr--;
939
12.5M
                        ctx->count--;
940
12.5M
                    }
941
5.38M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.94M
                        break;
943
2.44M
                    state->ptr = ptr;
944
2.44M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.44M
                            pattern+pattern[0]);
946
2.44M
                    if (ret) {
947
2.44M
                        if (state->repeat)
948
467k
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.44M
                        RETURN_ON_ERROR(ret);
950
2.44M
                        RETURN_SUCCESS;
951
2.44M
                    }
952
113
                    if (state->repeat)
953
113
                        MARK_POP_KEEP(ctx->lastmark);
954
113
                    LASTMARK_RESTORE();
955
956
113
                    ptr--;
957
113
                    ctx->count--;
958
113
                }
959
2.94M
                if (state->repeat)
960
1.65M
                    MARK_POP_DISCARD(ctx->lastmark);
961
21.5M
            } else {
962
                /* general case */
963
23.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
22.6M
                    state->ptr = ptr;
965
22.6M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
22.6M
                            pattern+pattern[0]);
967
22.6M
                    if (ret) {
968
20.7M
                        if (state->repeat)
969
9.35M
                            MARK_POP_DISCARD(ctx->lastmark);
970
20.7M
                        RETURN_ON_ERROR(ret);
971
20.7M
                        RETURN_SUCCESS;
972
20.7M
                    }
973
1.92M
                    if (state->repeat)
974
1.17M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.92M
                    LASTMARK_RESTORE();
976
977
1.92M
                    ptr--;
978
1.92M
                    ctx->count--;
979
1.92M
                }
980
827k
                if (state->repeat)
981
634k
                    MARK_POP_DISCARD(ctx->lastmark);
982
827k
            }
983
3.76M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
5.66M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
5.66M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
5.66M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
5.66M
            ctx->u.rep = repeat_pool_malloc(state);
1127
5.66M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
5.66M
            ctx->u.rep->count = -1;
1131
5.66M
            ctx->u.rep->pattern = pattern;
1132
5.66M
            ctx->u.rep->prev = state->repeat;
1133
5.66M
            ctx->u.rep->last_ptr = NULL;
1134
5.66M
            state->repeat = ctx->u.rep;
1135
1136
5.66M
            state->ptr = ptr;
1137
5.66M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
5.66M
            state->repeat = ctx->u.rep->prev;
1139
5.66M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
5.66M
            if (ret) {
1142
5.57M
                RETURN_ON_ERROR(ret);
1143
5.57M
                RETURN_SUCCESS;
1144
5.57M
            }
1145
98.0k
            RETURN_FAILURE;
1146
1147
22.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
22.0M
            ctx->u.rep = state->repeat;
1155
22.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
22.0M
            state->ptr = ptr;
1159
1160
22.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
22.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
22.0M
                   ptr, ctx->count));
1164
1165
22.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
22.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
22.0M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
22.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
18.3M
                ctx->u.rep->count = ctx->count;
1185
18.3M
                LASTMARK_SAVE();
1186
18.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
18.3M
                LAST_PTR_PUSH();
1189
18.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
18.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
18.3M
                        ctx->u.rep->pattern+3);
1192
18.3M
                LAST_PTR_POP();
1193
18.3M
                if (ret) {
1194
15.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
15.7M
                    RETURN_ON_ERROR(ret);
1196
15.7M
                    RETURN_SUCCESS;
1197
15.7M
                }
1198
2.55M
                MARK_POP(ctx->lastmark);
1199
2.55M
                LASTMARK_RESTORE();
1200
2.55M
                ctx->u.rep->count = ctx->count-1;
1201
2.55M
                state->ptr = ptr;
1202
2.55M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.26M
            state->repeat = ctx->u.rep->prev;
1207
6.26M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.26M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.26M
            RETURN_ON_SUCCESS(ret);
1211
692k
            state->ptr = ptr;
1212
692k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
1.97M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
1.97M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
1.97M
                   ptr, pattern[1]));
1565
1.97M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
1.97M
            state->ptr = ptr - pattern[1];
1568
1.97M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
1.97M
            RETURN_ON_FAILURE(ret);
1570
1.92M
            pattern += pattern[0];
1571
1.92M
            DISPATCH;
1572
1573
5.28M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.28M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.28M
                   ptr, pattern[1]));
1578
5.28M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.28M
                state->ptr = ptr - pattern[1];
1580
5.28M
                LASTMARK_SAVE();
1581
5.28M
                if (state->repeat)
1582
5.28M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.5M
                if (ret) {
1586
1.10k
                    if (state->repeat)
1587
1.10k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.10k
                    RETURN_ON_ERROR(ret);
1589
1.10k
                    RETURN_FAILURE;
1590
1.10k
                }
1591
5.28M
                if (state->repeat)
1592
5.28M
                    MARK_POP(ctx->lastmark);
1593
5.28M
                LASTMARK_RESTORE();
1594
5.28M
            }
1595
5.28M
            pattern += pattern[0];
1596
5.28M
            DISPATCH;
1597
1598
5.28M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
322M
exit:
1620
322M
    ctx_pos = ctx->last_ctx_pos;
1621
322M
    jump = ctx->jump;
1622
322M
    DATA_POP_DISCARD(ctx);
1623
322M
    if (ctx_pos == -1) {
1624
160M
        state->sigcount = sigcount;
1625
160M
        return ret;
1626
160M
    }
1627
162M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
162M
    switch (jump) {
1630
18.3M
        case JUMP_MAX_UNTIL_2:
1631
18.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
18.3M
            goto jump_max_until_2;
1633
6.26M
        case JUMP_MAX_UNTIL_3:
1634
6.26M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.26M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
99.4M
        case JUMP_BRANCH:
1643
99.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
99.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
5.66M
        case JUMP_REPEAT:
1658
5.66M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
5.66M
            goto jump_repeat;
1660
2.44M
        case JUMP_REPEAT_ONE_1:
1661
2.44M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.44M
            goto jump_repeat_one_1;
1663
22.6M
        case JUMP_REPEAT_ONE_2:
1664
22.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
22.6M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
1.97M
        case JUMP_ASSERT:
1673
1.97M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
1.97M
            goto jump_assert;
1675
5.28M
        case JUMP_ASSERT_NOT:
1676
5.28M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.28M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
162M
    }
1683
1684
0
    return ret; /* should never get here */
1685
162M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
247M
{
601
247M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
247M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
247M
    Py_ssize_t ret = 0;
604
247M
    int jump;
605
247M
    unsigned int sigcount = state->sigcount;
606
607
247M
    SRE(match_context)* ctx;
608
247M
    SRE(match_context)* nextctx;
609
247M
    INIT_TRACE(state);
610
611
247M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
247M
    DATA_ALLOC(SRE(match_context), ctx);
614
247M
    ctx->last_ctx_pos = -1;
615
247M
    ctx->jump = JUMP_NONE;
616
247M
    ctx->toplevel = toplevel;
617
247M
    ctx_pos = alloc_pos;
618
619
247M
#if USE_COMPUTED_GOTOS
620
247M
#include "sre_targets.h"
621
247M
#endif
622
623
637M
entrance:
624
625
637M
    ;  // Fashion statement.
626
637M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
637M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
639
            TRACE(("reject (got %tu chars, need %zu)\n",
633
639
                   end - ptr, (size_t) pattern[3]));
634
639
            RETURN_FAILURE;
635
639
        }
636
29.1M
        pattern += pattern[1] + 1;
637
29.1M
    }
638
639
637M
#if USE_COMPUTED_GOTOS
640
637M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
637M
    {
647
648
637M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
243M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
243M
                   ptr, pattern[0]));
653
243M
            {
654
243M
                int i = pattern[0];
655
243M
                if (i & 1)
656
26.1M
                    state->lastindex = i/2 + 1;
657
243M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
241M
                    int j = state->lastmark + 1;
663
243M
                    while (j < i)
664
1.88M
                        state->mark[j++] = NULL;
665
241M
                    state->lastmark = i;
666
241M
                }
667
243M
                state->mark[i] = ptr;
668
243M
            }
669
243M
            pattern++;
670
243M
            DISPATCH;
671
672
243M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
56.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
56.4M
                   ptr, *pattern));
677
56.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
20.2M
                RETURN_FAILURE;
679
36.2M
            pattern++;
680
36.2M
            ptr++;
681
36.2M
            DISPATCH;
682
683
36.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
102M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
102M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
102M
            if (ctx->toplevel &&
698
102M
                ((state->match_all && ptr != state->end) ||
699
25.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
102M
            state->ptr = ptr;
704
102M
            RETURN_SUCCESS;
705
706
791k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
791k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
791k
            if (!SRE(at)(state, ptr, *pattern))
711
743k
                RETURN_FAILURE;
712
47.9k
            pattern++;
713
47.9k
            DISPATCH;
714
715
47.9k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
130M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
130M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
130M
            if (ptr >= end ||
749
130M
                !SRE(charset)(state, pattern + 1, *ptr))
750
6.52M
                RETURN_FAILURE;
751
124M
            pattern += pattern[0];
752
124M
            ptr++;
753
124M
            DISPATCH;
754
755
124M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.09M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.09M
                   pattern, ptr, pattern[0]));
758
4.09M
            if (ptr >= end ||
759
4.09M
                sre_lower_ascii(*ptr) != *pattern)
760
158k
                RETURN_FAILURE;
761
3.93M
            pattern++;
762
3.93M
            ptr++;
763
3.93M
            DISPATCH;
764
765
3.93M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
34.5M
        TARGET(SRE_OP_JUMP):
845
34.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
34.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
34.5M
                   ptr, pattern[0]));
850
34.5M
            pattern += pattern[0];
851
34.5M
            DISPATCH;
852
853
54.5M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
54.5M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
54.5M
            LASTMARK_SAVE();
858
54.5M
            if (state->repeat)
859
51.4M
                MARK_PUSH(ctx->lastmark);
860
121M
            for (; pattern[0]; pattern += pattern[0]) {
861
100M
                if (pattern[1] == SRE_OP_LITERAL &&
862
100M
                    (ptr >= end ||
863
48.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.4M
                    continue;
865
76.2M
                if (pattern[1] == SRE_OP_IN &&
866
76.2M
                    (ptr >= end ||
867
47.4M
                     !SRE(charset)(state, pattern + 3,
868
47.4M
                                   (SRE_CODE) *ptr)))
869
38.6M
                    continue;
870
37.5M
                state->ptr = ptr;
871
37.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
37.5M
                if (ret) {
873
34.1M
                    if (state->repeat)
874
32.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
34.1M
                    RETURN_ON_ERROR(ret);
876
34.1M
                    RETURN_SUCCESS;
877
34.1M
                }
878
3.46M
                if (state->repeat)
879
7.63k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.46M
                LASTMARK_RESTORE();
881
3.46M
            }
882
20.3M
            if (state->repeat)
883
18.8M
                MARK_POP_DISCARD(ctx->lastmark);
884
20.3M
            RETURN_FAILURE;
885
886
286M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
286M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
286M
                   pattern[1], pattern[2]));
898
899
286M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
187k
                RETURN_FAILURE; /* cannot match */
901
902
286M
            state->ptr = ptr;
903
904
286M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
286M
            RETURN_ON_ERROR(ret);
906
286M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
286M
            ctx->count = ret;
908
286M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
286M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
176M
                RETURN_FAILURE;
917
918
109M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
109M
                ptr == state->end &&
920
109M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
16.5k
            {
922
                /* tail is empty.  we're finished */
923
16.5k
                state->ptr = ptr;
924
16.5k
                RETURN_SUCCESS;
925
16.5k
            }
926
927
109M
            LASTMARK_SAVE();
928
109M
            if (state->repeat)
929
78.1M
                MARK_PUSH(ctx->lastmark);
930
931
109M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
37.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
37.7M
                for (;;) {
936
74.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
74.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
36.9M
                        ptr--;
939
36.9M
                        ctx->count--;
940
36.9M
                    }
941
37.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
30.3M
                        break;
943
7.43M
                    state->ptr = ptr;
944
7.43M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.43M
                            pattern+pattern[0]);
946
7.43M
                    if (ret) {
947
7.43M
                        if (state->repeat)
948
7.41M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.43M
                        RETURN_ON_ERROR(ret);
950
7.43M
                        RETURN_SUCCESS;
951
7.43M
                    }
952
217
                    if (state->repeat)
953
217
                        MARK_POP_KEEP(ctx->lastmark);
954
217
                    LASTMARK_RESTORE();
955
956
217
                    ptr--;
957
217
                    ctx->count--;
958
217
                }
959
30.3M
                if (state->repeat)
960
30.3M
                    MARK_POP_DISCARD(ctx->lastmark);
961
72.0M
            } else {
962
                /* general case */
963
72.8M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
72.5M
                    state->ptr = ptr;
965
72.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
72.5M
                            pattern+pattern[0]);
967
72.5M
                    if (ret) {
968
71.7M
                        if (state->repeat)
969
40.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
71.7M
                        RETURN_ON_ERROR(ret);
971
71.7M
                        RETURN_SUCCESS;
972
71.7M
                    }
973
804k
                    if (state->repeat)
974
639k
                        MARK_POP_KEEP(ctx->lastmark);
975
804k
                    LASTMARK_RESTORE();
976
977
804k
                    ptr--;
978
804k
                    ctx->count--;
979
804k
                }
980
321k
                if (state->repeat)
981
319k
                    MARK_POP_DISCARD(ctx->lastmark);
982
321k
            }
983
30.6M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
55.4M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
55.4M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
55.4M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
55.4M
            ctx->u.rep = repeat_pool_malloc(state);
1127
55.4M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
55.4M
            ctx->u.rep->count = -1;
1131
55.4M
            ctx->u.rep->pattern = pattern;
1132
55.4M
            ctx->u.rep->prev = state->repeat;
1133
55.4M
            ctx->u.rep->last_ptr = NULL;
1134
55.4M
            state->repeat = ctx->u.rep;
1135
1136
55.4M
            state->ptr = ptr;
1137
55.4M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
55.4M
            state->repeat = ctx->u.rep->prev;
1139
55.4M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
55.4M
            if (ret) {
1142
55.4M
                RETURN_ON_ERROR(ret);
1143
55.4M
                RETURN_SUCCESS;
1144
55.4M
            }
1145
936
            RETURN_FAILURE;
1146
1147
106M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
106M
            ctx->u.rep = state->repeat;
1155
106M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
106M
            state->ptr = ptr;
1159
1160
106M
            ctx->count = ctx->u.rep->count+1;
1161
1162
106M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
106M
                   ptr, ctx->count));
1164
1165
106M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
106M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
106M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
106M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
101M
                ctx->u.rep->count = ctx->count;
1185
101M
                LASTMARK_SAVE();
1186
101M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
101M
                LAST_PTR_PUSH();
1189
101M
                ctx->u.rep->last_ptr = state->ptr;
1190
101M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
101M
                        ctx->u.rep->pattern+3);
1192
101M
                LAST_PTR_POP();
1193
101M
                if (ret) {
1194
50.3M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
50.3M
                    RETURN_ON_ERROR(ret);
1196
50.3M
                    RETURN_SUCCESS;
1197
50.3M
                }
1198
51.4M
                MARK_POP(ctx->lastmark);
1199
51.4M
                LASTMARK_RESTORE();
1200
51.4M
                ctx->u.rep->count = ctx->count-1;
1201
51.4M
                state->ptr = ptr;
1202
51.4M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
55.7M
            state->repeat = ctx->u.rep->prev;
1207
55.7M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
55.7M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
55.7M
            RETURN_ON_SUCCESS(ret);
1211
320k
            state->ptr = ptr;
1212
320k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
39.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
39.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
39.0M
                   ptr, pattern[1]));
1565
39.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
39.0M
            state->ptr = ptr - pattern[1];
1568
39.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
39.0M
            RETURN_ON_FAILURE(ret);
1570
34.8M
            pattern += pattern[0];
1571
34.8M
            DISPATCH;
1572
1573
34.8M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
20.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
20.3M
                   ptr, pattern[1]));
1578
20.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
20.3M
                state->ptr = ptr - pattern[1];
1580
20.3M
                LASTMARK_SAVE();
1581
20.3M
                if (state->repeat)
1582
20.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
40.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
40.7M
                if (ret) {
1586
7.39k
                    if (state->repeat)
1587
7.39k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
7.39k
                    RETURN_ON_ERROR(ret);
1589
7.39k
                    RETURN_FAILURE;
1590
7.39k
                }
1591
20.3M
                if (state->repeat)
1592
20.3M
                    MARK_POP(ctx->lastmark);
1593
20.3M
                LASTMARK_RESTORE();
1594
20.3M
            }
1595
20.3M
            pattern += pattern[0];
1596
20.3M
            DISPATCH;
1597
1598
20.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
637M
exit:
1620
637M
    ctx_pos = ctx->last_ctx_pos;
1621
637M
    jump = ctx->jump;
1622
637M
    DATA_POP_DISCARD(ctx);
1623
637M
    if (ctx_pos == -1) {
1624
247M
        state->sigcount = sigcount;
1625
247M
        return ret;
1626
247M
    }
1627
389M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
389M
    switch (jump) {
1630
101M
        case JUMP_MAX_UNTIL_2:
1631
101M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
101M
            goto jump_max_until_2;
1633
55.7M
        case JUMP_MAX_UNTIL_3:
1634
55.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
55.7M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
37.5M
        case JUMP_BRANCH:
1643
37.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
37.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
55.4M
        case JUMP_REPEAT:
1658
55.4M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
55.4M
            goto jump_repeat;
1660
7.43M
        case JUMP_REPEAT_ONE_1:
1661
7.43M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.43M
            goto jump_repeat_one_1;
1663
72.5M
        case JUMP_REPEAT_ONE_2:
1664
72.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
72.5M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
39.0M
        case JUMP_ASSERT:
1673
39.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
39.0M
            goto jump_assert;
1675
20.3M
        case JUMP_ASSERT_NOT:
1676
20.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
20.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
389M
    }
1683
1684
0
    return ret; /* should never get here */
1685
389M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
104M
{
601
104M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
104M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
104M
    Py_ssize_t ret = 0;
604
104M
    int jump;
605
104M
    unsigned int sigcount = state->sigcount;
606
607
104M
    SRE(match_context)* ctx;
608
104M
    SRE(match_context)* nextctx;
609
104M
    INIT_TRACE(state);
610
611
104M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
104M
    DATA_ALLOC(SRE(match_context), ctx);
614
104M
    ctx->last_ctx_pos = -1;
615
104M
    ctx->jump = JUMP_NONE;
616
104M
    ctx->toplevel = toplevel;
617
104M
    ctx_pos = alloc_pos;
618
619
104M
#if USE_COMPUTED_GOTOS
620
104M
#include "sre_targets.h"
621
104M
#endif
622
623
630M
entrance:
624
625
630M
    ;  // Fashion statement.
626
630M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
630M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
27.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
343
            TRACE(("reject (got %tu chars, need %zu)\n",
633
343
                   end - ptr, (size_t) pattern[3]));
634
343
            RETURN_FAILURE;
635
343
        }
636
27.5M
        pattern += pattern[1] + 1;
637
27.5M
    }
638
639
630M
#if USE_COMPUTED_GOTOS
640
630M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
630M
    {
647
648
630M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
162M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
162M
                   ptr, pattern[0]));
653
162M
            {
654
162M
                int i = pattern[0];
655
162M
                if (i & 1)
656
34.7M
                    state->lastindex = i/2 + 1;
657
162M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
160M
                    int j = state->lastmark + 1;
663
162M
                    while (j < i)
664
2.05M
                        state->mark[j++] = NULL;
665
160M
                    state->lastmark = i;
666
160M
                }
667
162M
                state->mark[i] = ptr;
668
162M
            }
669
162M
            pattern++;
670
162M
            DISPATCH;
671
672
162M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
53.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
53.0M
                   ptr, *pattern));
677
53.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
20.3M
                RETURN_FAILURE;
679
32.6M
            pattern++;
680
32.6M
            ptr++;
681
32.6M
            DISPATCH;
682
683
32.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
87.8M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
87.8M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
87.8M
            if (ctx->toplevel &&
698
87.8M
                ((state->match_all && ptr != state->end) ||
699
27.1M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
87.8M
            state->ptr = ptr;
704
87.8M
            RETURN_SUCCESS;
705
706
593k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
593k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
593k
            if (!SRE(at)(state, ptr, *pattern))
711
584k
                RETURN_FAILURE;
712
8.87k
            pattern++;
713
8.87k
            DISPATCH;
714
715
8.87k
        TARGET(SRE_OP_CATEGORY):
716
            /* match character in given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
130M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
130M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
130M
            if (ptr >= end ||
749
130M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.45M
                RETURN_FAILURE;
751
129M
            pattern += pattern[0];
752
129M
            ptr++;
753
129M
            DISPATCH;
754
755
129M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
929k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
929k
                   pattern, ptr, pattern[0]));
758
929k
            if (ptr >= end ||
759
929k
                sre_lower_ascii(*ptr) != *pattern)
760
18.7k
                RETURN_FAILURE;
761
910k
            pattern++;
762
910k
            ptr++;
763
910k
            DISPATCH;
764
765
910k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
43.8M
        TARGET(SRE_OP_JUMP):
845
43.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
43.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
43.8M
                   ptr, pattern[0]));
850
43.8M
            pattern += pattern[0];
851
43.8M
            DISPATCH;
852
853
70.5M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
70.5M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
70.5M
            LASTMARK_SAVE();
858
70.5M
            if (state->repeat)
859
66.8M
                MARK_PUSH(ctx->lastmark);
860
154M
            for (; pattern[0]; pattern += pattern[0]) {
861
127M
                if (pattern[1] == SRE_OP_LITERAL &&
862
127M
                    (ptr >= end ||
863
60.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
36.4M
                    continue;
865
90.6M
                if (pattern[1] == SRE_OP_IN &&
866
90.6M
                    (ptr >= end ||
867
62.7M
                     !SRE(charset)(state, pattern + 3,
868
62.7M
                                   (SRE_CODE) *ptr)))
869
46.0M
                    continue;
870
44.5M
                state->ptr = ptr;
871
44.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
44.5M
                if (ret) {
873
43.3M
                    if (state->repeat)
874
40.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
43.3M
                    RETURN_ON_ERROR(ret);
876
43.3M
                    RETURN_SUCCESS;
877
43.3M
                }
878
1.16M
                if (state->repeat)
879
15.0k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.16M
                LASTMARK_RESTORE();
881
1.16M
            }
882
27.1M
            if (state->repeat)
883
26.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
27.1M
            RETURN_FAILURE;
885
886
219M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
219M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
219M
                   pattern[1], pattern[2]));
898
899
219M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
13.7k
                RETURN_FAILURE; /* cannot match */
901
902
219M
            state->ptr = ptr;
903
904
219M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
219M
            RETURN_ON_ERROR(ret);
906
219M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
219M
            ctx->count = ret;
908
219M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
219M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
69.4M
                RETURN_FAILURE;
917
918
150M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
150M
                ptr == state->end &&
920
150M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.69k
            {
922
                /* tail is empty.  we're finished */
923
3.69k
                state->ptr = ptr;
924
3.69k
                RETURN_SUCCESS;
925
3.69k
            }
926
927
150M
            LASTMARK_SAVE();
928
150M
            if (state->repeat)
929
117M
                MARK_PUSH(ctx->lastmark);
930
931
150M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
57.2M
                ctx->u.chr = pattern[pattern[0]+1];
935
57.2M
                for (;;) {
936
134M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
134M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
77.5M
                        ptr--;
939
77.5M
                        ctx->count--;
940
77.5M
                    }
941
57.2M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
49.4M
                        break;
943
7.78M
                    state->ptr = ptr;
944
7.78M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.78M
                            pattern+pattern[0]);
946
7.78M
                    if (ret) {
947
7.78M
                        if (state->repeat)
948
7.78M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.78M
                        RETURN_ON_ERROR(ret);
950
7.78M
                        RETURN_SUCCESS;
951
7.78M
                    }
952
229
                    if (state->repeat)
953
229
                        MARK_POP_KEEP(ctx->lastmark);
954
229
                    LASTMARK_RESTORE();
955
956
229
                    ptr--;
957
229
                    ctx->count--;
958
229
                }
959
49.4M
                if (state->repeat)
960
49.4M
                    MARK_POP_DISCARD(ctx->lastmark);
961
93.2M
            } else {
962
                /* general case */
963
94.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
93.9M
                    state->ptr = ptr;
965
93.9M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
93.9M
                            pattern+pattern[0]);
967
93.9M
                    if (ret) {
968
93.1M
                        if (state->repeat)
969
59.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
93.1M
                        RETURN_ON_ERROR(ret);
971
93.1M
                        RETURN_SUCCESS;
972
93.1M
                    }
973
820k
                    if (state->repeat)
974
170k
                        MARK_POP_KEEP(ctx->lastmark);
975
820k
                    LASTMARK_RESTORE();
976
977
820k
                    ptr--;
978
820k
                    ctx->count--;
979
820k
                }
980
86.1k
                if (state->repeat)
981
85.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
86.1k
            }
983
49.5M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
80.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
80.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
80.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
80.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
80.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
80.2M
            ctx->u.rep->count = -1;
1131
80.2M
            ctx->u.rep->pattern = pattern;
1132
80.2M
            ctx->u.rep->prev = state->repeat;
1133
80.2M
            ctx->u.rep->last_ptr = NULL;
1134
80.2M
            state->repeat = ctx->u.rep;
1135
1136
80.2M
            state->ptr = ptr;
1137
80.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
80.2M
            state->repeat = ctx->u.rep->prev;
1139
80.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
80.2M
            if (ret) {
1142
80.2M
                RETURN_ON_ERROR(ret);
1143
80.2M
                RETURN_SUCCESS;
1144
80.2M
            }
1145
777
            RETURN_FAILURE;
1146
1147
148M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
148M
            ctx->u.rep = state->repeat;
1155
148M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
148M
            state->ptr = ptr;
1159
1160
148M
            ctx->count = ctx->u.rep->count+1;
1161
1162
148M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
148M
                   ptr, ctx->count));
1164
1165
148M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
148M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
148M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
148M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
144M
                ctx->u.rep->count = ctx->count;
1185
144M
                LASTMARK_SAVE();
1186
144M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
144M
                LAST_PTR_PUSH();
1189
144M
                ctx->u.rep->last_ptr = state->ptr;
1190
144M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
144M
                        ctx->u.rep->pattern+3);
1192
144M
                LAST_PTR_POP();
1193
144M
                if (ret) {
1194
68.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
68.0M
                    RETURN_ON_ERROR(ret);
1196
68.0M
                    RETURN_SUCCESS;
1197
68.0M
                }
1198
76.0M
                MARK_POP(ctx->lastmark);
1199
76.0M
                LASTMARK_RESTORE();
1200
76.0M
                ctx->u.rep->count = ctx->count-1;
1201
76.0M
                state->ptr = ptr;
1202
76.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
80.3M
            state->repeat = ctx->u.rep->prev;
1207
80.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
80.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
80.3M
            RETURN_ON_SUCCESS(ret);
1211
85.9k
            state->ptr = ptr;
1212
85.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
54.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
54.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
54.2M
                   ptr, pattern[1]));
1565
54.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
54.2M
            state->ptr = ptr - pattern[1];
1568
54.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
54.2M
            RETURN_ON_FAILURE(ret);
1570
53.6M
            pattern += pattern[0];
1571
53.6M
            DISPATCH;
1572
1573
53.6M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
20.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
20.2M
                   ptr, pattern[1]));
1578
20.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
20.2M
                state->ptr = ptr - pattern[1];
1580
20.2M
                LASTMARK_SAVE();
1581
20.2M
                if (state->repeat)
1582
20.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
40.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
40.5M
                if (ret) {
1586
14.8k
                    if (state->repeat)
1587
14.8k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.8k
                    RETURN_ON_ERROR(ret);
1589
14.8k
                    RETURN_FAILURE;
1590
14.8k
                }
1591
20.2M
                if (state->repeat)
1592
20.2M
                    MARK_POP(ctx->lastmark);
1593
20.2M
                LASTMARK_RESTORE();
1594
20.2M
            }
1595
20.2M
            pattern += pattern[0];
1596
20.2M
            DISPATCH;
1597
1598
20.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
630M
exit:
1620
630M
    ctx_pos = ctx->last_ctx_pos;
1621
630M
    jump = ctx->jump;
1622
630M
    DATA_POP_DISCARD(ctx);
1623
630M
    if (ctx_pos == -1) {
1624
104M
        state->sigcount = sigcount;
1625
104M
        return ret;
1626
104M
    }
1627
525M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
525M
    switch (jump) {
1630
144M
        case JUMP_MAX_UNTIL_2:
1631
144M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
144M
            goto jump_max_until_2;
1633
80.3M
        case JUMP_MAX_UNTIL_3:
1634
80.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
80.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
44.5M
        case JUMP_BRANCH:
1643
44.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
44.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
80.2M
        case JUMP_REPEAT:
1658
80.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
80.2M
            goto jump_repeat;
1660
7.78M
        case JUMP_REPEAT_ONE_1:
1661
7.78M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.78M
            goto jump_repeat_one_1;
1663
93.9M
        case JUMP_REPEAT_ONE_2:
1664
93.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
93.9M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
54.2M
        case JUMP_ASSERT:
1673
54.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
54.2M
            goto jump_assert;
1675
20.2M
        case JUMP_ASSERT_NOT:
1676
20.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
20.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
525M
    }
1683
1684
0
    return ret; /* should never get here */
1685
525M
}
1686
1687
/* need to reset capturing groups between two SRE(match) callings in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
341M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
89.4M
{
1694
89.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
89.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
89.4M
    Py_ssize_t status = 0;
1697
89.4M
    Py_ssize_t prefix_len = 0;
1698
89.4M
    Py_ssize_t prefix_skip = 0;
1699
89.4M
    SRE_CODE* prefix = NULL;
1700
89.4M
    SRE_CODE* charset = NULL;
1701
89.4M
    SRE_CODE* overlap = NULL;
1702
89.4M
    int flags = 0;
1703
89.4M
    INIT_TRACE(state);
1704
1705
89.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
89.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
89.4M
        flags = pattern[2];
1713
1714
89.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.76M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.76M
                   end - ptr, (size_t) pattern[3]));
1717
1.76M
            return 0;
1718
1.76M
        }
1719
87.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.59M
            end -= pattern[3] - 1;
1723
7.59M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.59M
        }
1726
1727
87.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.59M
            prefix_len = pattern[5];
1731
7.59M
            prefix_skip = pattern[6];
1732
7.59M
            prefix = pattern + 7;
1733
7.59M
            overlap = prefix + prefix_len - 1;
1734
80.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
75.9M
            charset = pattern + 5;
1738
1739
87.7M
        pattern += 1 + pattern[1];
1740
87.7M
    }
1741
1742
87.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
87.7M
           prefix, prefix_len, prefix_skip));
1744
87.7M
    TRACE(("charset = %p\n", charset));
1745
1746
87.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.07M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.22M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.22M
#endif
1753
4.22M
        end = (SRE_CHAR *)state->end;
1754
4.22M
        state->must_advance = 0;
1755
7.78M
        while (ptr < end) {
1756
105M
            while (*ptr != c) {
1757
98.3M
                if (++ptr >= end)
1758
547k
                    return 0;
1759
98.3M
            }
1760
7.22M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.22M
            state->start = ptr;
1762
7.22M
            state->ptr = ptr + prefix_skip;
1763
7.22M
            if (flags & SRE_INFO_LITERAL)
1764
4.73k
                return 1; /* we got all of it */
1765
7.21M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.21M
            if (status != 0)
1767
6.51M
                return status;
1768
708k
            ++ptr;
1769
708k
            RESET_CAPTURE_GROUP();
1770
708k
        }
1771
11.5k
        return 0;
1772
4.22M
    }
1773
1774
80.6M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
524k
        Py_ssize_t i = 0;
1778
1779
524k
        end = (SRE_CHAR *)state->end;
1780
524k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.55M
        for (i = 0; i < prefix_len; i++)
1784
1.03M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
519k
#endif
1787
1.50M
        while (ptr < end) {
1788
1.50M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.99M
            while (*ptr++ != c) {
1790
7.49M
                if (ptr >= end)
1791
291
                    return 0;
1792
7.49M
            }
1793
1.50M
            if (ptr >= end)
1794
48
                return 0;
1795
1796
1.50M
            i = 1;
1797
1.50M
            state->must_advance = 0;
1798
1.50M
            do {
1799
1.50M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.31M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.31M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.31M
                    state->start = ptr - (prefix_len - 1);
1808
1.31M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.31M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.31M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.31M
                    if (status != 0)
1813
524k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
788k
                    if (++ptr >= end)
1816
24
                        return 0;
1817
788k
                    RESET_CAPTURE_GROUP();
1818
788k
                }
1819
978k
                i = overlap[i];
1820
978k
            } while (i != 0);
1821
1.50M
        }
1822
0
        return 0;
1823
524k
    }
1824
1825
80.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
75.9M
        end = (SRE_CHAR *)state->end;
1828
75.9M
        state->must_advance = 0;
1829
78.9M
        for (;;) {
1830
350M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
271M
                ptr++;
1832
78.9M
            if (ptr >= end)
1833
4.12M
                return 0;
1834
74.8M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
74.8M
            state->start = ptr;
1836
74.8M
            state->ptr = ptr;
1837
74.8M
            status = SRE(match)(state, pattern, 0);
1838
74.8M
            if (status != 0)
1839
71.8M
                break;
1840
3.02M
            ptr++;
1841
3.02M
            RESET_CAPTURE_GROUP();
1842
3.02M
        }
1843
75.9M
    } else {
1844
        /* general case */
1845
4.18M
        assert(ptr <= end);
1846
4.18M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.18M
        state->start = state->ptr = ptr;
1848
4.18M
        status = SRE(match)(state, pattern, 1);
1849
4.18M
        state->must_advance = 0;
1850
4.18M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.18M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
341M
        while (status == 0 && ptr < end) {
1858
337M
            ptr++;
1859
337M
            RESET_CAPTURE_GROUP();
1860
337M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
337M
            state->start = state->ptr = ptr;
1862
337M
            status = SRE(match)(state, pattern, 0);
1863
337M
        }
1864
4.18M
    }
1865
1866
76.0M
    return status;
1867
80.1M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
38.8M
{
1694
38.8M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
38.8M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
38.8M
    Py_ssize_t status = 0;
1697
38.8M
    Py_ssize_t prefix_len = 0;
1698
38.8M
    Py_ssize_t prefix_skip = 0;
1699
38.8M
    SRE_CODE* prefix = NULL;
1700
38.8M
    SRE_CODE* charset = NULL;
1701
38.8M
    SRE_CODE* overlap = NULL;
1702
38.8M
    int flags = 0;
1703
38.8M
    INIT_TRACE(state);
1704
1705
38.8M
    if (ptr > end)
1706
0
        return 0;
1707
1708
38.8M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
38.8M
        flags = pattern[2];
1713
1714
38.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.62M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.62M
                   end - ptr, (size_t) pattern[3]));
1717
1.62M
            return 0;
1718
1.62M
        }
1719
37.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.68M
            end -= pattern[3] - 1;
1723
2.68M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.68M
        }
1726
1727
37.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.68M
            prefix_len = pattern[5];
1731
2.68M
            prefix_skip = pattern[6];
1732
2.68M
            prefix = pattern + 7;
1733
2.68M
            overlap = prefix + prefix_len - 1;
1734
34.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
31.5M
            charset = pattern + 5;
1738
1739
37.1M
        pattern += 1 + pattern[1];
1740
37.1M
    }
1741
1742
37.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
37.1M
           prefix, prefix_len, prefix_skip));
1744
37.1M
    TRACE(("charset = %p\n", charset));
1745
1746
37.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.67M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.67M
#if SIZEOF_SRE_CHAR < 4
1750
2.67M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.67M
#endif
1753
2.67M
        end = (SRE_CHAR *)state->end;
1754
2.67M
        state->must_advance = 0;
1755
2.85M
        while (ptr < end) {
1756
31.1M
            while (*ptr != c) {
1757
28.7M
                if (++ptr >= end)
1758
481k
                    return 0;
1759
28.7M
            }
1760
2.36M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.36M
            state->start = ptr;
1762
2.36M
            state->ptr = ptr + prefix_skip;
1763
2.36M
            if (flags & SRE_INFO_LITERAL)
1764
341
                return 1; /* we got all of it */
1765
2.36M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.36M
            if (status != 0)
1767
2.18M
                return status;
1768
181k
            ++ptr;
1769
181k
            RESET_CAPTURE_GROUP();
1770
181k
        }
1771
8.96k
        return 0;
1772
2.67M
    }
1773
1774
34.5M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
11.9k
        Py_ssize_t i = 0;
1778
1779
11.9k
        end = (SRE_CHAR *)state->end;
1780
11.9k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
11.9k
#if SIZEOF_SRE_CHAR < 4
1783
35.8k
        for (i = 0; i < prefix_len; i++)
1784
23.9k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
11.9k
#endif
1787
382k
        while (ptr < end) {
1788
382k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.67M
            while (*ptr++ != c) {
1790
2.29M
                if (ptr >= end)
1791
58
                    return 0;
1792
2.29M
            }
1793
382k
            if (ptr >= end)
1794
24
                return 0;
1795
1796
381k
            i = 1;
1797
381k
            state->must_advance = 0;
1798
382k
            do {
1799
382k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
281k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
281k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
281k
                    state->start = ptr - (prefix_len - 1);
1808
281k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
281k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
281k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
281k
                    if (status != 0)
1813
11.8k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
269k
                    if (++ptr >= end)
1816
9
                        return 0;
1817
269k
                    RESET_CAPTURE_GROUP();
1818
269k
                }
1819
370k
                i = overlap[i];
1820
370k
            } while (i != 0);
1821
381k
        }
1822
0
        return 0;
1823
11.9k
    }
1824
1825
34.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
31.5M
        end = (SRE_CHAR *)state->end;
1828
31.5M
        state->must_advance = 0;
1829
33.6M
        for (;;) {
1830
85.5M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
51.8M
                ptr++;
1832
33.6M
            if (ptr >= end)
1833
2.95M
                return 0;
1834
30.6M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
30.6M
            state->start = ptr;
1836
30.6M
            state->ptr = ptr;
1837
30.6M
            status = SRE(match)(state, pattern, 0);
1838
30.6M
            if (status != 0)
1839
28.5M
                break;
1840
2.10M
            ptr++;
1841
2.10M
            RESET_CAPTURE_GROUP();
1842
2.10M
        }
1843
31.5M
    } else {
1844
        /* general case */
1845
2.98M
        assert(ptr <= end);
1846
2.98M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.98M
        state->start = state->ptr = ptr;
1848
2.98M
        status = SRE(match)(state, pattern, 1);
1849
2.98M
        state->must_advance = 0;
1850
2.98M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.98M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
96.3M
        while (status == 0 && ptr < end) {
1858
93.3M
            ptr++;
1859
93.3M
            RESET_CAPTURE_GROUP();
1860
93.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
93.3M
            state->start = state->ptr = ptr;
1862
93.3M
            status = SRE(match)(state, pattern, 0);
1863
93.3M
        }
1864
2.98M
    }
1865
1866
31.5M
    return status;
1867
34.5M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
43.5M
{
1694
43.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
43.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
43.5M
    Py_ssize_t status = 0;
1697
43.5M
    Py_ssize_t prefix_len = 0;
1698
43.5M
    Py_ssize_t prefix_skip = 0;
1699
43.5M
    SRE_CODE* prefix = NULL;
1700
43.5M
    SRE_CODE* charset = NULL;
1701
43.5M
    SRE_CODE* overlap = NULL;
1702
43.5M
    int flags = 0;
1703
43.5M
    INIT_TRACE(state);
1704
1705
43.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
43.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
43.5M
        flags = pattern[2];
1713
1714
43.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
127k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
127k
                   end - ptr, (size_t) pattern[3]));
1717
127k
            return 0;
1718
127k
        }
1719
43.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.05M
            end -= pattern[3] - 1;
1723
2.05M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.05M
        }
1726
1727
43.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.06M
            prefix_len = pattern[5];
1731
2.06M
            prefix_skip = pattern[6];
1732
2.06M
            prefix = pattern + 7;
1733
2.06M
            overlap = prefix + prefix_len - 1;
1734
41.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
40.4M
            charset = pattern + 5;
1738
1739
43.4M
        pattern += 1 + pattern[1];
1740
43.4M
    }
1741
1742
43.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
43.4M
           prefix, prefix_len, prefix_skip));
1744
43.4M
    TRACE(("charset = %p\n", charset));
1745
1746
43.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.55M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.55M
#if SIZEOF_SRE_CHAR < 4
1750
1.55M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.55M
#endif
1753
1.55M
        end = (SRE_CHAR *)state->end;
1754
1.55M
        state->must_advance = 0;
1755
1.74M
        while (ptr < end) {
1756
49.6M
            while (*ptr != c) {
1757
47.9M
                if (++ptr >= end)
1758
61.4k
                    return 0;
1759
47.9M
            }
1760
1.68M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.68M
            state->start = ptr;
1762
1.68M
            state->ptr = ptr + prefix_skip;
1763
1.68M
            if (flags & SRE_INFO_LITERAL)
1764
1.37k
                return 1; /* we got all of it */
1765
1.68M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.68M
            if (status != 0)
1767
1.48M
                return status;
1768
194k
            ++ptr;
1769
194k
            RESET_CAPTURE_GROUP();
1770
194k
        }
1771
1.48k
        return 0;
1772
1.55M
    }
1773
1774
41.9M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
507k
        Py_ssize_t i = 0;
1778
1779
507k
        end = (SRE_CHAR *)state->end;
1780
507k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
507k
#if SIZEOF_SRE_CHAR < 4
1783
1.52M
        for (i = 0; i < prefix_len; i++)
1784
1.01M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
507k
#endif
1787
918k
        while (ptr < end) {
1788
918k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.00M
            while (*ptr++ != c) {
1790
3.08M
                if (ptr >= end)
1791
116
                    return 0;
1792
3.08M
            }
1793
918k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
918k
            i = 1;
1797
918k
            state->must_advance = 0;
1798
918k
            do {
1799
918k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
838k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
838k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
838k
                    state->start = ptr - (prefix_len - 1);
1808
838k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
838k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
838k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
838k
                    if (status != 0)
1813
507k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
330k
                    if (++ptr >= end)
1816
10
                        return 0;
1817
330k
                    RESET_CAPTURE_GROUP();
1818
330k
                }
1819
410k
                i = overlap[i];
1820
410k
            } while (i != 0);
1821
918k
        }
1822
0
        return 0;
1823
507k
    }
1824
1825
41.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
40.4M
        end = (SRE_CHAR *)state->end;
1828
40.4M
        state->must_advance = 0;
1829
40.8M
        for (;;) {
1830
191M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
150M
                ptr++;
1832
40.8M
            if (ptr >= end)
1833
1.11M
                return 0;
1834
39.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
39.7M
            state->start = ptr;
1836
39.7M
            state->ptr = ptr;
1837
39.7M
            status = SRE(match)(state, pattern, 0);
1838
39.7M
            if (status != 0)
1839
39.2M
                break;
1840
423k
            ptr++;
1841
423k
            RESET_CAPTURE_GROUP();
1842
423k
        }
1843
40.4M
    } else {
1844
        /* general case */
1845
1.00M
        assert(ptr <= end);
1846
1.00M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.00M
        state->start = state->ptr = ptr;
1848
1.00M
        status = SRE(match)(state, pattern, 1);
1849
1.00M
        state->must_advance = 0;
1850
1.00M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.00M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
175M
        while (status == 0 && ptr < end) {
1858
174M
            ptr++;
1859
174M
            RESET_CAPTURE_GROUP();
1860
174M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
174M
            state->start = state->ptr = ptr;
1862
174M
            status = SRE(match)(state, pattern, 0);
1863
174M
        }
1864
1.00M
    }
1865
1866
40.2M
    return status;
1867
41.4M
}
sre.c:sre_ucs4_search
Line
Count
Source
/* Body of the search routine as listed for the sre_ucs4_search
 * instantiation.  `state` and `pattern` are the function's parameters; the
 * signature line is not part of this per-instantiation listing.
 *
 * Starting at state->start, look for the first position at which
 * SRE(match) reports a non-zero status.  On a candidate position the
 * routine stores state->start / state->ptr before calling SRE(match).
 * Returns 0 when no position matches, 1 when the whole pattern is a
 * literal prefix that was found (SRE_INFO_LITERAL), and otherwise the
 * status produced by SRE(match).
 *
 * The "#if SIZEOF_SRE_CHAR < 4" sections below carry no execution counts
 * in this listing -- presumably they are compiled out for this character
 * width (TODO confirm against the including file).
 */
1693
7.08M
{
1694
7.08M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.08M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.08M
    Py_ssize_t status = 0;
1697
7.08M
    Py_ssize_t prefix_len = 0;
1698
7.08M
    Py_ssize_t prefix_skip = 0;
1699
7.08M
    SRE_CODE* prefix = NULL;
1700
7.08M
    SRE_CODE* charset = NULL;
1701
7.08M
    SRE_CODE* overlap = NULL;
1702
7.08M
    int flags = 0;
1703
7.08M
    INIT_TRACE(state);
1704
    /* start position already past the end: nothing to scan */
1705
7.08M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.08M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.08M
        flags = pattern[2];
1713
        /* fewer characters remain than the pattern's minimum width
           (pattern[3]): no match is possible */
1714
7.08M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.17k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.17k
                   end - ptr, (size_t) pattern[3]));
1717
6.17k
            return 0;
1718
6.17k
        }
        /* NOTE: the shortened `end` computed here is only used by the
           general-case loop at the bottom; the literal, prefix and charset
           paths each reload `end` from state->end before scanning */
1719
7.07M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.84M
            end -= pattern[3] - 1;
1723
2.84M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.84M
        }
1726
1727
7.07M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.85M
            prefix_len = pattern[5];
1731
2.85M
            prefix_skip = pattern[6];
1732
2.85M
            prefix = pattern + 7;
            /* biased by -1 so that overlap[i] for i >= 1 reads the word at
               prefix[prefix_len + i - 1], i.e. the data stored right after
               the prefix characters */
1733
2.85M
            overlap = prefix + prefix_len - 1;
1734
4.22M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.03M
            charset = pattern + 5;
1738
        /* step over the info block: pattern[1] is its skip count */
1739
7.07M
        pattern += 1 + pattern[1];
1740
7.07M
    }
1741
1742
7.07M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.07M
           prefix, prefix_len, prefix_skip));
1744
7.07M
    TRACE(("charset = %p\n", charset));
1745
1746
7.07M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.84M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.84M
        end = (SRE_CHAR *)state->end;
1754
2.84M
        state->must_advance = 0;
1755
3.17M
        while (ptr < end) {
            /* advance to the next occurrence of the literal; give up when
               the end of the input is reached first */
1756
24.7M
            while (*ptr != c) {
1757
21.6M
                if (++ptr >= end)
1758
3.95k
                    return 0;
1759
21.6M
            }
1760
3.17M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.17M
            state->start = ptr;
1762
3.17M
            state->ptr = ptr + prefix_skip;
1763
3.17M
            if (flags & SRE_INFO_LITERAL)
1764
3.01k
                return 1; /* we got all of it */
            /* resume matching after the part of the prefix already
               verified; the factor 2 presumably reflects two code words
               per prefix literal (TODO confirm against the compiler) */
1765
3.16M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.16M
            if (status != 0)
1767
2.83M
                return status;
            /* candidate failed: retry from the next character */
1768
331k
            ++ptr;
1769
331k
            RESET_CAPTURE_GROUP();
1770
331k
        }
1771
1.12k
        return 0;
1772
2.84M
    }
1773
1774
4.22M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
4.57k
        Py_ssize_t i = 0;
1778
1779
4.57k
        end = (SRE_CHAR *)state->end;
1780
4.57k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
200k
        while (ptr < end) {
1788
200k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
            /* find the first prefix character; because of the
               post-increment, ptr ends up one past the matching character */
1789
2.31M
            while (*ptr++ != c) {
1790
2.11M
                if (ptr >= end)
1791
117
                    return 0;
1792
2.11M
            }
1793
200k
            if (ptr >= end)
1794
12
                return 0;
1795
            /* i counts the prefix characters matched so far */
1796
200k
            i = 1;
1797
200k
            state->must_advance = 0;
1798
201k
            do {
1799
201k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
193k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
193k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
                    /* ptr is on the last prefix character here, so the
                       occurrence starts prefix_len - 1 characters back */
1807
193k
                    state->start = ptr - (prefix_len - 1);
1808
193k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
193k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
193k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
193k
                    if (status != 0)
1813
4.44k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
189k
                    if (++ptr >= end)
1816
5
                        return 0;
1817
189k
                    RESET_CAPTURE_GROUP();
1818
189k
                }
                /* fall back to the matched length given by the overlap
                   table; 0 ends this loop and restarts the scan for the
                   first prefix character */
1819
196k
                i = overlap[i];
1820
196k
            } while (i != 0);
1821
200k
        }
1822
0
        return 0;
1823
4.57k
    }
1824
1825
4.22M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.03M
        end = (SRE_CHAR *)state->end;
1828
4.03M
        state->must_advance = 0;
1829
4.52M
        for (;;) {
            /* skip characters for which SRE(charset) returns zero */
1830
73.5M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
69.0M
                ptr++;
1832
4.52M
            if (ptr >= end)
1833
56.4k
                return 0;
1834
4.47M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.47M
            state->start = ptr;
1836
4.47M
            state->ptr = ptr;
1837
4.47M
            status = SRE(match)(state, pattern, 0);
            /* non-zero status leaves the loop and is returned at the
               bottom of the function */
1838
4.47M
            if (status != 0)
1839
3.97M
                break;
1840
498k
            ptr++;
1841
498k
            RESET_CAPTURE_GROUP();
1842
498k
        }
1843
4.03M
    } else {
1844
        /* general case */
1845
194k
        assert(ptr <= end);
1846
194k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
194k
        state->start = state->ptr = ptr;
        /* only this first attempt passes 1 as the last argument; every
           retry below passes 0 (the argument's meaning is defined by
           SRE(match), which is outside this listing) */
1848
194k
        status = SRE(match)(state, pattern, 1);
1849
194k
        state->must_advance = 0;
        /* a pattern anchored at the beginning that failed at the first
           position is not retried: park start/ptr at `end` and report
           no match */
1850
194k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
194k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
        /* retry at each following position up to and including `end`
           (which may have been shortened by the minimum-width adjustment) */
1857
69.0M
        while (status == 0 && ptr < end) {
1858
68.8M
            ptr++;
1859
68.8M
            RESET_CAPTURE_GROUP();
1860
68.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
68.8M
            state->start = state->ptr = ptr;
1862
68.8M
            status = SRE(match)(state, pattern, 0);
1863
68.8M
        }
1864
194k
    }
1865
1866
4.16M
    return status;
1867
4.22M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/