Coverage Report

Created: 2025-11-11 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.9M
{
18
    /* check if pointer is at given position */
19
20
14.9M
    Py_ssize_t thisp, thatp;
21
22
14.9M
    switch (at) {
23
24
6.69M
    case SRE_AT_BEGINNING:
25
6.69M
    case SRE_AT_BEGINNING_STRING:
26
6.69M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.79M
    case SRE_AT_END:
33
4.79M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
27.9k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.79M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.47M
    case SRE_AT_END_STRING:
42
3.47M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.9M
    }
87
88
0
    return 0;
89
14.9M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
13.1M
{
18
    /* check if pointer is at given position */
19
20
13.1M
    Py_ssize_t thisp, thatp;
21
22
13.1M
    switch (at) {
23
24
6.65M
    case SRE_AT_BEGINNING:
25
6.65M
    case SRE_AT_BEGINNING_STRING:
26
6.65M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.44M
    case SRE_AT_END:
33
4.44M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
27.3k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.44M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.05M
    case SRE_AT_END_STRING:
42
2.05M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.1M
    }
87
88
0
    return 0;
89
13.1M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
993k
{
18
    /* check if pointer is at given position */
19
20
993k
    Py_ssize_t thisp, thatp;
21
22
993k
    switch (at) {
23
24
29.8k
    case SRE_AT_BEGINNING:
25
29.8k
    case SRE_AT_BEGINNING_STRING:
26
29.8k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
261k
    case SRE_AT_END:
33
261k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
89
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
261k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
702k
    case SRE_AT_END_STRING:
42
702k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
993k
    }
87
88
0
    return 0;
89
993k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
803k
{
18
    /* check if pointer is at given position */
19
20
803k
    Py_ssize_t thisp, thatp;
21
22
803k
    switch (at) {
23
24
3.72k
    case SRE_AT_BEGINNING:
25
3.72k
    case SRE_AT_BEGINNING_STRING:
26
3.72k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
81.8k
    case SRE_AT_END:
33
81.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
502
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
81.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
718k
    case SRE_AT_END_STRING:
42
718k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
803k
    }
87
88
0
    return 0;
89
803k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.45G
{
94
    /* check if character is a member of the given set */
95
96
1.45G
    int ok = 1;
97
98
3.27G
    for (;;) {
99
3.27G
        switch (*set++) {
100
101
948M
        case SRE_OP_FAILURE:
102
948M
            return !ok;
103
104
1.02G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.02G
            if (ch == set[0])
107
4.80M
                return ok;
108
1.01G
            set++;
109
1.01G
            break;
110
111
13.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
13.1M
            if (sre_category(set[0], (int) ch))
114
9.22M
                return ok;
115
3.93M
            set++;
116
3.93M
            break;
117
118
644M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
644M
            if (ch < 256 &&
121
598M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
275M
                return ok;
123
369M
            set += 256/SRE_CODE_BITS;
124
369M
            break;
125
126
346M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
346M
            if (set[0] <= ch && ch <= set[1])
129
213M
                return ok;
130
132M
            set += 2;
131
132M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
300M
        case SRE_OP_NEGATE:
148
300M
            ok = !ok;
149
300M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.27G
        }
175
3.27G
    }
176
1.45G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
335M
{
94
    /* check if character is a member of the given set */
95
96
335M
    int ok = 1;
97
98
673M
    for (;;) {
99
673M
        switch (*set++) {
100
101
176M
        case SRE_OP_FAILURE:
102
176M
            return !ok;
103
104
182M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
182M
            if (ch == set[0])
107
2.64M
                return ok;
108
179M
            set++;
109
179M
            break;
110
111
12.6M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
12.6M
            if (sre_category(set[0], (int) ch))
114
8.67M
                return ok;
115
3.92M
            set++;
116
3.92M
            break;
117
118
94.0M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
94.0M
            if (ch < 256 &&
121
94.0M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
46.4M
                return ok;
123
47.5M
            set += 256/SRE_CODE_BITS;
124
47.5M
            break;
125
126
168M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
168M
            if (set[0] <= ch && ch <= set[1])
129
101M
                return ok;
130
67.0M
            set += 2;
131
67.0M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
39.9M
        case SRE_OP_NEGATE:
148
39.9M
            ok = !ok;
149
39.9M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
673M
        }
175
673M
    }
176
335M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
631M
{
94
    /* check if character is a member of the given set */
95
96
631M
    int ok = 1;
97
98
1.53G
    for (;;) {
99
1.53G
        switch (*set++) {
100
101
461M
        case SRE_OP_FAILURE:
102
461M
            return !ok;
103
104
583M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
583M
            if (ch == set[0])
107
1.35M
                return ok;
108
582M
            set++;
109
582M
            break;
110
111
119k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
119k
            if (sre_category(set[0], (int) ch))
114
111k
                return ok;
115
7.66k
            set++;
116
7.66k
            break;
117
118
210M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
210M
            if (ch < 256 &&
121
192M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
68.5M
                return ok;
123
141M
            set += 256/SRE_CODE_BITS;
124
141M
            break;
125
126
157M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
157M
            if (set[0] <= ch && ch <= set[1])
129
99.9M
                return ok;
130
57.5M
            set += 2;
131
57.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
122M
        case SRE_OP_NEGATE:
148
122M
            ok = !ok;
149
122M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.53G
        }
175
1.53G
    }
176
631M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
484M
{
94
    /* check if character is a member of the given set */
95
96
484M
    int ok = 1;
97
98
1.06G
    for (;;) {
99
1.06G
        switch (*set++) {
100
101
310M
        case SRE_OP_FAILURE:
102
310M
            return !ok;
103
104
254M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
254M
            if (ch == set[0])
107
800k
                return ok;
108
253M
            set++;
109
253M
            break;
110
111
437k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
437k
            if (sre_category(set[0], (int) ch))
114
436k
                return ok;
115
819
            set++;
116
819
            break;
117
118
340M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
340M
            if (ch < 256 &&
121
311M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
160M
                return ok;
123
179M
            set += 256/SRE_CODE_BITS;
124
179M
            break;
125
126
20.4M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
20.4M
            if (set[0] <= ch && ch <= set[1])
129
12.1M
                return ok;
130
8.31M
            set += 2;
131
8.31M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
137M
        case SRE_OP_NEGATE:
148
137M
            ok = !ok;
149
137M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.06G
        }
175
1.06G
    }
176
484M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
541M
{
195
541M
    SRE_CODE chr;
196
541M
    SRE_CHAR c;
197
541M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
541M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
541M
    Py_ssize_t i;
200
541M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
541M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
17.8M
        end = ptr + maxcount;
205
206
541M
    switch (pattern[0]) {
207
208
462M
    case SRE_OP_IN:
209
        /* repeated set */
210
462M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
842M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
379M
            ptr++;
213
462M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
73.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
73.7M
        chr = pattern[1];
232
73.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
73.7M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
69.5M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
69.5M
        else
238
69.5M
#endif
239
79.3M
        while (ptr < end && *ptr == c)
240
5.59M
            ptr++;
241
73.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
5.25M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
5.25M
        chr = pattern[1];
270
5.25M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
5.25M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
2.81M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.81M
        else
276
2.81M
#endif
277
49.9M
        while (ptr < end && *ptr != c)
278
44.7M
            ptr++;
279
5.25M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
541M
    }
319
320
541M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
541M
           ptr - (SRE_CHAR*) state->ptr));
322
541M
    return ptr - (SRE_CHAR*) state->ptr;
323
541M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
155M
{
195
155M
    SRE_CODE chr;
196
155M
    SRE_CHAR c;
197
155M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
155M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
155M
    Py_ssize_t i;
200
155M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
155M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
3.49M
        end = ptr + maxcount;
205
206
155M
    switch (pattern[0]) {
207
208
93.2M
    case SRE_OP_IN:
209
        /* repeated set */
210
93.2M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
214M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
121M
            ptr++;
213
93.2M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
61.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
61.3M
        chr = pattern[1];
232
61.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
61.3M
        c = (SRE_CHAR) chr;
234
61.3M
#if SIZEOF_SRE_CHAR < 4
235
61.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
61.3M
        else
238
61.3M
#endif
239
63.2M
        while (ptr < end && *ptr == c)
240
1.87M
            ptr++;
241
61.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.00M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.00M
        chr = pattern[1];
270
1.00M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.00M
        c = (SRE_CHAR) chr;
272
1.00M
#if SIZEOF_SRE_CHAR < 4
273
1.00M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.00M
        else
276
1.00M
#endif
277
12.0M
        while (ptr < end && *ptr != c)
278
11.0M
            ptr++;
279
1.00M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
155M
    }
319
320
155M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
155M
           ptr - (SRE_CHAR*) state->ptr));
322
155M
    return ptr - (SRE_CHAR*) state->ptr;
323
155M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
222M
{
195
222M
    SRE_CODE chr;
196
222M
    SRE_CHAR c;
197
222M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
222M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
222M
    Py_ssize_t i;
200
222M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
222M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
6.36M
        end = ptr + maxcount;
205
206
222M
    switch (pattern[0]) {
207
208
212M
    case SRE_OP_IN:
209
        /* repeated set */
210
212M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
340M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
127M
            ptr++;
213
212M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
8.23M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
8.23M
        chr = pattern[1];
232
8.23M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
8.23M
        c = (SRE_CHAR) chr;
234
8.23M
#if SIZEOF_SRE_CHAR < 4
235
8.23M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
8.23M
        else
238
8.23M
#endif
239
11.4M
        while (ptr < end && *ptr == c)
240
3.20M
            ptr++;
241
8.23M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.81M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.81M
        chr = pattern[1];
270
1.81M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.81M
        c = (SRE_CHAR) chr;
272
1.81M
#if SIZEOF_SRE_CHAR < 4
273
1.81M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.81M
        else
276
1.81M
#endif
277
16.7M
        while (ptr < end && *ptr != c)
278
14.8M
            ptr++;
279
1.81M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
222M
    }
319
320
222M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
222M
           ptr - (SRE_CHAR*) state->ptr));
322
222M
    return ptr - (SRE_CHAR*) state->ptr;
323
222M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
163M
{
195
163M
    SRE_CODE chr;
196
163M
    SRE_CHAR c;
197
163M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
163M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
163M
    Py_ssize_t i;
200
163M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
163M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
8.03M
        end = ptr + maxcount;
205
206
163M
    switch (pattern[0]) {
207
208
156M
    case SRE_OP_IN:
209
        /* repeated set */
210
156M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
287M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
131M
            ptr++;
213
156M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
4.21M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
4.21M
        chr = pattern[1];
232
4.21M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
4.21M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
4.73M
        while (ptr < end && *ptr == c)
240
514k
            ptr++;
241
4.21M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.43M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.43M
        chr = pattern[1];
270
2.43M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.43M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
21.2M
        while (ptr < end && *ptr != c)
278
18.7M
            ptr++;
279
2.43M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
163M
    }
319
320
163M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
163M
           ptr - (SRE_CHAR*) state->ptr));
322
163M
    return ptr - (SRE_CHAR*) state->ptr;
323
163M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
491M
    do { \
355
491M
        ctx->lastmark = state->lastmark; \
356
491M
        ctx->lastindex = state->lastindex; \
357
491M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
172M
    do { \
360
172M
        state->lastmark = ctx->lastmark; \
361
172M
        state->lastindex = ctx->lastindex; \
362
172M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
148M
    do { \
366
148M
        TRACE(("push last_ptr: %zd", \
367
148M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
148M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
148M
    } while (0)
370
#define LAST_PTR_POP()  \
371
148M
    do { \
372
148M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
148M
        TRACE(("pop last_ptr: %zd", \
374
148M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
148M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
523M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
638M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.05G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
73.5M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
50.3M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.16G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.16G
do { \
390
1.16G
    alloc_pos = state->data_stack_base; \
391
1.16G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.16G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.16G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
151M
        int j = data_stack_grow(state, sizeof(type)); \
395
151M
        if (j < 0) return j; \
396
151M
        if (ctx_pos != -1) \
397
151M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
151M
    } \
399
1.16G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.16G
    state->data_stack_base += sizeof(type); \
401
1.16G
} while (0)
402
403
1.22G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.22G
do { \
405
1.22G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.22G
    ptr = (type*)(state->data_stack+pos); \
407
1.22G
} while (0)
408
409
383M
#define DATA_STACK_PUSH(state, data, size) \
410
383M
do { \
411
383M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
383M
           data, state->data_stack_base, size)); \
413
383M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
79.7k
        int j = data_stack_grow(state, size); \
415
79.7k
        if (j < 0) return j; \
416
79.7k
        if (ctx_pos != -1) \
417
79.7k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
79.7k
    } \
419
383M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
383M
    state->data_stack_base += size; \
421
383M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
214M
#define DATA_STACK_POP(state, data, size, discard) \
427
214M
do { \
428
214M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
214M
           data, state->data_stack_base-size, size)); \
430
214M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
214M
    if (discard) \
432
214M
        state->data_stack_base -= size; \
433
214M
} while (0)
434
435
1.33G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.33G
do { \
437
1.33G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.33G
           state->data_stack_base-size, size)); \
439
1.33G
    state->data_stack_base -= size; \
440
1.33G
} while(0)
441
442
#define DATA_PUSH(x) \
443
148M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
148M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.16G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.16G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.22G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
375M
    do if (lastmark >= 0) { \
473
234M
        MARK_TRACE("push", (lastmark)); \
474
234M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
234M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
375M
    } while (0)
477
#define MARK_POP(lastmark) \
478
87.5M
    do if (lastmark >= 0) { \
479
63.3M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
63.3M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
63.3M
        MARK_TRACE("pop", (lastmark)); \
482
87.5M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.90M
    do if (lastmark >= 0) { \
485
1.90M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.90M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.90M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.90M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
287M
    do if (lastmark >= 0) { \
491
171M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
171M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
171M
        MARK_TRACE("pop discard", (lastmark)); \
494
287M
    } while (0)
495
496
478M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
148M
#define JUMP_MAX_UNTIL_2     2
499
73.5M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
72.6M
#define JUMP_REPEAT          7
504
15.6M
#define JUMP_REPEAT_ONE_1    8
505
129M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
166M
#define JUMP_BRANCH          11
508
50.3M
#define JUMP_ASSERT          12
509
26.9M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
683M
    ctx->pattern = pattern; \
516
683M
    ctx->ptr = ptr; \
517
683M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
683M
    nextctx->pattern = nextpattern; \
519
683M
    nextctx->toplevel = toplevel_; \
520
683M
    nextctx->jump = jumpvalue; \
521
683M
    nextctx->last_ctx_pos = ctx_pos; \
522
683M
    pattern = nextpattern; \
523
683M
    ctx_pos = alloc_pos; \
524
683M
    ctx = nextctx; \
525
683M
    goto entrance; \
526
683M
    jumplabel: \
527
683M
    pattern = ctx->pattern; \
528
683M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
606M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
77.3M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.08G
    do {                                                           \
553
2.08G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.08G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.08G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.17G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.08G
        do {                               \
588
2.08G
            MAYBE_CHECK_SIGNALS;           \
589
2.08G
            goto *sre_targets[*pattern++]; \
590
2.08G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
478M
{
601
478M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
478M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
478M
    Py_ssize_t ret = 0;
604
478M
    int jump;
605
478M
    unsigned int sigcount = state->sigcount;
606
607
478M
    SRE(match_context)* ctx;
608
478M
    SRE(match_context)* nextctx;
609
478M
    INIT_TRACE(state);
610
611
478M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
478M
    DATA_ALLOC(SRE(match_context), ctx);
614
478M
    ctx->last_ctx_pos = -1;
615
478M
    ctx->jump = JUMP_NONE;
616
478M
    ctx->toplevel = toplevel;
617
478M
    ctx_pos = alloc_pos;
618
619
478M
#if USE_COMPUTED_GOTOS
620
478M
#include "sre_targets.h"
621
478M
#endif
622
623
1.16G
entrance:
624
625
1.16G
    ;  // Fashion statement.
626
1.16G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.16G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
67.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2.81M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2.81M
                   end - ptr, (size_t) pattern[3]));
634
2.81M
            RETURN_FAILURE;
635
2.81M
        }
636
64.8M
        pattern += pattern[1] + 1;
637
64.8M
    }
638
639
1.15G
#if USE_COMPUTED_GOTOS
640
1.15G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.15G
    {
647
648
1.15G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
461M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
461M
                   ptr, pattern[0]));
653
461M
            {
654
461M
                int i = pattern[0];
655
461M
                if (i & 1)
656
59.9M
                    state->lastindex = i/2 + 1;
657
461M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
454M
                    int j = state->lastmark + 1;
663
461M
                    while (j < i)
664
7.64M
                        state->mark[j++] = NULL;
665
454M
                    state->lastmark = i;
666
454M
                }
667
461M
                state->mark[i] = ptr;
668
461M
            }
669
461M
            pattern++;
670
461M
            DISPATCH;
671
672
461M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
132M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
132M
                   ptr, *pattern));
677
132M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
41.4M
                RETURN_FAILURE;
679
91.3M
            pattern++;
680
91.3M
            ptr++;
681
91.3M
            DISPATCH;
682
683
91.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
178M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
178M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
178M
            if (ctx->toplevel &&
698
52.8M
                ((state->match_all && ptr != state->end) ||
699
52.8M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
178M
            state->ptr = ptr;
704
178M
            RETURN_SUCCESS;
705
706
14.9M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.9M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.9M
            if (!SRE(at)(state, ptr, *pattern))
711
4.52M
                RETURN_FAILURE;
712
10.4M
            pattern++;
713
10.4M
            DISPATCH;
714
715
10.4M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
210M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
210M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
210M
            if (ptr >= end ||
749
210M
                !SRE(charset)(state, pattern + 1, *ptr))
750
13.8M
                RETURN_FAILURE;
751
196M
            pattern += pattern[0];
752
196M
            ptr++;
753
196M
            DISPATCH;
754
755
196M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.35M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.35M
                   pattern, ptr, pattern[0]));
758
7.35M
            if (ptr >= end ||
759
7.35M
                sre_lower_ascii(*ptr) != *pattern)
760
241k
                RETURN_FAILURE;
761
7.11M
            pattern++;
762
7.11M
            ptr++;
763
7.11M
            DISPATCH;
764
765
7.11M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
88.0M
        TARGET(SRE_OP_JUMP):
845
88.0M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
88.0M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
88.0M
                   ptr, pattern[0]));
850
88.0M
            pattern += pattern[0];
851
88.0M
            DISPATCH;
852
853
137M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
137M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
137M
            LASTMARK_SAVE();
858
137M
            if (state->repeat)
859
84.0M
                MARK_PUSH(ctx->lastmark);
860
332M
            for (; pattern[0]; pattern += pattern[0]) {
861
280M
                if (pattern[1] == SRE_OP_LITERAL &&
862
131M
                    (ptr >= end ||
863
131M
                     (SRE_CODE) *ptr != pattern[2]))
864
67.4M
                    continue;
865
213M
                if (pattern[1] == SRE_OP_IN &&
866
76.5M
                    (ptr >= end ||
867
76.4M
                     !SRE(charset)(state, pattern + 3,
868
76.4M
                                   (SRE_CODE) *ptr)))
869
47.0M
                    continue;
870
166M
                state->ptr = ptr;
871
166M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
166M
                if (ret) {
873
85.0M
                    if (state->repeat)
874
61.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
85.0M
                    RETURN_ON_ERROR(ret);
876
85.0M
                    RETURN_SUCCESS;
877
85.0M
                }
878
81.0M
                if (state->repeat)
879
23.9k
                    MARK_POP_KEEP(ctx->lastmark);
880
81.0M
                LASTMARK_RESTORE();
881
81.0M
            }
882
51.9M
            if (state->repeat)
883
22.4M
                MARK_POP_DISCARD(ctx->lastmark);
884
51.9M
            RETURN_FAILURE;
885
886
542M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
542M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
542M
                   pattern[1], pattern[2]));
898
899
542M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.36M
                RETURN_FAILURE; /* cannot match */
901
902
541M
            state->ptr = ptr;
903
904
541M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
541M
            RETURN_ON_ERROR(ret);
906
541M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
541M
            ctx->count = ret;
908
541M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
541M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
363M
                RETURN_FAILURE;
917
918
178M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.76M
                ptr == state->end &&
920
77.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
77.7k
            {
922
                /* tail is empty.  we're finished */
923
77.7k
                state->ptr = ptr;
924
77.7k
                RETURN_SUCCESS;
925
77.7k
            }
926
927
178M
            LASTMARK_SAVE();
928
178M
            if (state->repeat)
929
115M
                MARK_PUSH(ctx->lastmark);
930
931
178M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
50.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
50.6M
                for (;;) {
936
123M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
88.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
73.1M
                        ptr--;
939
73.1M
                        ctx->count--;
940
73.1M
                    }
941
50.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
35.0M
                        break;
943
15.6M
                    state->ptr = ptr;
944
15.6M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
15.6M
                            pattern+pattern[0]);
946
15.6M
                    if (ret) {
947
15.6M
                        if (state->repeat)
948
14.3M
                            MARK_POP_DISCARD(ctx->lastmark);
949
15.6M
                        RETURN_ON_ERROR(ret);
950
15.6M
                        RETURN_SUCCESS;
951
15.6M
                    }
952
631
                    if (state->repeat)
953
631
                        MARK_POP_KEEP(ctx->lastmark);
954
631
                    LASTMARK_RESTORE();
955
956
631
                    ptr--;
957
631
                    ctx->count--;
958
631
                }
959
35.0M
                if (state->repeat)
960
33.6M
                    MARK_POP_DISCARD(ctx->lastmark);
961
127M
            } else {
962
                /* general case */
963
130M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
129M
                    state->ptr = ptr;
965
129M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
129M
                            pattern+pattern[0]);
967
129M
                    if (ret) {
968
126M
                        if (state->repeat)
969
66.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
126M
                        RETURN_ON_ERROR(ret);
971
126M
                        RETURN_SUCCESS;
972
126M
                    }
973
3.51M
                    if (state->repeat)
974
1.87M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.51M
                    LASTMARK_RESTORE();
976
977
3.51M
                    ptr--;
978
3.51M
                    ctx->count--;
979
3.51M
                }
980
1.22M
                if (state->repeat)
981
991k
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.22M
            }
983
36.2M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
72.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
72.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
72.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
72.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
72.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
72.6M
            ctx->u.rep->count = -1;
1131
72.6M
            ctx->u.rep->pattern = pattern;
1132
72.6M
            ctx->u.rep->prev = state->repeat;
1133
72.6M
            ctx->u.rep->last_ptr = NULL;
1134
72.6M
            state->repeat = ctx->u.rep;
1135
1136
72.6M
            state->ptr = ptr;
1137
72.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
72.6M
            state->repeat = ctx->u.rep->prev;
1139
72.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
72.6M
            if (ret) {
1142
72.5M
                RETURN_ON_ERROR(ret);
1143
72.5M
                RETURN_SUCCESS;
1144
72.5M
            }
1145
111k
            RETURN_FAILURE;
1146
1147
162M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
162M
            ctx->u.rep = state->repeat;
1155
162M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
162M
            state->ptr = ptr;
1159
1160
162M
            ctx->count = ctx->u.rep->count+1;
1161
1162
162M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
162M
                   ptr, ctx->count));
1164
1165
162M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
162M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
13.0M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
148M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
148M
                ctx->u.rep->count = ctx->count;
1185
148M
                LASTMARK_SAVE();
1186
148M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
148M
                LAST_PTR_PUSH();
1189
148M
                ctx->u.rep->last_ptr = state->ptr;
1190
148M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
148M
                        ctx->u.rep->pattern+3);
1192
148M
                LAST_PTR_POP();
1193
148M
                if (ret) {
1194
88.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
88.4M
                    RETURN_ON_ERROR(ret);
1196
88.4M
                    RETURN_SUCCESS;
1197
88.4M
                }
1198
60.5M
                MARK_POP(ctx->lastmark);
1199
60.5M
                LASTMARK_RESTORE();
1200
60.5M
                ctx->u.rep->count = ctx->count-1;
1201
60.5M
                state->ptr = ptr;
1202
60.5M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
73.5M
            state->repeat = ctx->u.rep->prev;
1207
73.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
73.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
73.5M
            RETURN_ON_SUCCESS(ret);
1211
1.05M
            state->ptr = ptr;
1212
1.05M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
50.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
50.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
50.3M
                   ptr, pattern[1]));
1565
50.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
50.3M
            state->ptr = ptr - pattern[1];
1568
50.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
50.3M
            RETURN_ON_FAILURE(ret);
1570
43.5M
            pattern += pattern[0];
1571
43.5M
            DISPATCH;
1572
1573
43.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
26.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
26.9M
                   ptr, pattern[1]));
1578
26.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
26.9M
                state->ptr = ptr - pattern[1];
1580
26.9M
                LASTMARK_SAVE();
1581
26.9M
                if (state->repeat)
1582
26.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
53.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
53.9M
                if (ret) {
1586
17.6k
                    if (state->repeat)
1587
17.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
17.6k
                    RETURN_ON_ERROR(ret);
1589
17.6k
                    RETURN_FAILURE;
1590
17.6k
                }
1591
26.9M
                if (state->repeat)
1592
26.9M
                    MARK_POP(ctx->lastmark);
1593
26.9M
                LASTMARK_RESTORE();
1594
26.9M
            }
1595
26.9M
            pattern += pattern[0];
1596
26.9M
            DISPATCH;
1597
1598
26.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.16G
exit:
1620
1.16G
    ctx_pos = ctx->last_ctx_pos;
1621
1.16G
    jump = ctx->jump;
1622
1.16G
    DATA_POP_DISCARD(ctx);
1623
1.16G
    if (ctx_pos == -1) {
1624
478M
        state->sigcount = sigcount;
1625
478M
        return ret;
1626
478M
    }
1627
683M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
683M
    switch (jump) {
1630
148M
        case JUMP_MAX_UNTIL_2:
1631
148M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
148M
            goto jump_max_until_2;
1633
73.5M
        case JUMP_MAX_UNTIL_3:
1634
73.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
73.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
166M
        case JUMP_BRANCH:
1643
166M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
166M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
72.6M
        case JUMP_REPEAT:
1658
72.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
72.6M
            goto jump_repeat;
1660
15.6M
        case JUMP_REPEAT_ONE_1:
1661
15.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
15.6M
            goto jump_repeat_one_1;
1663
129M
        case JUMP_REPEAT_ONE_2:
1664
129M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
129M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
50.3M
        case JUMP_ASSERT:
1673
50.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
50.3M
            goto jump_assert;
1675
26.9M
        case JUMP_ASSERT_NOT:
1676
26.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
26.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
683M
    }
1683
1684
0
    return ret; /* should never get here */
1685
683M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
147M
{
601
147M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
147M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
147M
    Py_ssize_t ret = 0;
604
147M
    int jump;
605
147M
    unsigned int sigcount = state->sigcount;
606
607
147M
    SRE(match_context)* ctx;
608
147M
    SRE(match_context)* nextctx;
609
147M
    INIT_TRACE(state);
610
611
147M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
147M
    DATA_ALLOC(SRE(match_context), ctx);
614
147M
    ctx->last_ctx_pos = -1;
615
147M
    ctx->jump = JUMP_NONE;
616
147M
    ctx->toplevel = toplevel;
617
147M
    ctx_pos = alloc_pos;
618
619
147M
#if USE_COMPUTED_GOTOS
620
147M
#include "sre_targets.h"
621
147M
#endif
622
623
329M
entrance:
624
625
329M
    ;  // Fashion statement.
626
329M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
329M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
30.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2.80M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2.80M
                   end - ptr, (size_t) pattern[3]));
634
2.80M
            RETURN_FAILURE;
635
2.80M
        }
636
27.7M
        pattern += pattern[1] + 1;
637
27.7M
    }
638
639
326M
#if USE_COMPUTED_GOTOS
640
326M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
326M
    {
647
648
326M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
130M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
130M
                   ptr, pattern[0]));
653
130M
            {
654
130M
                int i = pattern[0];
655
130M
                if (i & 1)
656
21.8M
                    state->lastindex = i/2 + 1;
657
130M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
126M
                    int j = state->lastmark + 1;
663
130M
                    while (j < i)
664
4.06M
                        state->mark[j++] = NULL;
665
126M
                    state->lastmark = i;
666
126M
                }
667
130M
                state->mark[i] = ptr;
668
130M
            }
669
130M
            pattern++;
670
130M
            DISPATCH;
671
672
130M
        TARGET(SRE_OP_LITERAL):
673
            /* match a single literal character */
674
            /* <LITERAL> <code> */
675
69.6M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
69.6M
                   ptr, *pattern));
677
69.6M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
22.5M
                RETURN_FAILURE;
679
47.1M
            pattern++;
680
47.1M
            ptr++;
681
47.1M
            DISPATCH;
682
683
47.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
53.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
53.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
53.2M
            if (ctx->toplevel &&
698
21.3M
                ((state->match_all && ptr != state->end) ||
699
21.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
53.2M
            state->ptr = ptr;
704
53.2M
            RETURN_SUCCESS;
705
706
13.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.1M
            if (!SRE(at)(state, ptr, *pattern))
711
2.76M
                RETURN_FAILURE;
712
10.4M
            pattern++;
713
10.4M
            DISPATCH;
714
715
10.4M
        TARGET(SRE_OP_CATEGORY):
716
            /* match a single character belonging to the given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
36.5M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
36.5M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
36.5M
            if (ptr >= end ||
749
36.5M
                !SRE(charset)(state, pattern + 1, *ptr))
750
592k
                RETURN_FAILURE;
751
35.9M
            pattern += pattern[0];
752
35.9M
            ptr++;
753
35.9M
            DISPATCH;
754
755
35.9M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.11M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.11M
                   pattern, ptr, pattern[0]));
758
1.11M
            if (ptr >= end ||
759
1.11M
                sre_lower_ascii(*ptr) != *pattern)
760
146k
                RETURN_FAILURE;
761
963k
            pattern++;
762
963k
            ptr++;
763
963k
            DISPATCH;
764
765
963k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
31.6M
        TARGET(SRE_OP_JUMP):
845
31.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
31.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
31.6M
                   ptr, pattern[0]));
850
31.6M
            pattern += pattern[0];
851
31.6M
            DISPATCH;
852
853
57.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
57.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
57.2M
            LASTMARK_SAVE();
858
57.2M
            if (state->repeat)
859
12.3M
                MARK_PUSH(ctx->lastmark);
860
165M
            for (; pattern[0]; pattern += pattern[0]) {
861
138M
                if (pattern[1] == SRE_OP_LITERAL &&
862
65.0M
                    (ptr >= end ||
863
64.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
25.5M
                    continue;
865
112M
                if (pattern[1] == SRE_OP_IN &&
866
12.4M
                    (ptr >= end ||
867
12.4M
                     !SRE(charset)(state, pattern + 3,
868
12.4M
                                   (SRE_CODE) *ptr)))
869
6.93M
                    continue;
870
105M
                state->ptr = ptr;
871
105M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
105M
                if (ret) {
873
29.5M
                    if (state->repeat)
874
11.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
29.5M
                    RETURN_ON_ERROR(ret);
876
29.5M
                    RETURN_SUCCESS;
877
29.5M
                }
878
76.1M
                if (state->repeat)
879
7.12k
                    MARK_POP_KEEP(ctx->lastmark);
880
76.1M
                LASTMARK_RESTORE();
881
76.1M
            }
882
27.6M
            if (state->repeat)
883
468k
                MARK_POP_DISCARD(ctx->lastmark);
884
27.6M
            RETURN_FAILURE;
885
886
156M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
156M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
156M
                   pattern[1], pattern[2]));
898
899
156M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.15M
                RETURN_FAILURE; /* cannot match */
901
902
155M
            state->ptr = ptr;
903
904
155M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
155M
            RETURN_ON_ERROR(ret);
906
155M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
155M
            ctx->count = ret;
908
155M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
155M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
122M
                RETURN_FAILURE;
917
918
32.7M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
712k
                ptr == state->end &&
920
55.9k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
55.9k
            {
922
                /* tail is empty.  we're finished */
923
55.9k
                state->ptr = ptr;
924
55.9k
                RETURN_SUCCESS;
925
55.9k
            }
926
927
32.6M
            LASTMARK_SAVE();
928
32.6M
            if (state->repeat)
929
17.5M
                MARK_PUSH(ctx->lastmark);
930
931
32.6M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.72M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.72M
                for (;;) {
936
17.8M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
14.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.0M
                        ptr--;
939
11.0M
                        ctx->count--;
940
11.0M
                    }
941
6.72M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.15M
                        break;
943
3.57M
                    state->ptr = ptr;
944
3.57M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.57M
                            pattern+pattern[0]);
946
3.57M
                    if (ret) {
947
3.57M
                        if (state->repeat)
948
2.25M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.57M
                        RETURN_ON_ERROR(ret);
950
3.57M
                        RETURN_SUCCESS;
951
3.57M
                    }
952
139
                    if (state->repeat)
953
139
                        MARK_POP_KEEP(ctx->lastmark);
954
139
                    LASTMARK_RESTORE();
955
956
139
                    ptr--;
957
139
                    ctx->count--;
958
139
                }
959
3.15M
                if (state->repeat)
960
1.77M
                    MARK_POP_DISCARD(ctx->lastmark);
961
25.9M
            } else {
962
                /* general case */
963
28.2M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
27.3M
                    state->ptr = ptr;
965
27.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
27.3M
                            pattern+pattern[0]);
967
27.3M
                    if (ret) {
968
25.0M
                        if (state->repeat)
969
12.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
25.0M
                        RETURN_ON_ERROR(ret);
971
25.0M
                        RETURN_SUCCESS;
972
25.0M
                    }
973
2.28M
                    if (state->repeat)
974
1.19M
                        MARK_POP_KEEP(ctx->lastmark);
975
2.28M
                    LASTMARK_RESTORE();
976
977
2.28M
                    ptr--;
978
2.28M
                    ctx->count--;
979
2.28M
                }
980
875k
                if (state->repeat)
981
649k
                    MARK_POP_DISCARD(ctx->lastmark);
982
875k
            }
983
4.03M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
7.49M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
7.49M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
7.49M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
7.49M
            ctx->u.rep = repeat_pool_malloc(state);
1127
7.49M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
7.49M
            ctx->u.rep->count = -1;
1131
7.49M
            ctx->u.rep->pattern = pattern;
1132
7.49M
            ctx->u.rep->prev = state->repeat;
1133
7.49M
            ctx->u.rep->last_ptr = NULL;
1134
7.49M
            state->repeat = ctx->u.rep;
1135
1136
7.49M
            state->ptr = ptr;
1137
7.49M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
7.49M
            state->repeat = ctx->u.rep->prev;
1139
7.49M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
7.49M
            if (ret) {
1142
7.38M
                RETURN_ON_ERROR(ret);
1143
7.38M
                RETURN_SUCCESS;
1144
7.38M
            }
1145
109k
            RETURN_FAILURE;
1146
1147
25.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
25.6M
            ctx->u.rep = state->repeat;
1155
25.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
25.6M
            state->ptr = ptr;
1159
1160
25.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
25.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
25.6M
                   ptr, ctx->count));
1164
1165
25.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
25.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.87M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
20.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
20.7M
                ctx->u.rep->count = ctx->count;
1185
20.7M
                LASTMARK_SAVE();
1186
20.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
20.7M
                LAST_PTR_PUSH();
1189
20.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
20.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
20.7M
                        ctx->u.rep->pattern+3);
1192
20.7M
                LAST_PTR_POP();
1193
20.7M
                if (ret) {
1194
17.5M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
17.5M
                    RETURN_ON_ERROR(ret);
1196
17.5M
                    RETURN_SUCCESS;
1197
17.5M
                }
1198
3.21M
                MARK_POP(ctx->lastmark);
1199
3.21M
                LASTMARK_RESTORE();
1200
3.21M
                ctx->u.rep->count = ctx->count-1;
1201
3.21M
                state->ptr = ptr;
1202
3.21M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
8.09M
            state->repeat = ctx->u.rep->prev;
1207
8.09M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
8.09M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
8.09M
            RETURN_ON_SUCCESS(ret);
1211
714k
            state->ptr = ptr;
1212
714k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.29M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.29M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.29M
                   ptr, pattern[1]));
1565
3.29M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.29M
            state->ptr = ptr - pattern[1];
1568
3.29M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.29M
            RETURN_ON_FAILURE(ret);
1570
3.08M
            pattern += pattern[0];
1571
3.08M
            DISPATCH;
1572
1573
5.41M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.41M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.41M
                   ptr, pattern[1]));
1578
5.41M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.41M
                state->ptr = ptr - pattern[1];
1580
5.41M
                LASTMARK_SAVE();
1581
5.41M
                if (state->repeat)
1582
5.41M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.8M
                if (ret) {
1586
1.43k
                    if (state->repeat)
1587
1.43k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.43k
                    RETURN_ON_ERROR(ret);
1589
1.43k
                    RETURN_FAILURE;
1590
1.43k
                }
1591
5.41M
                if (state->repeat)
1592
5.41M
                    MARK_POP(ctx->lastmark);
1593
5.41M
                LASTMARK_RESTORE();
1594
5.41M
            }
1595
5.41M
            pattern += pattern[0];
1596
5.41M
            DISPATCH;
1597
1598
5.41M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
329M
exit:
1620
329M
    ctx_pos = ctx->last_ctx_pos;
1621
329M
    jump = ctx->jump;
1622
329M
    DATA_POP_DISCARD(ctx);
1623
329M
    if (ctx_pos == -1) {
1624
147M
        state->sigcount = sigcount;
1625
147M
        return ret;
1626
147M
    }
1627
181M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
181M
    switch (jump) {
1630
20.7M
        case JUMP_MAX_UNTIL_2:
1631
20.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
20.7M
            goto jump_max_until_2;
1633
8.09M
        case JUMP_MAX_UNTIL_3:
1634
8.09M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
8.09M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
105M
        case JUMP_BRANCH:
1643
105M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
105M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
7.49M
        case JUMP_REPEAT:
1658
7.49M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
7.49M
            goto jump_repeat;
1660
3.57M
        case JUMP_REPEAT_ONE_1:
1661
3.57M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.57M
            goto jump_repeat_one_1;
1663
27.3M
        case JUMP_REPEAT_ONE_2:
1664
27.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
27.3M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.29M
        case JUMP_ASSERT:
1673
3.29M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.29M
            goto jump_assert;
1675
5.41M
        case JUMP_ASSERT_NOT:
1676
5.41M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.41M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
181M
    }
1683
1684
0
    return ret; /* should never get here */
1685
181M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
240M
{
601
240M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
240M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
240M
    Py_ssize_t ret = 0;
604
240M
    int jump;
605
240M
    unsigned int sigcount = state->sigcount;
606
607
240M
    SRE(match_context)* ctx;
608
240M
    SRE(match_context)* nextctx;
609
240M
    INIT_TRACE(state);
610
611
240M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
240M
    DATA_ALLOC(SRE(match_context), ctx);
614
240M
    ctx->last_ctx_pos = -1;
615
240M
    ctx->jump = JUMP_NONE;
616
240M
    ctx->toplevel = toplevel;
617
240M
    ctx_pos = alloc_pos;
618
619
240M
#if USE_COMPUTED_GOTOS
620
240M
#include "sre_targets.h"
621
240M
#endif
622
623
401M
entrance:
624
625
401M
    ;  // Fashion statement.
626
401M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
401M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
19.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
881
            TRACE(("reject (got %tu chars, need %zu)\n",
633
881
                   end - ptr, (size_t) pattern[3]));
634
881
            RETURN_FAILURE;
635
881
        }
636
19.6M
        pattern += pattern[1] + 1;
637
19.6M
    }
638
639
401M
#if USE_COMPUTED_GOTOS
640
401M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
401M
    {
647
648
401M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
203M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
203M
                   ptr, pattern[0]));
653
203M
            {
654
203M
                int i = pattern[0];
655
203M
                if (i & 1)
656
12.9M
                    state->lastindex = i/2 + 1;
657
203M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
202M
                    int j = state->lastmark + 1;
663
203M
                    while (j < i)
664
1.15M
                        state->mark[j++] = NULL;
665
202M
                    state->lastmark = i;
666
202M
                }
667
203M
                state->mark[i] = ptr;
668
203M
            }
669
203M
            pattern++;
670
203M
            DISPATCH;
671
672
203M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
28.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
28.0M
                   ptr, *pattern));
677
28.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
7.13M
                RETURN_FAILURE;
679
20.9M
            pattern++;
680
20.9M
            ptr++;
681
20.9M
            DISPATCH;
682
683
20.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
69.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
69.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
69.7M
            if (ctx->toplevel &&
698
14.7M
                ((state->match_all && ptr != state->end) ||
699
14.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
69.7M
            state->ptr = ptr;
704
69.7M
            RETURN_SUCCESS;
705
706
993k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
993k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
993k
            if (!SRE(at)(state, ptr, *pattern))
711
962k
                RETURN_FAILURE;
712
30.9k
            pattern++;
713
30.9k
            DISPATCH;
714
715
30.9k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
85.9M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
85.9M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
85.9M
            if (ptr >= end ||
749
85.9M
                !SRE(charset)(state, pattern + 1, *ptr))
750
9.15M
                RETURN_FAILURE;
751
76.7M
            pattern += pattern[0];
752
76.7M
            ptr++;
753
76.7M
            DISPATCH;
754
755
76.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.24M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.24M
                   pattern, ptr, pattern[0]));
758
4.24M
            if (ptr >= end ||
759
4.24M
                sre_lower_ascii(*ptr) != *pattern)
760
74.5k
                RETURN_FAILURE;
761
4.16M
            pattern++;
762
4.16M
            ptr++;
763
4.16M
            DISPATCH;
764
765
4.16M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.5M
        TARGET(SRE_OP_JUMP):
845
19.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.5M
                   ptr, pattern[0]));
850
19.5M
            pattern += pattern[0];
851
19.5M
            DISPATCH;
852
853
27.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
27.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
27.4M
            LASTMARK_SAVE();
858
27.4M
            if (state->repeat)
859
23.7M
                MARK_PUSH(ctx->lastmark);
860
56.8M
            for (; pattern[0]; pattern += pattern[0]) {
861
48.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
22.2M
                    (ptr >= end ||
863
22.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
11.9M
                    continue;
865
36.5M
                if (pattern[1] == SRE_OP_IN &&
866
20.9M
                    (ptr >= end ||
867
20.9M
                     !SRE(charset)(state, pattern + 3,
868
20.9M
                                   (SRE_CODE) *ptr)))
869
13.3M
                    continue;
870
23.1M
                state->ptr = ptr;
871
23.1M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.1M
                if (ret) {
873
19.1M
                    if (state->repeat)
874
17.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
19.1M
                    RETURN_ON_ERROR(ret);
876
19.1M
                    RETURN_SUCCESS;
877
19.1M
                }
878
4.04M
                if (state->repeat)
879
3.35k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.04M
                LASTMARK_RESTORE();
881
4.04M
            }
882
8.28M
            if (state->repeat)
883
6.62M
                MARK_POP_DISCARD(ctx->lastmark);
884
8.28M
            RETURN_FAILURE;
885
886
222M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
222M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
222M
                   pattern[1], pattern[2]));
898
899
222M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
186k
                RETURN_FAILURE; /* cannot match */
901
902
222M
            state->ptr = ptr;
903
904
222M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
222M
            RETURN_ON_ERROR(ret);
906
222M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
222M
            ctx->count = ret;
908
222M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
222M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
175M
                RETURN_FAILURE;
917
918
47.0M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.10M
                ptr == state->end &&
920
18.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.1k
            {
922
                /* tail is empty.  we're finished */
923
18.1k
                state->ptr = ptr;
924
18.1k
                RETURN_SUCCESS;
925
18.1k
            }
926
927
47.0M
            LASTMARK_SAVE();
928
47.0M
            if (state->repeat)
929
24.5M
                MARK_PUSH(ctx->lastmark);
930
931
47.0M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
10.5M
                ctx->u.chr = pattern[pattern[0]+1];
935
10.5M
                for (;;) {
936
23.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
17.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
13.1M
                        ptr--;
939
13.1M
                        ctx->count--;
940
13.1M
                    }
941
10.5M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
5.85M
                        break;
943
4.65M
                    state->ptr = ptr;
944
4.65M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.65M
                            pattern+pattern[0]);
946
4.65M
                    if (ret) {
947
4.65M
                        if (state->repeat)
948
4.62M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.65M
                        RETURN_ON_ERROR(ret);
950
4.65M
                        RETURN_SUCCESS;
951
4.65M
                    }
952
201
                    if (state->repeat)
953
201
                        MARK_POP_KEEP(ctx->lastmark);
954
201
                    LASTMARK_RESTORE();
955
956
201
                    ptr--;
957
201
                    ctx->count--;
958
201
                }
959
5.85M
                if (state->repeat)
960
5.84M
                    MARK_POP_DISCARD(ctx->lastmark);
961
36.5M
            } else {
962
                /* general case */
963
37.2M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
36.9M
                    state->ptr = ptr;
965
36.9M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
36.9M
                            pattern+pattern[0]);
967
36.9M
                    if (ret) {
968
36.3M
                        if (state->repeat)
969
13.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
36.3M
                        RETURN_ON_ERROR(ret);
971
36.3M
                        RETURN_SUCCESS;
972
36.3M
                    }
973
632k
                    if (state->repeat)
974
520k
                        MARK_POP_KEEP(ctx->lastmark);
975
632k
                    LASTMARK_RESTORE();
976
977
632k
                    ptr--;
978
632k
                    ctx->count--;
979
632k
                }
980
262k
                if (state->repeat)
981
260k
                    MARK_POP_DISCARD(ctx->lastmark);
982
262k
            }
983
6.11M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
18.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
18.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
18.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
18.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
18.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
18.2M
            ctx->u.rep->count = -1;
1131
18.2M
            ctx->u.rep->pattern = pattern;
1132
18.2M
            ctx->u.rep->prev = state->repeat;
1133
18.2M
            ctx->u.rep->last_ptr = NULL;
1134
18.2M
            state->repeat = ctx->u.rep;
1135
1136
18.2M
            state->ptr = ptr;
1137
18.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
18.2M
            state->repeat = ctx->u.rep->prev;
1139
18.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
18.2M
            if (ret) {
1142
18.2M
                RETURN_ON_ERROR(ret);
1143
18.2M
                RETURN_SUCCESS;
1144
18.2M
            }
1145
1.19k
            RETURN_FAILURE;
1146
1147
40.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
40.6M
            ctx->u.rep = state->repeat;
1155
40.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
40.6M
            state->ptr = ptr;
1159
1160
40.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
40.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
40.6M
                   ptr, ctx->count));
1164
1165
40.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
40.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.07M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
37.5M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
37.5M
                ctx->u.rep->count = ctx->count;
1185
37.5M
                LASTMARK_SAVE();
1186
37.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
37.5M
                LAST_PTR_PUSH();
1189
37.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
37.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
37.5M
                        ctx->u.rep->pattern+3);
1192
37.5M
                LAST_PTR_POP();
1193
37.5M
                if (ret) {
1194
22.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
22.1M
                    RETURN_ON_ERROR(ret);
1196
22.1M
                    RETURN_SUCCESS;
1197
22.1M
                }
1198
15.4M
                MARK_POP(ctx->lastmark);
1199
15.4M
                LASTMARK_RESTORE();
1200
15.4M
                ctx->u.rep->count = ctx->count-1;
1201
15.4M
                state->ptr = ptr;
1202
15.4M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
18.5M
            state->repeat = ctx->u.rep->prev;
1207
18.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
18.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
18.5M
            RETURN_ON_SUCCESS(ret);
1211
261k
            state->ptr = ptr;
1212
261k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
14.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
14.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
14.8M
                   ptr, pattern[1]));
1565
14.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
14.8M
            state->ptr = ptr - pattern[1];
1568
14.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
14.8M
            RETURN_ON_FAILURE(ret);
1570
9.22M
            pattern += pattern[0];
1571
9.22M
            DISPATCH;
1572
1573
9.22M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.69M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.69M
                   ptr, pattern[1]));
1578
7.69M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.69M
                state->ptr = ptr - pattern[1];
1580
7.69M
                LASTMARK_SAVE();
1581
7.69M
                if (state->repeat)
1582
7.69M
                    MARK_PUSH(ctx->lastmark);
1583
1584
15.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
15.3M
                if (ret) {
1586
3.12k
                    if (state->repeat)
1587
3.12k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.12k
                    RETURN_ON_ERROR(ret);
1589
3.12k
                    RETURN_FAILURE;
1590
3.12k
                }
1591
7.69M
                if (state->repeat)
1592
7.69M
                    MARK_POP(ctx->lastmark);
1593
7.69M
                LASTMARK_RESTORE();
1594
7.69M
            }
1595
7.69M
            pattern += pattern[0];
1596
7.69M
            DISPATCH;
1597
1598
7.69M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
401M
exit:
1620
401M
    ctx_pos = ctx->last_ctx_pos;
1621
401M
    jump = ctx->jump;
1622
401M
    DATA_POP_DISCARD(ctx);
1623
401M
    if (ctx_pos == -1) {
1624
240M
        state->sigcount = sigcount;
1625
240M
        return ret;
1626
240M
    }
1627
161M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
161M
    switch (jump) {
1630
37.5M
        case JUMP_MAX_UNTIL_2:
1631
37.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
37.5M
            goto jump_max_until_2;
1633
18.5M
        case JUMP_MAX_UNTIL_3:
1634
18.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
18.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.1M
        case JUMP_BRANCH:
1643
23.1M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.1M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
18.2M
        case JUMP_REPEAT:
1658
18.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
18.2M
            goto jump_repeat;
1660
4.65M
        case JUMP_REPEAT_ONE_1:
1661
4.65M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.65M
            goto jump_repeat_one_1;
1663
36.9M
        case JUMP_REPEAT_ONE_2:
1664
36.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
36.9M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
14.8M
        case JUMP_ASSERT:
1673
14.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
14.8M
            goto jump_assert;
1675
7.69M
        case JUMP_ASSERT_NOT:
1676
7.69M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.69M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
161M
    }
1683
1684
0
    return ret; /* should never get here */
1685
161M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
90.6M
{
601
90.6M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
90.6M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
90.6M
    Py_ssize_t ret = 0;
604
90.6M
    int jump;
605
90.6M
    unsigned int sigcount = state->sigcount;
606
607
90.6M
    SRE(match_context)* ctx;
608
90.6M
    SRE(match_context)* nextctx;
609
90.6M
    INIT_TRACE(state);
610
611
90.6M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
90.6M
    DATA_ALLOC(SRE(match_context), ctx);
614
90.6M
    ctx->last_ctx_pos = -1;
615
90.6M
    ctx->jump = JUMP_NONE;
616
90.6M
    ctx->toplevel = toplevel;
617
90.6M
    ctx_pos = alloc_pos;
618
619
90.6M
#if USE_COMPUTED_GOTOS
620
90.6M
#include "sre_targets.h"
621
90.6M
#endif
622
623
431M
entrance:
624
625
431M
    ;  // Fashion statement.
626
431M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
431M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
17.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
386
            TRACE(("reject (got %tu chars, need %zu)\n",
633
386
                   end - ptr, (size_t) pattern[3]));
634
386
            RETURN_FAILURE;
635
386
        }
636
17.4M
        pattern += pattern[1] + 1;
637
17.4M
    }
638
639
431M
#if USE_COMPUTED_GOTOS
640
431M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
431M
    {
647
648
431M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
127M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
127M
                   ptr, pattern[0]));
653
127M
            {
654
127M
                int i = pattern[0];
655
127M
                if (i & 1)
656
25.2M
                    state->lastindex = i/2 + 1;
657
127M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
124M
                    int j = state->lastmark + 1;
663
127M
                    while (j < i)
664
2.41M
                        state->mark[j++] = NULL;
665
124M
                    state->lastmark = i;
666
124M
                }
667
127M
                state->mark[i] = ptr;
668
127M
            }
669
127M
            pattern++;
670
127M
            DISPATCH;
671
672
127M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
35.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
35.0M
                   ptr, *pattern));
677
35.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
11.8M
                RETURN_FAILURE;
679
23.2M
            pattern++;
680
23.2M
            ptr++;
681
23.2M
            DISPATCH;
682
683
23.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
55.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
55.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
55.2M
            if (ctx->toplevel &&
698
16.6M
                ((state->match_all && ptr != state->end) ||
699
16.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
55.2M
            state->ptr = ptr;
704
55.2M
            RETURN_SUCCESS;
705
706
803k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
803k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
803k
            if (!SRE(at)(state, ptr, *pattern))
711
799k
                RETURN_FAILURE;
712
4.12k
            pattern++;
713
4.12k
            DISPATCH;
714
715
4.12k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
87.5M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
87.5M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
87.5M
            if (ptr >= end ||
749
87.5M
                !SRE(charset)(state, pattern + 1, *ptr))
750
4.10M
                RETURN_FAILURE;
751
83.4M
            pattern += pattern[0];
752
83.4M
            ptr++;
753
83.4M
            DISPATCH;
754
755
83.4M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.00M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.00M
                   pattern, ptr, pattern[0]));
758
2.00M
            if (ptr >= end ||
759
2.00M
                sre_lower_ascii(*ptr) != *pattern)
760
20.0k
                RETURN_FAILURE;
761
1.98M
            pattern++;
762
1.98M
            ptr++;
763
1.98M
            DISPATCH;
764
765
1.98M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
36.8M
        TARGET(SRE_OP_JUMP):
845
36.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
36.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
36.8M
                   ptr, pattern[0]));
850
36.8M
            pattern += pattern[0];
851
36.8M
            DISPATCH;
852
853
52.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
52.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
52.4M
            LASTMARK_SAVE();
858
52.4M
            if (state->repeat)
859
47.9M
                MARK_PUSH(ctx->lastmark);
860
109M
            for (; pattern[0]; pattern += pattern[0]) {
861
93.8M
                if (pattern[1] == SRE_OP_LITERAL &&
862
44.5M
                    (ptr >= end ||
863
44.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
29.9M
                    continue;
865
63.9M
                if (pattern[1] == SRE_OP_IN &&
866
43.0M
                    (ptr >= end ||
867
43.0M
                     !SRE(charset)(state, pattern + 3,
868
43.0M
                                   (SRE_CODE) *ptr)))
869
26.7M
                    continue;
870
37.2M
                state->ptr = ptr;
871
37.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
37.2M
                if (ret) {
873
36.3M
                    if (state->repeat)
874
32.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
36.3M
                    RETURN_ON_ERROR(ret);
876
36.3M
                    RETURN_SUCCESS;
877
36.3M
                }
878
848k
                if (state->repeat)
879
13.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
848k
                LASTMARK_RESTORE();
881
848k
            }
882
16.0M
            if (state->repeat)
883
15.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
16.0M
            RETURN_FAILURE;
885
886
163M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
163M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
163M
                   pattern[1], pattern[2]));
898
899
163M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
21.0k
                RETURN_FAILURE; /* cannot match */
901
902
163M
            state->ptr = ptr;
903
904
163M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
163M
            RETURN_ON_ERROR(ret);
906
163M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
163M
            ctx->count = ret;
908
163M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
163M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
64.9M
                RETURN_FAILURE;
917
918
98.3M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
944k
                ptr == state->end &&
920
3.76k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.76k
            {
922
                /* tail is empty.  we're finished */
923
3.76k
                state->ptr = ptr;
924
3.76k
                RETURN_SUCCESS;
925
3.76k
            }
926
927
98.3M
            LASTMARK_SAVE();
928
98.3M
            if (state->repeat)
929
73.4M
                MARK_PUSH(ctx->lastmark);
930
931
98.3M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
33.4M
                ctx->u.chr = pattern[pattern[0]+1];
935
33.4M
                for (;;) {
936
82.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
56.4M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
48.9M
                        ptr--;
939
48.9M
                        ctx->count--;
940
48.9M
                    }
941
33.4M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
26.0M
                        break;
943
7.44M
                    state->ptr = ptr;
944
7.44M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.44M
                            pattern+pattern[0]);
946
7.44M
                    if (ret) {
947
7.44M
                        if (state->repeat)
948
7.44M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.44M
                        RETURN_ON_ERROR(ret);
950
7.44M
                        RETURN_SUCCESS;
951
7.44M
                    }
952
291
                    if (state->repeat)
953
291
                        MARK_POP_KEEP(ctx->lastmark);
954
291
                    LASTMARK_RESTORE();
955
956
291
                    ptr--;
957
291
                    ctx->count--;
958
291
                }
959
26.0M
                if (state->repeat)
960
26.0M
                    MARK_POP_DISCARD(ctx->lastmark);
961
64.8M
            } else {
962
                /* general case */
963
65.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
65.3M
                    state->ptr = ptr;
965
65.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
65.3M
                            pattern+pattern[0]);
967
65.3M
                    if (ret) {
968
64.7M
                        if (state->repeat)
969
39.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
64.7M
                        RETURN_ON_ERROR(ret);
971
64.7M
                        RETURN_SUCCESS;
972
64.7M
                    }
973
599k
                    if (state->repeat)
974
162k
                        MARK_POP_KEEP(ctx->lastmark);
975
599k
                    LASTMARK_RESTORE();
976
977
599k
                    ptr--;
978
599k
                    ctx->count--;
979
599k
                }
980
82.1k
                if (state->repeat)
981
81.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
82.1k
            }
983
26.0M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
46.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
46.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
46.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
46.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
46.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
46.9M
            ctx->u.rep->count = -1;
1131
46.9M
            ctx->u.rep->pattern = pattern;
1132
46.9M
            ctx->u.rep->prev = state->repeat;
1133
46.9M
            ctx->u.rep->last_ptr = NULL;
1134
46.9M
            state->repeat = ctx->u.rep;
1135
1136
46.9M
            state->ptr = ptr;
1137
46.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
46.9M
            state->repeat = ctx->u.rep->prev;
1139
46.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
46.9M
            if (ret) {
1142
46.9M
                RETURN_ON_ERROR(ret);
1143
46.9M
                RETURN_SUCCESS;
1144
46.9M
            }
1145
748
            RETURN_FAILURE;
1146
1147
95.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
95.7M
            ctx->u.rep = state->repeat;
1155
95.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
95.7M
            state->ptr = ptr;
1159
1160
95.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
95.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
95.7M
                   ptr, ctx->count));
1164
1165
95.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
95.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.08M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
90.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
90.6M
                ctx->u.rep->count = ctx->count;
1185
90.6M
                LASTMARK_SAVE();
1186
90.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
90.6M
                LAST_PTR_PUSH();
1189
90.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
90.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
90.6M
                        ctx->u.rep->pattern+3);
1192
90.6M
                LAST_PTR_POP();
1193
90.6M
                if (ret) {
1194
48.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
48.7M
                    RETURN_ON_ERROR(ret);
1196
48.7M
                    RETURN_SUCCESS;
1197
48.7M
                }
1198
41.9M
                MARK_POP(ctx->lastmark);
1199
41.9M
                LASTMARK_RESTORE();
1200
41.9M
                ctx->u.rep->count = ctx->count-1;
1201
41.9M
                state->ptr = ptr;
1202
41.9M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
46.9M
            state->repeat = ctx->u.rep->prev;
1207
46.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
46.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
46.9M
            RETURN_ON_SUCCESS(ret);
1211
81.9k
            state->ptr = ptr;
1212
81.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
32.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
32.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
32.2M
                   ptr, pattern[1]));
1565
32.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
32.2M
            state->ptr = ptr - pattern[1];
1568
32.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
32.2M
            RETURN_ON_FAILURE(ret);
1570
31.2M
            pattern += pattern[0];
1571
31.2M
            DISPATCH;
1572
1573
31.2M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
13.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
13.8M
                   ptr, pattern[1]));
1578
13.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
13.8M
                state->ptr = ptr - pattern[1];
1580
13.8M
                LASTMARK_SAVE();
1581
13.8M
                if (state->repeat)
1582
13.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
27.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
27.7M
                if (ret) {
1586
13.1k
                    if (state->repeat)
1587
13.1k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
13.1k
                    RETURN_ON_ERROR(ret);
1589
13.1k
                    RETURN_FAILURE;
1590
13.1k
                }
1591
13.8M
                if (state->repeat)
1592
13.8M
                    MARK_POP(ctx->lastmark);
1593
13.8M
                LASTMARK_RESTORE();
1594
13.8M
            }
1595
13.8M
            pattern += pattern[0];
1596
13.8M
            DISPATCH;
1597
1598
13.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
431M
exit:
1620
431M
    ctx_pos = ctx->last_ctx_pos;
1621
431M
    jump = ctx->jump;
1622
431M
    DATA_POP_DISCARD(ctx);
1623
431M
    if (ctx_pos == -1) {
1624
90.6M
        state->sigcount = sigcount;
1625
90.6M
        return ret;
1626
90.6M
    }
1627
340M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
340M
    switch (jump) {
1630
90.6M
        case JUMP_MAX_UNTIL_2:
1631
90.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
90.6M
            goto jump_max_until_2;
1633
46.9M
        case JUMP_MAX_UNTIL_3:
1634
46.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
46.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
37.2M
        case JUMP_BRANCH:
1643
37.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
37.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
46.9M
        case JUMP_REPEAT:
1658
46.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
46.9M
            goto jump_repeat;
1660
7.44M
        case JUMP_REPEAT_ONE_1:
1661
7.44M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.44M
            goto jump_repeat_one_1;
1663
65.3M
        case JUMP_REPEAT_ONE_2:
1664
65.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
65.3M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
32.2M
        case JUMP_ASSERT:
1673
32.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
32.2M
            goto jump_assert;
1675
13.8M
        case JUMP_ASSERT_NOT:
1676
13.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
13.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
340M
    }
1683
1684
0
    return ret; /* should never get here */
1685
340M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
328M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
90.4M
{
1694
90.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
90.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
90.4M
    Py_ssize_t status = 0;
1697
90.4M
    Py_ssize_t prefix_len = 0;
1698
90.4M
    Py_ssize_t prefix_skip = 0;
1699
90.4M
    SRE_CODE* prefix = NULL;
1700
90.4M
    SRE_CODE* charset = NULL;
1701
90.4M
    SRE_CODE* overlap = NULL;
1702
90.4M
    int flags = 0;
1703
90.4M
    INIT_TRACE(state);
1704
1705
90.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
90.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
90.4M
        flags = pattern[2];
1713
1714
90.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
2.33M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
2.33M
                   end - ptr, (size_t) pattern[3]));
1717
2.33M
            return 0;
1718
2.33M
        }
1719
88.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
9.31M
            end -= pattern[3] - 1;
1723
9.31M
            if (end <= ptr)
1724
0
                end = ptr;
1725
9.31M
        }
1726
1727
88.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
9.31M
            prefix_len = pattern[5];
1731
9.31M
            prefix_skip = pattern[6];
1732
9.31M
            prefix = pattern + 7;
1733
9.31M
            overlap = prefix + prefix_len - 1;
1734
78.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
73.4M
            charset = pattern + 5;
1738
1739
88.1M
        pattern += 1 + pattern[1];
1740
88.1M
    }
1741
1742
88.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
88.1M
           prefix, prefix_len, prefix_skip));
1744
88.1M
    TRACE(("charset = %p\n", charset));
1745
1746
88.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
8.53M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.77M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.77M
#endif
1753
4.77M
        end = (SRE_CHAR *)state->end;
1754
4.77M
        state->must_advance = 0;
1755
9.09M
        while (ptr < end) {
1756
115M
            while (*ptr != c) {
1757
107M
                if (++ptr >= end)
1758
1.06M
                    return 0;
1759
107M
            }
1760
7.99M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.99M
            state->start = ptr;
1762
7.99M
            state->ptr = ptr + prefix_skip;
1763
7.99M
            if (flags & SRE_INFO_LITERAL)
1764
3.45k
                return 1; /* we got all of it */
1765
7.98M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.98M
            if (status != 0)
1767
7.42M
                return status;
1768
559k
            ++ptr;
1769
559k
            RESET_CAPTURE_GROUP();
1770
559k
        }
1771
38.6k
        return 0;
1772
4.77M
    }
1773
1774
79.6M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
780k
        Py_ssize_t i = 0;
1778
1779
780k
        end = (SRE_CHAR *)state->end;
1780
780k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.87M
        for (i = 0; i < prefix_len; i++)
1784
1.24M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
624k
#endif
1787
1.69M
        while (ptr < end) {
1788
1.69M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
10.1M
            while (*ptr++ != c) {
1790
8.48M
                if (ptr >= end)
1791
322
                    return 0;
1792
8.48M
            }
1793
1.69M
            if (ptr >= end)
1794
60
                return 0;
1795
1796
1.69M
            i = 1;
1797
1.69M
            state->must_advance = 0;
1798
1.69M
            do {
1799
1.69M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.54M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.54M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.54M
                    state->start = ptr - (prefix_len - 1);
1808
1.54M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.54M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.54M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.54M
                    if (status != 0)
1813
779k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
760k
                    if (++ptr >= end)
1816
36
                        return 0;
1817
760k
                    RESET_CAPTURE_GROUP();
1818
760k
                }
1819
916k
                i = overlap[i];
1820
916k
            } while (i != 0);
1821
1.69M
        }
1822
0
        return 0;
1823
780k
    }
1824
1825
78.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
73.4M
        end = (SRE_CHAR *)state->end;
1828
73.4M
        state->must_advance = 0;
1829
76.4M
        for (;;) {
1830
331M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
255M
                ptr++;
1832
76.4M
            if (ptr >= end)
1833
4.05M
                return 0;
1834
72.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
72.3M
            state->start = ptr;
1836
72.3M
            state->ptr = ptr;
1837
72.3M
            status = SRE(match)(state, pattern, 0);
1838
72.3M
            if (status != 0)
1839
69.4M
                break;
1840
2.95M
            ptr++;
1841
2.95M
            RESET_CAPTURE_GROUP();
1842
2.95M
        }
1843
73.4M
    } else {
1844
        /* general case */
1845
5.33M
        assert(ptr <= end);
1846
5.33M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.33M
        state->start = state->ptr = ptr;
1848
5.33M
        status = SRE(match)(state, pattern, 1);
1849
5.33M
        state->must_advance = 0;
1850
5.33M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
52
            (pattern[1] == SRE_AT_BEGINNING ||
1852
52
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
329M
        while (status == 0 && ptr < end) {
1858
323M
            ptr++;
1859
323M
            RESET_CAPTURE_GROUP();
1860
323M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
323M
            state->start = state->ptr = ptr;
1862
323M
            status = SRE(match)(state, pattern, 0);
1863
323M
        }
1864
5.33M
    }
1865
1866
74.7M
    return status;
1867
78.8M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
35.6M
{
1694
35.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
35.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
35.6M
    Py_ssize_t status = 0;
1697
35.6M
    Py_ssize_t prefix_len = 0;
1698
35.6M
    Py_ssize_t prefix_skip = 0;
1699
35.6M
    SRE_CODE* prefix = NULL;
1700
35.6M
    SRE_CODE* charset = NULL;
1701
35.6M
    SRE_CODE* overlap = NULL;
1702
35.6M
    int flags = 0;
1703
35.6M
    INIT_TRACE(state);
1704
1705
35.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
35.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
35.6M
        flags = pattern[2];
1713
1714
35.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
2.19M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
2.19M
                   end - ptr, (size_t) pattern[3]));
1717
2.19M
            return 0;
1718
2.19M
        }
1719
33.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.81M
            end -= pattern[3] - 1;
1723
2.81M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.81M
        }
1726
1727
33.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.81M
            prefix_len = pattern[5];
1731
2.81M
            prefix_skip = pattern[6];
1732
2.81M
            prefix = pattern + 7;
1733
2.81M
            overlap = prefix + prefix_len - 1;
1734
30.6M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
26.6M
            charset = pattern + 5;
1738
1739
33.4M
        pattern += 1 + pattern[1];
1740
33.4M
    }
1741
1742
33.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
33.4M
           prefix, prefix_len, prefix_skip));
1744
33.4M
    TRACE(("charset = %p\n", charset));
1745
1746
33.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.75M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.75M
#if SIZEOF_SRE_CHAR < 4
1750
2.75M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.75M
#endif
1753
2.75M
        end = (SRE_CHAR *)state->end;
1754
2.75M
        state->must_advance = 0;
1755
2.95M
        while (ptr < end) {
1756
29.3M
            while (*ptr != c) {
1757
27.3M
                if (++ptr >= end)
1758
990k
                    return 0;
1759
27.3M
            }
1760
1.92M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.92M
            state->start = ptr;
1762
1.92M
            state->ptr = ptr + prefix_skip;
1763
1.92M
            if (flags & SRE_INFO_LITERAL)
1764
251
                return 1; /* we got all of it */
1765
1.92M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.92M
            if (status != 0)
1767
1.72M
                return status;
1768
200k
            ++ptr;
1769
200k
            RESET_CAPTURE_GROUP();
1770
200k
        }
1771
35.2k
        return 0;
1772
2.75M
    }
1773
1774
30.6M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
58.5k
        Py_ssize_t i = 0;
1778
1779
58.5k
        end = (SRE_CHAR *)state->end;
1780
58.5k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
58.5k
#if SIZEOF_SRE_CHAR < 4
1783
175k
        for (i = 0; i < prefix_len; i++)
1784
117k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
58.5k
#endif
1787
292k
        while (ptr < end) {
1788
292k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.99M
            while (*ptr++ != c) {
1790
1.70M
                if (ptr >= end)
1791
60
                    return 0;
1792
1.70M
            }
1793
292k
            if (ptr >= end)
1794
26
                return 0;
1795
1796
292k
            i = 1;
1797
292k
            state->must_advance = 0;
1798
292k
            do {
1799
292k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
230k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
230k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
230k
                    state->start = ptr - (prefix_len - 1);
1808
230k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
230k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
230k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
230k
                    if (status != 0)
1813
58.4k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
171k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
171k
                    RESET_CAPTURE_GROUP();
1818
171k
                }
1819
234k
                i = overlap[i];
1820
234k
            } while (i != 0);
1821
292k
        }
1822
0
        return 0;
1823
58.5k
    }
1824
1825
30.6M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
26.6M
        end = (SRE_CHAR *)state->end;
1828
26.6M
        state->must_advance = 0;
1829
28.6M
        for (;;) {
1830
79.0M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
50.3M
                ptr++;
1832
28.6M
            if (ptr >= end)
1833
2.86M
                return 0;
1834
25.8M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
25.8M
            state->start = ptr;
1836
25.8M
            state->ptr = ptr;
1837
25.8M
            status = SRE(match)(state, pattern, 0);
1838
25.8M
            if (status != 0)
1839
23.7M
                break;
1840
2.04M
            ptr++;
1841
2.04M
            RESET_CAPTURE_GROUP();
1842
2.04M
        }
1843
26.6M
    } else {
1844
        /* general case */
1845
3.98M
        assert(ptr <= end);
1846
3.98M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.98M
        state->start = state->ptr = ptr;
1848
3.98M
        status = SRE(match)(state, pattern, 1);
1849
3.98M
        state->must_advance = 0;
1850
3.98M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
11
            (pattern[1] == SRE_AT_BEGINNING ||
1852
11
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
89.1M
        while (status == 0 && ptr < end) {
1858
85.1M
            ptr++;
1859
85.1M
            RESET_CAPTURE_GROUP();
1860
85.1M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
85.1M
            state->start = state->ptr = ptr;
1862
85.1M
            status = SRE(match)(state, pattern, 0);
1863
85.1M
        }
1864
3.98M
    }
1865
1866
27.7M
    return status;
1867
30.6M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
47.3M
{
1694
47.3M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
47.3M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
47.3M
    Py_ssize_t status = 0;
1697
47.3M
    Py_ssize_t prefix_len = 0;
1698
47.3M
    Py_ssize_t prefix_skip = 0;
1699
47.3M
    SRE_CODE* prefix = NULL;
1700
47.3M
    SRE_CODE* charset = NULL;
1701
47.3M
    SRE_CODE* overlap = NULL;
1702
47.3M
    int flags = 0;
1703
47.3M
    INIT_TRACE(state);
1704
1705
47.3M
    if (ptr > end)
1706
0
        return 0;
1707
1708
47.3M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
47.3M
        flags = pattern[2];
1713
1714
47.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
124k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
124k
                   end - ptr, (size_t) pattern[3]));
1717
124k
            return 0;
1718
124k
        }
1719
47.2M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.58M
            end -= pattern[3] - 1;
1723
2.58M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.58M
        }
1726
1727
47.2M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.58M
            prefix_len = pattern[5];
1731
2.58M
            prefix_skip = pattern[6];
1732
2.58M
            prefix = pattern + 7;
1733
2.58M
            overlap = prefix + prefix_len - 1;
1734
44.6M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
43.5M
            charset = pattern + 5;
1738
1739
47.2M
        pattern += 1 + pattern[1];
1740
47.2M
    }
1741
1742
47.2M
    TRACE(("prefix = %p %zd %zd\n",
1743
47.2M
           prefix, prefix_len, prefix_skip));
1744
47.2M
    TRACE(("charset = %p\n", charset));
1745
1746
47.2M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.02M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.02M
#if SIZEOF_SRE_CHAR < 4
1750
2.02M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.02M
#endif
1753
2.02M
        end = (SRE_CHAR *)state->end;
1754
2.02M
        state->must_advance = 0;
1755
2.21M
        while (ptr < end) {
1756
54.4M
            while (*ptr != c) {
1757
52.3M
                if (++ptr >= end)
1758
72.0k
                    return 0;
1759
52.3M
            }
1760
2.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.14M
            state->start = ptr;
1762
2.14M
            state->ptr = ptr + prefix_skip;
1763
2.14M
            if (flags & SRE_INFO_LITERAL)
1764
1.73k
                return 1; /* we got all of it */
1765
2.14M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.14M
            if (status != 0)
1767
1.94M
                return status;
1768
198k
            ++ptr;
1769
198k
            RESET_CAPTURE_GROUP();
1770
198k
        }
1771
2.60k
        return 0;
1772
2.02M
    }
1773
1774
45.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
565k
        Py_ssize_t i = 0;
1778
1779
565k
        end = (SRE_CHAR *)state->end;
1780
565k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
565k
#if SIZEOF_SRE_CHAR < 4
1783
1.69M
        for (i = 0; i < prefix_len; i++)
1784
1.13M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
565k
#endif
1787
970k
        while (ptr < end) {
1788
970k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.85M
            while (*ptr++ != c) {
1790
2.88M
                if (ptr >= end)
1791
123
                    return 0;
1792
2.88M
            }
1793
970k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
970k
            i = 1;
1797
970k
            state->must_advance = 0;
1798
970k
            do {
1799
970k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
898k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
898k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
898k
                    state->start = ptr - (prefix_len - 1);
1808
898k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
898k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
898k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
898k
                    if (status != 0)
1813
565k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
333k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
333k
                    RESET_CAPTURE_GROUP();
1818
333k
                }
1819
404k
                i = overlap[i];
1820
404k
            } while (i != 0);
1821
970k
        }
1822
0
        return 0;
1823
565k
    }
1824
1825
44.6M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
43.5M
        end = (SRE_CHAR *)state->end;
1828
43.5M
        state->must_advance = 0;
1829
43.9M
        for (;;) {
1830
186M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
142M
                ptr++;
1832
43.9M
            if (ptr >= end)
1833
1.14M
                return 0;
1834
42.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
42.7M
            state->start = ptr;
1836
42.7M
            state->ptr = ptr;
1837
42.7M
            status = SRE(match)(state, pattern, 0);
1838
42.7M
            if (status != 0)
1839
42.3M
                break;
1840
414k
            ptr++;
1841
414k
            RESET_CAPTURE_GROUP();
1842
414k
        }
1843
43.5M
    } else {
1844
        /* general case */
1845
1.14M
        assert(ptr <= end);
1846
1.14M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.14M
        state->start = state->ptr = ptr;
1848
1.14M
        status = SRE(match)(state, pattern, 1);
1849
1.14M
        state->must_advance = 0;
1850
1.14M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
22
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
174M
        while (status == 0 && ptr < end) {
1858
173M
            ptr++;
1859
173M
            RESET_CAPTURE_GROUP();
1860
173M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
173M
            state->start = state->ptr = ptr;
1862
173M
            status = SRE(match)(state, pattern, 0);
1863
173M
        }
1864
1.14M
    }
1865
1866
43.5M
    return status;
1867
44.6M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.46M
{
1694
7.46M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.46M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.46M
    Py_ssize_t status = 0;
1697
7.46M
    Py_ssize_t prefix_len = 0;
1698
7.46M
    Py_ssize_t prefix_skip = 0;
1699
7.46M
    SRE_CODE* prefix = NULL;
1700
7.46M
    SRE_CODE* charset = NULL;
1701
7.46M
    SRE_CODE* overlap = NULL;
1702
7.46M
    int flags = 0;
1703
7.46M
    INIT_TRACE(state);
1704
1705
7.46M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.46M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.46M
        flags = pattern[2];
1713
1714
7.46M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
7.74k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
7.74k
                   end - ptr, (size_t) pattern[3]));
1717
7.74k
            return 0;
1718
7.74k
        }
1719
7.45M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.91M
            end -= pattern[3] - 1;
1723
3.91M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.91M
        }
1726
1727
7.45M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.91M
            prefix_len = pattern[5];
1731
3.91M
            prefix_skip = pattern[6];
1732
3.91M
            prefix = pattern + 7;
1733
3.91M
            overlap = prefix + prefix_len - 1;
1734
3.91M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.33M
            charset = pattern + 5;
1738
1739
7.45M
        pattern += 1 + pattern[1];
1740
7.45M
    }
1741
1742
7.45M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.45M
           prefix, prefix_len, prefix_skip));
1744
7.45M
    TRACE(("charset = %p\n", charset));
1745
1746
7.45M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.76M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
3.76M
        end = (SRE_CHAR *)state->end;
1754
3.76M
        state->must_advance = 0;
1755
3.92M
        while (ptr < end) {
1756
32.0M
            while (*ptr != c) {
1757
28.1M
                if (++ptr >= end)
1758
4.55k
                    return 0;
1759
28.1M
            }
1760
3.91M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.91M
            state->start = ptr;
1762
3.91M
            state->ptr = ptr + prefix_skip;
1763
3.91M
            if (flags & SRE_INFO_LITERAL)
1764
1.46k
                return 1; /* we got all of it */
1765
3.91M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.91M
            if (status != 0)
1767
3.75M
                return status;
1768
161k
            ++ptr;
1769
161k
            RESET_CAPTURE_GROUP();
1770
161k
        }
1771
835
        return 0;
1772
3.76M
    }
1773
1774
3.69M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
155k
        Py_ssize_t i = 0;
1778
1779
155k
        end = (SRE_CHAR *)state->end;
1780
155k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
432k
        while (ptr < end) {
1788
432k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.32M
            while (*ptr++ != c) {
1790
3.89M
                if (ptr >= end)
1791
139
                    return 0;
1792
3.89M
            }
1793
432k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
432k
            i = 1;
1797
432k
            state->must_advance = 0;
1798
433k
            do {
1799
433k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
411k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
411k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
411k
                    state->start = ptr - (prefix_len - 1);
1808
411k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
411k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
411k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
411k
                    if (status != 0)
1813
155k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
256k
                    if (++ptr >= end)
1816
6
                        return 0;
1817
256k
                    RESET_CAPTURE_GROUP();
1818
256k
                }
1819
277k
                i = overlap[i];
1820
277k
            } while (i != 0);
1821
432k
        }
1822
0
        return 0;
1823
155k
    }
1824
1825
3.53M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.33M
        end = (SRE_CHAR *)state->end;
1828
3.33M
        state->must_advance = 0;
1829
3.82M
        for (;;) {
1830
66.5M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
62.7M
                ptr++;
1832
3.82M
            if (ptr >= end)
1833
53.6k
                return 0;
1834
3.77M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.77M
            state->start = ptr;
1836
3.77M
            state->ptr = ptr;
1837
3.77M
            status = SRE(match)(state, pattern, 0);
1838
3.77M
            if (status != 0)
1839
3.27M
                break;
1840
491k
            ptr++;
1841
491k
            RESET_CAPTURE_GROUP();
1842
491k
        }
1843
3.33M
    } else {
1844
        /* general case */
1845
203k
        assert(ptr <= end);
1846
203k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
203k
        state->start = state->ptr = ptr;
1848
203k
        status = SRE(match)(state, pattern, 1);
1849
203k
        state->must_advance = 0;
1850
203k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
19
            (pattern[1] == SRE_AT_BEGINNING ||
1852
19
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
65.0M
        while (status == 0 && ptr < end) {
1858
64.8M
            ptr++;
1859
64.8M
            RESET_CAPTURE_GROUP();
1860
64.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
64.8M
            state->start = state->ptr = ptr;
1862
64.8M
            status = SRE(match)(state, pattern, 0);
1863
64.8M
        }
1864
203k
    }
1865
1866
3.48M
    return status;
1867
3.53M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/