Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
103M
{
18
    /* check if pointer is at given position */
19
20
103M
    Py_ssize_t thisp, thatp;
21
22
103M
    switch (at) {
23
24
10.3M
    case SRE_AT_BEGINNING:
25
10.3M
    case SRE_AT_BEGINNING_STRING:
26
10.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
82.6M
    case SRE_AT_END:
33
82.6M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.50M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
82.6M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
10.0M
    case SRE_AT_END_STRING:
42
10.0M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
103M
    }
87
88
0
    return 0;
89
103M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
32.6M
{
18
    /* check if pointer is at given position */
19
20
32.6M
    Py_ssize_t thisp, thatp;
21
22
32.6M
    switch (at) {
23
24
8.92M
    case SRE_AT_BEGINNING:
25
8.92M
    case SRE_AT_BEGINNING_STRING:
26
8.92M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
18.8M
    case SRE_AT_END:
33
18.8M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
311k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
18.8M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
4.93M
    case SRE_AT_END_STRING:
42
4.93M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
32.6M
    }
87
88
0
    return 0;
89
32.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
59.2M
{
18
    /* check if pointer is at given position */
19
20
59.2M
    Py_ssize_t thisp, thatp;
21
22
59.2M
    switch (at) {
23
24
1.43M
    case SRE_AT_BEGINNING:
25
1.43M
    case SRE_AT_BEGINNING_STRING:
26
1.43M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
56.3M
    case SRE_AT_END:
33
56.3M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.18M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
56.3M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.46M
    case SRE_AT_END_STRING:
42
1.46M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
59.2M
    }
87
88
0
    return 0;
89
59.2M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
11.1M
{
18
    /* check if pointer is at given position */
19
20
11.1M
    Py_ssize_t thisp, thatp;
21
22
11.1M
    switch (at) {
23
24
17.4k
    case SRE_AT_BEGINNING:
25
17.4k
    case SRE_AT_BEGINNING_STRING:
26
17.4k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.45M
    case SRE_AT_END:
33
7.45M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
6.54k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.45M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.68M
    case SRE_AT_END_STRING:
42
3.68M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
11.1M
    }
87
88
0
    return 0;
89
11.1M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.55G
{
94
    /* check if character is a member of the given set */
95
96
1.55G
    int ok = 1;
97
98
3.56G
    for (;;) {
99
3.56G
        switch (*set++) {
100
101
1.05G
        case SRE_OP_FAILURE:
102
1.05G
            return !ok;
103
104
1.21G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.21G
            if (ch == set[0])
107
7.06M
                return ok;
108
1.20G
            set++;
109
1.20G
            break;
110
111
99.3M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
99.3M
            if (sre_category(set[0], (int) ch))
114
30.9M
                return ok;
115
68.4M
            set++;
116
68.4M
            break;
117
118
506M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
506M
            if (ch < 256 &&
121
485M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
192M
                return ok;
123
313M
            set += 256/SRE_CODE_BITS;
124
313M
            break;
125
126
410M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
410M
            if (set[0] <= ch && ch <= set[1])
129
269M
                return ok;
130
140M
            set += 2;
131
140M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
279M
        case SRE_OP_NEGATE:
148
279M
            ok = !ok;
149
279M
            break;
150
151
2
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
2
        {
154
2
            Py_ssize_t count, block;
155
2
            count = *(set++);
156
157
2
            if (ch < 0x10000u)
158
2
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
2
            set += 256/sizeof(SRE_CODE);
162
2
            if (block >=0 &&
163
2
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
2
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
2
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
2
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.56G
        }
175
3.56G
    }
176
1.55G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
478M
{
94
    /* check if character is a member of the given set */
95
96
478M
    int ok = 1;
97
98
998M
    for (;;) {
99
998M
        switch (*set++) {
100
101
273M
        case SRE_OP_FAILURE:
102
273M
            return !ok;
103
104
300M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
300M
            if (ch == set[0])
107
4.42M
                return ok;
108
296M
            set++;
109
296M
            break;
110
111
32.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.2M
            if (sre_category(set[0], (int) ch))
114
14.5M
                return ok;
115
17.7M
            set++;
116
17.7M
            break;
117
118
122M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
122M
            if (ch < 256 &&
121
122M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
54.6M
                return ok;
123
67.8M
            set += 256/SRE_CODE_BITS;
124
67.8M
            break;
125
126
208M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
208M
            if (set[0] <= ch && ch <= set[1])
129
131M
                return ok;
130
77.1M
            set += 2;
131
77.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
61.3M
        case SRE_OP_NEGATE:
148
61.3M
            ok = !ok;
149
61.3M
            break;
150
151
2
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
2
        {
154
2
            Py_ssize_t count, block;
155
2
            count = *(set++);
156
157
2
            if (ch < 0x10000u)
158
2
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
2
            set += 256/sizeof(SRE_CODE);
162
2
            if (block >=0 &&
163
2
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
2
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
2
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
2
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
998M
        }
175
998M
    }
176
478M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
720M
{
94
    /* check if character is a member of the given set */
95
96
720M
    int ok = 1;
97
98
1.72G
    for (;;) {
99
1.72G
        switch (*set++) {
100
101
531M
        case SRE_OP_FAILURE:
102
531M
            return !ok;
103
104
677M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
677M
            if (ch == set[0])
107
1.49M
                return ok;
108
675M
            set++;
109
675M
            break;
110
111
58.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
58.9M
            if (sre_category(set[0], (int) ch))
114
13.6M
                return ok;
115
45.3M
            set++;
116
45.3M
            break;
117
118
180M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
180M
            if (ch < 256 &&
121
170M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
58.2M
                return ok;
123
122M
            set += 256/SRE_CODE_BITS;
124
122M
            break;
125
126
167M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
167M
            if (set[0] <= ch && ch <= set[1])
129
114M
                return ok;
130
52.8M
            set += 2;
131
52.8M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
112M
        case SRE_OP_NEGATE:
148
112M
            ok = !ok;
149
112M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.72G
        }
175
1.72G
    }
176
720M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
358M
{
94
    /* check if character is a member of the given set */
95
96
358M
    int ok = 1;
97
98
838M
    for (;;) {
99
838M
        switch (*set++) {
100
101
251M
        case SRE_OP_FAILURE:
102
251M
            return !ok;
103
104
235M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
235M
            if (ch == set[0])
107
1.14M
                return ok;
108
234M
            set++;
109
234M
            break;
110
111
8.10M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
8.10M
            if (sre_category(set[0], (int) ch))
114
2.73M
                return ok;
115
5.37M
            set++;
116
5.37M
            break;
117
118
203M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
203M
            if (ch < 256 &&
121
191M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
79.6M
                return ok;
123
123M
            set += 256/SRE_CODE_BITS;
124
123M
            break;
125
126
34.4M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
34.4M
            if (set[0] <= ch && ch <= set[1])
129
23.5M
                return ok;
130
10.8M
            set += 2;
131
10.8M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
106M
        case SRE_OP_NEGATE:
148
106M
            ok = !ok;
149
106M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
838M
        }
175
838M
    }
176
358M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
621M
{
195
621M
    SRE_CODE chr;
196
621M
    SRE_CHAR c;
197
621M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
621M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
621M
    Py_ssize_t i;
200
621M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
621M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
107M
        end = ptr + maxcount;
205
206
621M
    switch (pattern[0]) {
207
208
458M
    case SRE_OP_IN:
209
        /* repeated set */
210
458M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
849M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
391M
            ptr++;
213
458M
        break;
214
215
62.4M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
62.4M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
158M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
95.8M
            ptr++;
220
62.4M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
99.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
99.7M
        chr = pattern[1];
232
99.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
99.7M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
87.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
87.6M
        else
238
87.6M
#endif
239
104M
        while (ptr < end && *ptr == c)
240
5.18M
            ptr++;
241
99.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.28M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.28M
        chr = pattern[1];
270
1.28M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.28M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
719k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
719k
        else
276
719k
#endif
277
48.4M
        while (ptr < end && *ptr != c)
278
47.1M
            ptr++;
279
1.28M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
621M
    }
319
320
621M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
621M
           ptr - (SRE_CHAR*) state->ptr));
322
621M
    return ptr - (SRE_CHAR*) state->ptr;
323
621M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
228M
{
195
228M
    SRE_CODE chr;
196
228M
    SRE_CHAR c;
197
228M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
228M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
228M
    Py_ssize_t i;
200
228M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
228M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
28.9M
        end = ptr + maxcount;
205
206
228M
    switch (pattern[0]) {
207
208
144M
    case SRE_OP_IN:
209
        /* repeated set */
210
144M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
287M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
143M
            ptr++;
213
144M
        break;
214
215
12.5M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
12.5M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
32.1M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
19.6M
            ptr++;
220
12.5M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
70.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
70.7M
        chr = pattern[1];
232
70.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
70.7M
        c = (SRE_CHAR) chr;
234
70.7M
#if SIZEOF_SRE_CHAR < 4
235
70.7M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
70.7M
        else
238
70.7M
#endif
239
72.6M
        while (ptr < end && *ptr == c)
240
1.85M
            ptr++;
241
70.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
440k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
440k
        chr = pattern[1];
270
440k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
440k
        c = (SRE_CHAR) chr;
272
440k
#if SIZEOF_SRE_CHAR < 4
273
440k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
440k
        else
276
440k
#endif
277
12.0M
        while (ptr < end && *ptr != c)
278
11.5M
            ptr++;
279
440k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
228M
    }
319
320
228M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
228M
           ptr - (SRE_CHAR*) state->ptr));
322
228M
    return ptr - (SRE_CHAR*) state->ptr;
323
228M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
288M
{
195
288M
    SRE_CODE chr;
196
288M
    SRE_CHAR c;
197
288M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
288M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
288M
    Py_ssize_t i;
200
288M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
288M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
59.4M
        end = ptr + maxcount;
205
206
288M
    switch (pattern[0]) {
207
208
226M
    case SRE_OP_IN:
209
        /* repeated set */
210
226M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
359M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
133M
            ptr++;
213
226M
        break;
214
215
44.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
44.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
101M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
56.6M
            ptr++;
220
44.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
16.8M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
16.8M
        chr = pattern[1];
232
16.8M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
16.8M
        c = (SRE_CHAR) chr;
234
16.8M
#if SIZEOF_SRE_CHAR < 4
235
16.8M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
16.8M
        else
238
16.8M
#endif
239
18.8M
        while (ptr < end && *ptr == c)
240
1.90M
            ptr++;
241
16.8M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
278k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
278k
        chr = pattern[1];
270
278k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
278k
        c = (SRE_CHAR) chr;
272
278k
#if SIZEOF_SRE_CHAR < 4
273
278k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
278k
        else
276
278k
#endif
277
11.9M
        while (ptr < end && *ptr != c)
278
11.6M
            ptr++;
279
278k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
288M
    }
319
320
288M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
288M
           ptr - (SRE_CHAR*) state->ptr));
322
288M
    return ptr - (SRE_CHAR*) state->ptr;
323
288M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
105M
{
195
105M
    SRE_CODE chr;
196
105M
    SRE_CHAR c;
197
105M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
105M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
105M
    Py_ssize_t i;
200
105M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
105M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
18.9M
        end = ptr + maxcount;
205
206
105M
    switch (pattern[0]) {
207
208
87.9M
    case SRE_OP_IN:
209
        /* repeated set */
210
87.9M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
202M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
114M
            ptr++;
213
87.9M
        break;
214
215
5.08M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
5.08M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
24.5M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
19.5M
            ptr++;
220
5.08M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
12.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
12.0M
        chr = pattern[1];
232
12.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
12.0M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
13.4M
        while (ptr < end && *ptr == c)
240
1.41M
            ptr++;
241
12.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
563k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
563k
        chr = pattern[1];
270
563k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
563k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
24.5M
        while (ptr < end && *ptr != c)
278
23.9M
            ptr++;
279
563k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
105M
    }
319
320
105M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
105M
           ptr - (SRE_CHAR*) state->ptr));
322
105M
    return ptr - (SRE_CHAR*) state->ptr;
323
105M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Snapshot state->lastmark and state->lastindex into the current match
   context (`ctx`) so that LASTMARK_RESTORE() can put them back after a
   failed recursive match.  Expects `ctx` and `state` to be in scope. */
#define LASTMARK_SAVE()     \
354
547M
    do { \
355
547M
        ctx->lastmark = state->lastmark; \
356
547M
        ctx->lastindex = state->lastindex; \
357
547M
    } while (0)
358
/* Copy the lastmark/lastindex values stored in the current match context
   (`ctx`) back into `state`.  Expects `ctx` and `state` to be in scope. */
#define LASTMARK_RESTORE()  \
359
358M
    do { \
360
358M
        state->lastmark = ctx->lastmark; \
361
358M
        state->lastindex = ctx->lastindex; \
362
358M
    } while (0)
363
364
/* Trace the index of ctx->u.rep->last_ptr, then push that pointer value
   onto the data stack with DATA_PUSH.  Because DATA_PUSH may have to grow
   the stack, this macro can return from the enclosing function on error. */
#define LAST_PTR_PUSH()     \
365
196M
    do { \
366
196M
        TRACE(("push last_ptr: %zd", \
367
196M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
196M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
196M
    } while (0)
370
/* Pop the top of the data stack back into ctx->u.rep->last_ptr (the
   counterpart of LAST_PTR_PUSH), then trace the restored index. */
#define LAST_PTR_POP()  \
371
196M
    do { \
372
196M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
196M
        TRACE(("pop last_ptr: %zd", \
374
196M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
196M
    } while (0)
376
377
0
/* Leave the enclosing function immediately, returning the value `i`. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
860M
/* Record a failed match (ret = 0) and jump to the `exit` label, which is
   defined by the function using this macro (not visible in this excerpt). */
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
548M
/* Record a successful match (ret = 1) and jump to the `exit` label. */
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Return `i` from the enclosing function if it is negative.
   Note: `i` is evaluated more than once and is not parenthesized. */
#define RETURN_ON_ERROR(i) \
382
1.11G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
/* Return on a negative `i`; take the RETURN_SUCCESS path if `i` is
   positive; fall through when `i` is zero. */
#define RETURN_ON_SUCCESS(i) \
384
112M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
/* Return on a negative `i`; take the RETURN_FAILURE path if `i` is zero;
   fall through when `i` is positive. */
#define RETURN_ON_FAILURE(i) \
386
23.6M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.40G
/* Reserve sizeof(type) bytes on top of the state's data stack and make
   `ptr` point at them as a `type*`.
 *
 * Relies on locals of the expanding function: `alloc_pos` receives the
 * offset of the new object within state->data_stack, and `ctx`/`ctx_pos`
 * describe the current match context.
 *
 * If the remaining room is too small, data_stack_grow() is called; a
 * negative result is returned from the enclosing function.  After a
 * successful grow, `ctx` is re-derived from `ctx_pos` (presumably because
 * growing may relocate state->data_stack -- confirm against
 * data_stack_grow).  The `ctx_pos != -1` guard skips that step when no
 * context has been allocated yet.
 */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.40G
do { \
390
1.40G
    alloc_pos = state->data_stack_base; \
391
1.40G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.40G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.40G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
162M
        int j = data_stack_grow(state, sizeof(type)); \
395
162M
        if (j < 0) return j; \
396
162M
        if (ctx_pos != -1) \
397
162M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
162M
    } \
399
1.40G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.40G
    state->data_stack_base += sizeof(type); \
401
1.40G
} while (0)
402
403
1.54G
/* Set `ptr` to the object of type `type` located at offset `pos` inside
   state->data_stack.  Only computes an address; the stack is not modified. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.54G
do { \
405
1.54G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.54G
    ptr = (type*)(state->data_stack+pos); \
407
1.54G
} while (0)
408
409
481M
/* Copy `size` bytes starting at `data` onto the top of the state's data
   stack and advance state->data_stack_base by `size`.
 *
 * If there is not enough room, data_stack_grow() is called first; a
 * negative result is returned from the enclosing function.  After a
 * successful grow, `ctx` is re-derived from `ctx_pos` (presumably because
 * the stack buffer may have moved -- confirm against data_stack_grow).
 * Expects `ctx` and `ctx_pos` to be in scope.
 */
#define DATA_STACK_PUSH(state, data, size) \
410
481M
do { \
411
481M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
481M
           data, state->data_stack_base, size)); \
413
481M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
86.9k
        int j = data_stack_grow(state, size); \
415
86.9k
        if (j < 0) return j; \
416
86.9k
        if (ctx_pos != -1) \
417
86.9k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
86.9k
    } \
419
481M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
481M
    state->data_stack_base += size; \
421
481M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
304M
/* Copy the topmost `size` bytes of the state's data stack into `data`.
   When `discard` is non-zero the stack top is also lowered by `size`;
   otherwise the bytes stay on the stack. */
#define DATA_STACK_POP(state, data, size, discard) \
427
304M
do { \
428
304M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
304M
           data, state->data_stack_base-size, size)); \
430
304M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
304M
    if (discard) \
432
304M
        state->data_stack_base -= size; \
433
304M
} while (0)
434
435
1.58G
/* Drop the topmost `size` bytes of the state's data stack without copying
   them anywhere: only state->data_stack_base is lowered. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.58G
do { \
437
1.58G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.58G
           state->data_stack_base-size, size)); \
439
1.58G
    state->data_stack_base -= size; \
440
1.58G
} while(0)
441
442
/* Convenience wrappers around the DATA_STACK_* macros.  They all use the
   variable named `state` from the expanding scope; where `x` is taken, it
   is a pointer and the transfer size is sizeof(*x). */

/* Push the object `x` points to onto the data stack. */
#define DATA_PUSH(x) \
443
196M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop the top of the data stack into the object `x` points to. */
#define DATA_POP(x) \
445
196M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Drop sizeof(*x) bytes from the top of the data stack; `x` itself is
   only used for its pointed-to size. */
#define DATA_POP_DISCARD(x) \
447
1.40G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Allocate an object of type `t` on the data stack and point `p` at it. */
#define DATA_ALLOC(t,p) \
449
1.40G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point `p` at the object of type `t` stored at data-stack offset `pos`. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.54G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into the subject string to a character index: the
   byte offset from state->beginning divided by state->charsize.  A null
   pointer yields -1.  Expects `state` to be in scope. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): debugging aid for the mark stack helpers.
   In VERBOSE builds, and only when DO_TRACE is true, it prints `label`,
   the number of marks (lastmark + 1), and the index of each entry
   state->mark[0..lastmark], with an extra space before every even
   position after the first.  In non-VERBOSE builds it expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save the first lastmark+1 entries of state->mark on the data stack.
   Does nothing when `lastmark` is negative.  May return from the
   enclosing function if growing the data stack fails (see
   DATA_STACK_PUSH).
   Note: `lastmark` is not parenthesized in the size computation, so pass
   a simple expression. */
#define MARK_PUSH(lastmark) \
472
384M
    do if (lastmark >= 0) { \
473
285M
        MARK_TRACE("push", (lastmark)); \
474
285M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
285M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
384M
    } while (0)
477
/* Restore the first lastmark+1 entries of state->mark from the top of the
   data stack and remove them from the stack.  Does nothing when
   `lastmark` is negative. */
#define MARK_POP(lastmark) \
478
122M
    do if (lastmark >= 0) { \
479
107M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
107M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
107M
        MARK_TRACE("pop", (lastmark)); \
482
122M
    } while (0)
483
/* Restore the first lastmark+1 entries of state->mark from the top of the
   data stack but leave the saved copy on the stack (discard flag is 0).
   Does nothing when `lastmark` is negative. */
#define MARK_POP_KEEP(lastmark) \
484
1.29M
    do if (lastmark >= 0) { \
485
896k
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
896k
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
896k
        MARK_TRACE("pop keep", (lastmark)); \
488
1.29M
    } while (0)
489
/* Remove a saved block of lastmark+1 marks from the top of the data stack
   without copying it back into state->mark.  Does nothing when `lastmark`
   is negative. */
#define MARK_POP_DISCARD(lastmark) \
490
262M
    do if (lastmark >= 0) { \
491
177M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
177M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
177M
        MARK_TRACE("pop discard", (lastmark)); \
494
262M
    } while (0)
495
496
485M
/* Jump identifiers stored in the `jump` field of a match context.
   JUMP_NONE marks the initial context created on entry to SRE(match);
   the other values are passed as `jumpvalue` to DO_JUMPX/DO_JUMP/DO_JUMP0,
   one per call site.  Presumably the exit path uses the value to pick the
   matching `jumplabel` to resume at -- that code is outside this excerpt. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
196M
#define JUMP_MAX_UNTIL_2     2
499
112M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
112M
#define JUMP_REPEAT          7
504
12.8M
#define JUMP_REPEAT_ONE_1    8
505
228M
#define JUMP_REPEAT_ONE_2    9
506
63.5M
#define JUMP_MIN_REPEAT_ONE  10
507
149M
#define JUMP_BRANCH          11
508
23.6M
#define JUMP_ASSERT          12
509
25.4M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without a C-level
   recursive call.
 *
 * Steps, in order:
 *   1. save the current `pattern` and `ptr` in the current context;
 *   2. allocate a new context on the data stack (this may return from the
 *      enclosing function if the stack cannot grow);
 *   3. fill in its pattern, toplevel flag (`toplevel_`), jump id
 *      (`jumpvalue`) and the offset of the current context (`ctx_pos`);
 *   4. make the new context current and `goto entrance`.
 *
 * `jumplabel:` is the resume point; once control arrives there, `pattern`
 * and `ptr` are reloaded from the then-current `ctx`.  The code that jumps
 * back to `jumplabel` is outside this excerpt.
 *
 * This is a bare statement sequence, not wrapped in do { } while (0), and
 * it defines a label, so it must be used as a full statement and each
 * `jumplabel` must be unique within the function.
 */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
923M
    ctx->pattern = pattern; \
516
923M
    ctx->ptr = ptr; \
517
923M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
923M
    nextctx->pattern = nextpattern; \
519
923M
    nextctx->toplevel = toplevel_; \
520
923M
    nextctx->jump = jumpvalue; \
521
923M
    nextctx->last_ctx_pos = ctx_pos; \
522
923M
    pattern = nextpattern; \
523
923M
    ctx_pos = alloc_pos; \
524
923M
    ctx = nextctx; \
525
923M
    goto entrance; \
526
923M
    jumplabel: \
527
923M
    pattern = ctx->pattern; \
528
923M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX variant in which the new context inherits the current
   context's `toplevel` flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
874M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX variant in which the new context always has `toplevel` set to
   0, regardless of the current context's flag. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
49.1M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-invocation matching context.  Instances live on the state's data
   stack: one is allocated on entry to SRE(match) and one by every
   DO_JUMPX, each linked to its predecessor through `last_ctx_pos`. */
typedef struct {
537
    Py_ssize_t count;           /* counter; its users are outside this excerpt */
538
    union {
539
        SRE_CODE chr;           /* usage not visible in this excerpt */
540
        SRE_REPEAT* rep;        /* repeat record; its last_ptr is saved by LAST_PTR_PUSH/POP */
541
    } u;
542
    int lastmark;               /* copy of state->lastmark (LASTMARK_SAVE/RESTORE) */
543
    int lastindex;              /* copy of state->lastindex (LASTMARK_SAVE/RESTORE) */
544
    const SRE_CODE* pattern;    /* pattern position; saved/reloaded by DO_JUMPX */
545
    const SRE_CHAR* ptr;        /* string position; saved/reloaded by DO_JUMPX */
546
    int toplevel;               /* toplevel flag passed to SRE(match) or DO_JUMPX */
547
    int jump;                   /* one of the JUMP_* identifiers */
548
    Py_ssize_t last_ctx_pos;    /* data-stack offset of the previous context, or -1 */
549
} SRE(match_context);
550
551
/* Increment the local `sigcount`; whenever its low 12 bits become zero
   (once every 4096 increments) call PyErr_CheckSignals().  If that call
   reports a non-zero result, return SRE_ERROR_INTERRUPTED from the
   enclosing function. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.38G
    do {                                                           \
553
2.38G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.38G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS: the periodic signal check used by the dispatcher.
   In Py_DEBUG builds it additionally provides a failure-injection hook:
   while state->fail_after_count is non-negative it is decremented on each
   use, and when it was 0 before the decrement the exception
   state->fail_after_exc is set and SRE_ERROR_INTERRUPTED is returned.
   In other builds it is just _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.38G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalize USE_COMPUTED_GOTOS to a defined 0/1 value:
   - HAVE_COMPUTED_GOTOS defined: default to 1 unless the builder already
     defined USE_COMPUTED_GOTOS (an explicit setting is kept as is);
   - not available but explicitly requested (non-zero): compile error;
   - otherwise: force it to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch helpers for SRE(match).
   With computed gotos, TARGET(OP) names the label TARGET_<OP> and DISPATCH
   runs the periodic signal check, then jumps through the `sre_targets`
   table indexed by the next opcode, advancing `pattern` past it.
   Without them, TARGET(OP) is a `case` label and DISPATCH jumps to the
   `dispatch` label, where the caller performs the signal check and the
   switch on the next opcode. */
#if USE_COMPUTED_GOTOS
585
2.46G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.38G
        do {                               \
588
2.38G
            MAYBE_CHECK_SIGNALS;           \
589
2.38G
            goto *sre_targets[*pattern++]; \
590
2.38G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
485M
{
601
485M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
485M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
485M
    Py_ssize_t ret = 0;
604
485M
    int jump;
605
485M
    unsigned int sigcount = state->sigcount;
606
607
485M
    SRE(match_context)* ctx;
608
485M
    SRE(match_context)* nextctx;
609
485M
    INIT_TRACE(state);
610
611
485M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
485M
    DATA_ALLOC(SRE(match_context), ctx);
614
485M
    ctx->last_ctx_pos = -1;
615
485M
    ctx->jump = JUMP_NONE;
616
485M
    ctx->toplevel = toplevel;
617
485M
    ctx_pos = alloc_pos;
618
619
485M
#if USE_COMPUTED_GOTOS
620
485M
#include "sre_targets.h"
621
485M
#endif
622
623
1.40G
entrance:
624
625
1.40G
    ;  // Fashion statement.
626
1.40G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.40G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
55.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.66M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.66M
                   end - ptr, (size_t) pattern[3]));
634
3.66M
            RETURN_FAILURE;
635
3.66M
        }
636
51.8M
        pattern += pattern[1] + 1;
637
51.8M
    }
638
639
1.40G
#if USE_COMPUTED_GOTOS
640
1.40G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.40G
    {
647
648
1.40G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
579M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
579M
                   ptr, pattern[0]));
653
579M
            {
654
579M
                int i = pattern[0];
655
579M
                if (i & 1)
656
127M
                    state->lastindex = i/2 + 1;
657
579M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
573M
                    int j = state->lastmark + 1;
663
587M
                    while (j < i)
664
14.4M
                        state->mark[j++] = NULL;
665
573M
                    state->lastmark = i;
666
573M
                }
667
579M
                state->mark[i] = ptr;
668
579M
            }
669
579M
            pattern++;
670
579M
            DISPATCH;
671
672
579M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
130M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
130M
                   ptr, *pattern));
677
130M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
59.5M
                RETURN_FAILURE;
679
71.3M
            pattern++;
680
71.3M
            ptr++;
681
71.3M
            DISPATCH;
682
683
71.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
157M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
157M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
157M
            if (ctx->toplevel &&
698
37.5M
                ((state->match_all && ptr != state->end) ||
699
37.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
157M
            state->ptr = ptr;
704
157M
            RETURN_SUCCESS;
705
706
103M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
103M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
103M
            if (!SRE(at)(state, ptr, *pattern))
711
85.8M
                RETURN_FAILURE;
712
17.2M
            pattern++;
713
17.2M
            DISPATCH;
714
715
17.2M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
290M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
290M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
290M
            if (ptr >= end ||
749
288M
                !SRE(charset)(state, pattern + 1, *ptr))
750
96.3M
                RETURN_FAILURE;
751
194M
            pattern += pattern[0];
752
194M
            ptr++;
753
194M
            DISPATCH;
754
755
194M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.52M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.52M
                   pattern, ptr, pattern[0]));
758
7.52M
            if (ptr >= end ||
759
7.52M
                sre_lower_ascii(*ptr) != *pattern)
760
45.5k
                RETURN_FAILURE;
761
7.47M
            pattern++;
762
7.47M
            ptr++;
763
7.47M
            DISPATCH;
764
765
7.47M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
14
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
14
                   pattern, ptr, pattern[0]));
768
14
            if (ptr >= end ||
769
14
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
14
            pattern++;
772
14
            ptr++;
773
14
            DISPATCH;
774
775
14
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
14
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
14
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
14
            if (ptr >= end
828
10
                || !SRE(charset)(state, pattern+1,
829
10
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
8
                RETURN_FAILURE;
831
6
            pattern += pattern[0];
832
6
            ptr++;
833
6
            DISPATCH;
834
835
6
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
72.2M
        TARGET(SRE_OP_JUMP):
845
72.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
72.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
72.2M
                   ptr, pattern[0]));
850
72.2M
            pattern += pattern[0];
851
72.2M
            DISPATCH;
852
853
109M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
109M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
109M
            LASTMARK_SAVE();
858
109M
            if (state->repeat)
859
58.2M
                MARK_PUSH(ctx->lastmark);
860
271M
            for (; pattern[0]; pattern += pattern[0]) {
861
232M
                if (pattern[1] == SRE_OP_LITERAL &&
862
103M
                    (ptr >= end ||
863
103M
                     (SRE_CODE) *ptr != pattern[2]))
864
57.3M
                    continue;
865
175M
                if (pattern[1] == SRE_OP_IN &&
866
50.3M
                    (ptr >= end ||
867
50.2M
                     !SRE(charset)(state, pattern + 3,
868
50.2M
                                   (SRE_CODE) *ptr)))
869
26.3M
                    continue;
870
149M
                state->ptr = ptr;
871
149M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
149M
                if (ret) {
873
70.0M
                    if (state->repeat)
874
50.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
70.0M
                    RETURN_ON_ERROR(ret);
876
70.0M
                    RETURN_SUCCESS;
877
70.0M
                }
878
78.9M
                if (state->repeat)
879
14.0k
                    MARK_POP_KEEP(ctx->lastmark);
880
78.9M
                LASTMARK_RESTORE();
881
78.9M
            }
882
38.9M
            if (state->repeat)
883
8.20M
                MARK_POP_DISCARD(ctx->lastmark);
884
38.9M
            RETURN_FAILURE;
885
886
563M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
563M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
563M
                   pattern[1], pattern[2]));
898
899
563M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.12M
                RETURN_FAILURE; /* cannot match */
901
902
562M
            state->ptr = ptr;
903
904
562M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
562M
            RETURN_ON_ERROR(ret);
906
562M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
562M
            ctx->count = ret;
908
562M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
562M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
348M
                RETURN_FAILURE;
917
918
213M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.43M
                ptr == state->end &&
920
85.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
85.1k
            {
922
                /* tail is empty.  we're finished */
923
85.1k
                state->ptr = ptr;
924
85.1k
                RETURN_SUCCESS;
925
85.1k
            }
926
927
213M
            LASTMARK_SAVE();
928
213M
            if (state->repeat)
929
104M
                MARK_PUSH(ctx->lastmark);
930
931
213M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
20.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
20.7M
                for (;;) {
936
50.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
42.5M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
29.7M
                        ptr--;
939
29.7M
                        ctx->count--;
940
29.7M
                    }
941
20.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
7.91M
                        break;
943
12.8M
                    state->ptr = ptr;
944
12.8M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.8M
                            pattern+pattern[0]);
946
12.8M
                    if (ret) {
947
12.8M
                        if (state->repeat)
948
11.4M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.8M
                        RETURN_ON_ERROR(ret);
950
12.8M
                        RETURN_SUCCESS;
951
12.8M
                    }
952
725
                    if (state->repeat)
953
717
                        MARK_POP_KEEP(ctx->lastmark);
954
725
                    LASTMARK_RESTORE();
955
956
725
                    ptr--;
957
725
                    ctx->count--;
958
725
                }
959
7.91M
                if (state->repeat)
960
6.75M
                    MARK_POP_DISCARD(ctx->lastmark);
961
192M
            } else {
962
                /* general case */
963
289M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
228M
                    state->ptr = ptr;
965
228M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
228M
                            pattern+pattern[0]);
967
228M
                    if (ret) {
968
131M
                        if (state->repeat)
969
85.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
131M
                        RETURN_ON_ERROR(ret);
971
131M
                        RETURN_SUCCESS;
972
131M
                    }
973
97.0M
                    if (state->repeat)
974
1.27M
                        MARK_POP_KEEP(ctx->lastmark);
975
97.0M
                    LASTMARK_RESTORE();
976
977
97.0M
                    ptr--;
978
97.0M
                    ctx->count--;
979
97.0M
                }
980
61.3M
                if (state->repeat)
981
1.02M
                    MARK_POP_DISCARD(ctx->lastmark);
982
61.3M
            }
983
69.2M
            RETURN_FAILURE;
984
985
3.72M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.72M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.72M
                   pattern[1], pattern[2]));
997
998
3.72M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.72M
            state->ptr = ptr;
1002
1003
3.72M
            if (pattern[1] == 0)
1004
3.72M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.72M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.72M
            } else {
1028
                /* general case */
1029
3.72M
                LASTMARK_SAVE();
1030
3.72M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
63.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
63.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
63.5M
                    state->ptr = ptr;
1036
63.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
63.5M
                            pattern+pattern[0]);
1038
63.5M
                    if (ret) {
1039
3.72M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.72M
                        RETURN_ON_ERROR(ret);
1042
3.72M
                        RETURN_SUCCESS;
1043
3.72M
                    }
1044
59.8M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
59.8M
                    LASTMARK_RESTORE();
1047
1048
59.8M
                    state->ptr = ptr;
1049
59.8M
                    ret = SRE(count)(state, pattern+3, 1);
1050
59.8M
                    RETURN_ON_ERROR(ret);
1051
59.8M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
59.8M
                    if (ret == 0)
1053
8
                        break;
1054
59.8M
                    assert(ret == 1);
1055
59.8M
                    ptr++;
1056
59.8M
                    ctx->count++;
1057
59.8M
                }
1058
8
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
8
            }
1061
8
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
112M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
112M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
112M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
112M
            ctx->u.rep = repeat_pool_malloc(state);
1127
112M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
112M
            ctx->u.rep->count = -1;
1131
112M
            ctx->u.rep->pattern = pattern;
1132
112M
            ctx->u.rep->prev = state->repeat;
1133
112M
            ctx->u.rep->last_ptr = NULL;
1134
112M
            state->repeat = ctx->u.rep;
1135
1136
112M
            state->ptr = ptr;
1137
112M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
112M
            state->repeat = ctx->u.rep->prev;
1139
112M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
112M
            if (ret) {
1142
36.6M
                RETURN_ON_ERROR(ret);
1143
36.6M
                RETURN_SUCCESS;
1144
36.6M
            }
1145
75.4M
            RETURN_FAILURE;
1146
1147
212M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
212M
            ctx->u.rep = state->repeat;
1155
212M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
212M
            state->ptr = ptr;
1159
1160
212M
            ctx->count = ctx->u.rep->count+1;
1161
1162
212M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
212M
                   ptr, ctx->count));
1164
1165
212M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
212M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.6M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
196M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
196M
                ctx->u.rep->count = ctx->count;
1185
196M
                LASTMARK_SAVE();
1186
196M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
196M
                LAST_PTR_PUSH();
1189
196M
                ctx->u.rep->last_ptr = state->ptr;
1190
196M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
196M
                        ctx->u.rep->pattern+3);
1192
196M
                LAST_PTR_POP();
1193
196M
                if (ret) {
1194
99.6M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
99.6M
                    RETURN_ON_ERROR(ret);
1196
99.6M
                    RETURN_SUCCESS;
1197
99.6M
                }
1198
96.6M
                MARK_POP(ctx->lastmark);
1199
96.6M
                LASTMARK_RESTORE();
1200
96.6M
                ctx->u.rep->count = ctx->count-1;
1201
96.6M
                state->ptr = ptr;
1202
96.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
112M
            state->repeat = ctx->u.rep->prev;
1207
112M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
112M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
112M
            RETURN_ON_SUCCESS(ret);
1211
75.7M
            state->ptr = ptr;
1212
75.7M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
23.6M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
23.6M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
23.6M
                   ptr, pattern[1]));
1565
23.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
23.6M
            state->ptr = ptr - pattern[1];
1568
23.6M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
23.6M
            RETURN_ON_FAILURE(ret);
1570
17.9M
            pattern += pattern[0];
1571
17.9M
            DISPATCH;
1572
1573
25.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.4M
                   ptr, pattern[1]));
1578
25.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.4M
                state->ptr = ptr - pattern[1];
1580
25.4M
                LASTMARK_SAVE();
1581
25.4M
                if (state->repeat)
1582
25.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
50.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
50.9M
                if (ret) {
1586
8.35k
                    if (state->repeat)
1587
8.35k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
8.35k
                    RETURN_ON_ERROR(ret);
1589
8.35k
                    RETURN_FAILURE;
1590
8.35k
                }
1591
25.4M
                if (state->repeat)
1592
25.4M
                    MARK_POP(ctx->lastmark);
1593
25.4M
                LASTMARK_RESTORE();
1594
25.4M
            }
1595
25.4M
            pattern += pattern[0];
1596
25.4M
            DISPATCH;
1597
1598
25.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.40G
exit:
1620
1.40G
    ctx_pos = ctx->last_ctx_pos;
1621
1.40G
    jump = ctx->jump;
1622
1.40G
    DATA_POP_DISCARD(ctx);
1623
1.40G
    if (ctx_pos == -1) {
1624
485M
        state->sigcount = sigcount;
1625
485M
        return ret;
1626
485M
    }
1627
923M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
923M
    switch (jump) {
1630
196M
        case JUMP_MAX_UNTIL_2:
1631
196M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
196M
            goto jump_max_until_2;
1633
112M
        case JUMP_MAX_UNTIL_3:
1634
112M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
112M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
149M
        case JUMP_BRANCH:
1643
149M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
149M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
112M
        case JUMP_REPEAT:
1658
112M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
112M
            goto jump_repeat;
1660
12.8M
        case JUMP_REPEAT_ONE_1:
1661
12.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.8M
            goto jump_repeat_one_1;
1663
228M
        case JUMP_REPEAT_ONE_2:
1664
228M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
228M
            goto jump_repeat_one_2;
1666
63.5M
        case JUMP_MIN_REPEAT_ONE:
1667
63.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
63.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
23.6M
        case JUMP_ASSERT:
1673
23.6M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
23.6M
            goto jump_assert;
1675
25.4M
        case JUMP_ASSERT_NOT:
1676
25.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
923M
    }
1683
1684
0
    return ret; /* should never get here */
1685
923M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
199M
{
601
199M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
199M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
199M
    Py_ssize_t ret = 0;
604
199M
    int jump;
605
199M
    unsigned int sigcount = state->sigcount;
606
607
199M
    SRE(match_context)* ctx;
608
199M
    SRE(match_context)* nextctx;
609
199M
    INIT_TRACE(state);
610
611
199M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
199M
    DATA_ALLOC(SRE(match_context), ctx);
614
199M
    ctx->last_ctx_pos = -1;
615
199M
    ctx->jump = JUMP_NONE;
616
199M
    ctx->toplevel = toplevel;
617
199M
    ctx_pos = alloc_pos;
618
619
199M
#if USE_COMPUTED_GOTOS
620
199M
#include "sre_targets.h"
621
199M
#endif
622
623
488M
entrance:
624
625
488M
    ;  // Fashion statement.
626
488M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
488M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
30.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.55M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.55M
                   end - ptr, (size_t) pattern[3]));
634
3.55M
            RETURN_FAILURE;
635
3.55M
        }
636
27.2M
        pattern += pattern[1] + 1;
637
27.2M
    }
638
639
484M
#if USE_COMPUTED_GOTOS
640
484M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
484M
    {
647
648
484M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
207M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
207M
                   ptr, pattern[0]));
653
207M
            {
654
207M
                int i = pattern[0];
655
207M
                if (i & 1)
656
41.1M
                    state->lastindex = i/2 + 1;
657
207M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
204M
                    int j = state->lastmark + 1;
663
212M
                    while (j < i)
664
8.72M
                        state->mark[j++] = NULL;
665
204M
                    state->lastmark = i;
666
204M
                }
667
207M
                state->mark[i] = ptr;
668
207M
            }
669
207M
            pattern++;
670
207M
            DISPATCH;
671
672
207M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal character */
674
            /* <LITERAL> <code> */
675
67.6M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
67.6M
                   ptr, *pattern));
677
67.6M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
29.1M
                RETURN_FAILURE;
679
38.4M
            pattern++;
680
38.4M
            ptr++;
681
38.4M
            DISPATCH;
682
683
38.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
63.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
63.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
63.7M
            if (ctx->toplevel &&
698
19.9M
                ((state->match_all && ptr != state->end) ||
699
19.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
63.7M
            state->ptr = ptr;
704
63.7M
            RETURN_SUCCESS;
705
706
32.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
32.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
32.6M
            if (!SRE(at)(state, ptr, *pattern))
711
18.1M
                RETURN_FAILURE;
712
14.5M
            pattern++;
713
14.5M
            DISPATCH;
714
715
14.5M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
82.7M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
82.7M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
82.7M
            if (ptr >= end ||
749
82.4M
                !SRE(charset)(state, pattern + 1, *ptr))
750
16.2M
                RETURN_FAILURE;
751
66.4M
            pattern += pattern[0];
752
66.4M
            ptr++;
753
66.4M
            DISPATCH;
754
755
66.4M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
392k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
392k
                   pattern, ptr, pattern[0]));
758
392k
            if (ptr >= end ||
759
392k
                sre_lower_ascii(*ptr) != *pattern)
760
4.71k
                RETURN_FAILURE;
761
387k
            pattern++;
762
387k
            ptr++;
763
387k
            DISPATCH;
764
765
387k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
14
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
14
                   pattern, ptr, pattern[0]));
768
14
            if (ptr >= end ||
769
14
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
14
            pattern++;
772
14
            ptr++;
773
14
            DISPATCH;
774
775
14
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
14
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
14
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
14
            if (ptr >= end
828
10
                || !SRE(charset)(state, pattern+1,
829
10
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
8
                RETURN_FAILURE;
831
6
            pattern += pattern[0];
832
6
            ptr++;
833
6
            DISPATCH;
834
835
6
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
28.9M
        TARGET(SRE_OP_JUMP):
845
28.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
28.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
28.9M
                   ptr, pattern[0]));
850
28.9M
            pattern += pattern[0];
851
28.9M
            DISPATCH;
852
853
54.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
54.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
54.9M
            LASTMARK_SAVE();
858
54.9M
            if (state->repeat)
859
13.1M
                MARK_PUSH(ctx->lastmark);
860
157M
            for (; pattern[0]; pattern += pattern[0]) {
861
130M
                if (pattern[1] == SRE_OP_LITERAL &&
862
55.0M
                    (ptr >= end ||
863
54.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
25.0M
                    continue;
865
105M
                if (pattern[1] == SRE_OP_IN &&
866
13.1M
                    (ptr >= end ||
867
13.0M
                     !SRE(charset)(state, pattern + 3,
868
13.0M
                                   (SRE_CODE) *ptr)))
869
6.44M
                    continue;
870
98.6M
                state->ptr = ptr;
871
98.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
98.6M
                if (ret) {
873
27.7M
                    if (state->repeat)
874
12.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
27.7M
                    RETURN_ON_ERROR(ret);
876
27.7M
                    RETURN_SUCCESS;
877
27.7M
                }
878
70.9M
                if (state->repeat)
879
6.02k
                    MARK_POP_KEEP(ctx->lastmark);
880
70.9M
                LASTMARK_RESTORE();
881
70.9M
            }
882
27.1M
            if (state->repeat)
883
274k
                MARK_POP_DISCARD(ctx->lastmark);
884
27.1M
            RETURN_FAILURE;
885
886
218M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
218M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
218M
                   pattern[1], pattern[2]));
898
899
218M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
948k
                RETURN_FAILURE; /* cannot match */
901
902
217M
            state->ptr = ptr;
903
904
217M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
217M
            RETURN_ON_ERROR(ret);
906
217M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
217M
            ctx->count = ret;
908
217M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
217M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
153M
                RETURN_FAILURE;
917
918
64.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
624k
                ptr == state->end &&
920
63.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
63.8k
            {
922
                /* tail is empty.  we're finished */
923
63.8k
                state->ptr = ptr;
924
63.8k
                RETURN_SUCCESS;
925
63.8k
            }
926
927
64.1M
            LASTMARK_SAVE();
928
64.1M
            if (state->repeat)
929
37.2M
                MARK_PUSH(ctx->lastmark);
930
931
64.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.45M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.45M
                for (;;) {
936
15.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.60M
                        ptr--;
939
9.60M
                        ctx->count--;
940
9.60M
                    }
941
5.45M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.69M
                        break;
943
3.76M
                    state->ptr = ptr;
944
3.76M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.76M
                            pattern+pattern[0]);
946
3.76M
                    if (ret) {
947
3.76M
                        if (state->repeat)
948
2.53M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.76M
                        RETURN_ON_ERROR(ret);
950
3.76M
                        RETURN_SUCCESS;
951
3.76M
                    }
952
214
                    if (state->repeat)
953
206
                        MARK_POP_KEEP(ctx->lastmark);
954
214
                    LASTMARK_RESTORE();
955
956
214
                    ptr--;
957
214
                    ctx->count--;
958
214
                }
959
1.69M
                if (state->repeat)
960
538k
                    MARK_POP_DISCARD(ctx->lastmark);
961
58.7M
            } else {
962
                /* general case */
963
78.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
67.5M
                    state->ptr = ptr;
965
67.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
67.5M
                            pattern+pattern[0]);
967
67.5M
                    if (ret) {
968
47.3M
                        if (state->repeat)
969
33.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
47.3M
                        RETURN_ON_ERROR(ret);
971
47.3M
                        RETURN_SUCCESS;
972
47.3M
                    }
973
20.1M
                    if (state->repeat)
974
979k
                        MARK_POP_KEEP(ctx->lastmark);
975
20.1M
                    LASTMARK_RESTORE();
976
977
20.1M
                    ptr--;
978
20.1M
                    ctx->count--;
979
20.1M
                }
980
11.3M
                if (state->repeat)
981
834k
                    MARK_POP_DISCARD(ctx->lastmark);
982
11.3M
            }
983
13.0M
            RETURN_FAILURE;
984
985
2.44M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
2.44M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
2.44M
                   pattern[1], pattern[2]));
997
998
2.44M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
2.44M
            state->ptr = ptr;
1002
1003
2.44M
            if (pattern[1] == 0)
1004
2.44M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
2.44M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
2.44M
            } else {
1028
                /* general case */
1029
2.44M
                LASTMARK_SAVE();
1030
2.44M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
12.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
12.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
12.5M
                    state->ptr = ptr;
1036
12.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
12.5M
                            pattern+pattern[0]);
1038
12.5M
                    if (ret) {
1039
2.44M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
2.44M
                        RETURN_ON_ERROR(ret);
1042
2.44M
                        RETURN_SUCCESS;
1043
2.44M
                    }
1044
10.1M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
10.1M
                    LASTMARK_RESTORE();
1047
1048
10.1M
                    state->ptr = ptr;
1049
10.1M
                    ret = SRE(count)(state, pattern+3, 1);
1050
10.1M
                    RETURN_ON_ERROR(ret);
1051
10.1M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
10.1M
                    if (ret == 0)
1053
8
                        break;
1054
10.1M
                    assert(ret == 1);
1055
10.1M
                    ptr++;
1056
10.1M
                    ctx->count++;
1057
10.1M
                }
1058
8
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
8
            }
1061
8
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
22.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
22.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
22.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
22.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
22.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
22.8M
            ctx->u.rep->count = -1;
1131
22.8M
            ctx->u.rep->pattern = pattern;
1132
22.8M
            ctx->u.rep->prev = state->repeat;
1133
22.8M
            ctx->u.rep->last_ptr = NULL;
1134
22.8M
            state->repeat = ctx->u.rep;
1135
1136
22.8M
            state->ptr = ptr;
1137
22.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
22.8M
            state->repeat = ctx->u.rep->prev;
1139
22.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
22.8M
            if (ret) {
1142
9.86M
                RETURN_ON_ERROR(ret);
1143
9.86M
                RETURN_SUCCESS;
1144
9.86M
            }
1145
13.0M
            RETURN_FAILURE;
1146
1147
58.3M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
58.3M
            ctx->u.rep = state->repeat;
1155
58.3M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
58.3M
            state->ptr = ptr;
1159
1160
58.3M
            ctx->count = ctx->u.rep->count+1;
1161
1162
58.3M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
58.3M
                   ptr, ctx->count));
1164
1165
58.3M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
58.3M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.40M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
50.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
50.9M
                ctx->u.rep->count = ctx->count;
1185
50.9M
                LASTMARK_SAVE();
1186
50.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
50.9M
                LAST_PTR_PUSH();
1189
50.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
50.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
50.9M
                        ctx->u.rep->pattern+3);
1192
50.9M
                LAST_PTR_POP();
1193
50.9M
                if (ret) {
1194
35.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
35.2M
                    RETURN_ON_ERROR(ret);
1196
35.2M
                    RETURN_SUCCESS;
1197
35.2M
                }
1198
15.6M
                MARK_POP(ctx->lastmark);
1199
15.6M
                LASTMARK_RESTORE();
1200
15.6M
                ctx->u.rep->count = ctx->count-1;
1201
15.6M
                state->ptr = ptr;
1202
15.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
23.0M
            state->repeat = ctx->u.rep->prev;
1207
23.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
23.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
23.0M
            RETURN_ON_SUCCESS(ret);
1211
13.2M
            state->ptr = ptr;
1212
13.2M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.78M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.78M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.78M
                   ptr, pattern[1]));
1565
2.78M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.78M
            state->ptr = ptr - pattern[1];
1568
2.78M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.78M
            RETURN_ON_FAILURE(ret);
1570
2.65M
            pattern += pattern[0];
1571
2.65M
            DISPATCH;
1572
1573
6.33M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
6.33M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
6.33M
                   ptr, pattern[1]));
1578
6.33M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
6.33M
                state->ptr = ptr - pattern[1];
1580
6.33M
                LASTMARK_SAVE();
1581
6.33M
                if (state->repeat)
1582
6.33M
                    MARK_PUSH(ctx->lastmark);
1583
1584
12.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
12.6M
                if (ret) {
1586
930
                    if (state->repeat)
1587
930
                        MARK_POP_DISCARD(ctx->lastmark);
1588
930
                    RETURN_ON_ERROR(ret);
1589
930
                    RETURN_FAILURE;
1590
930
                }
1591
6.33M
                if (state->repeat)
1592
6.33M
                    MARK_POP(ctx->lastmark);
1593
6.33M
                LASTMARK_RESTORE();
1594
6.33M
            }
1595
6.33M
            pattern += pattern[0];
1596
6.33M
            DISPATCH;
1597
1598
6.33M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
488M
exit:
1620
488M
    ctx_pos = ctx->last_ctx_pos;
1621
488M
    jump = ctx->jump;
1622
488M
    DATA_POP_DISCARD(ctx);
1623
488M
    if (ctx_pos == -1) {
1624
199M
        state->sigcount = sigcount;
1625
199M
        return ret;
1626
199M
    }
1627
288M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
288M
    switch (jump) {
1630
50.9M
        case JUMP_MAX_UNTIL_2:
1631
50.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
50.9M
            goto jump_max_until_2;
1633
23.0M
        case JUMP_MAX_UNTIL_3:
1634
23.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
23.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
98.6M
        case JUMP_BRANCH:
1643
98.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
98.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
22.8M
        case JUMP_REPEAT:
1658
22.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
22.8M
            goto jump_repeat;
1660
3.76M
        case JUMP_REPEAT_ONE_1:
1661
3.76M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.76M
            goto jump_repeat_one_1;
1663
67.5M
        case JUMP_REPEAT_ONE_2:
1664
67.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
67.5M
            goto jump_repeat_one_2;
1666
12.5M
        case JUMP_MIN_REPEAT_ONE:
1667
12.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
12.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.78M
        case JUMP_ASSERT:
1673
2.78M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.78M
            goto jump_assert;
1675
6.33M
        case JUMP_ASSERT_NOT:
1676
6.33M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
6.33M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
288M
    }
1683
1684
0
    return ret; /* should never get here */
1685
288M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
221M
{
601
221M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
221M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
221M
    Py_ssize_t ret = 0;
604
221M
    int jump;
605
221M
    unsigned int sigcount = state->sigcount;
606
607
221M
    SRE(match_context)* ctx;
608
221M
    SRE(match_context)* nextctx;
609
221M
    INIT_TRACE(state);
610
611
221M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
221M
    DATA_ALLOC(SRE(match_context), ctx);
614
221M
    ctx->last_ctx_pos = -1;
615
221M
    ctx->jump = JUMP_NONE;
616
221M
    ctx->toplevel = toplevel;
617
221M
    ctx_pos = alloc_pos;
618
619
221M
#if USE_COMPUTED_GOTOS
620
221M
#include "sre_targets.h"
621
221M
#endif
622
623
639M
entrance:
624
625
639M
    ;  // Fashion statement.
626
639M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
639M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
110k
                   end - ptr, (size_t) pattern[3]));
634
110k
            RETURN_FAILURE;
635
110k
        }
636
13.6M
        pattern += pattern[1] + 1;
637
13.6M
    }
638
639
639M
#if USE_COMPUTED_GOTOS
640
639M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
639M
    {
647
648
639M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
285M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
285M
                   ptr, pattern[0]));
653
285M
            {
654
285M
                int i = pattern[0];
655
285M
                if (i & 1)
656
62.9M
                    state->lastindex = i/2 + 1;
657
285M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
284M
                    int j = state->lastmark + 1;
663
288M
                    while (j < i)
664
3.68M
                        state->mark[j++] = NULL;
665
284M
                    state->lastmark = i;
666
284M
                }
667
285M
                state->mark[i] = ptr;
668
285M
            }
669
285M
            pattern++;
670
285M
            DISPATCH;
671
672
285M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
32.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
32.4M
                   ptr, *pattern));
677
32.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.1M
                RETURN_FAILURE;
679
15.3M
            pattern++;
680
15.3M
            ptr++;
681
15.3M
            DISPATCH;
682
683
15.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
68.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
68.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
68.5M
            if (ctx->toplevel &&
698
9.57M
                ((state->match_all && ptr != state->end) ||
699
9.57M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
68.5M
            state->ptr = ptr;
704
68.5M
            RETURN_SUCCESS;
705
706
59.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
59.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
59.2M
            if (!SRE(at)(state, ptr, *pattern))
711
56.5M
                RETURN_FAILURE;
712
2.70M
            pattern++;
713
2.70M
            DISPATCH;
714
715
2.70M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
144M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
144M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
144M
            if (ptr >= end ||
749
143M
                !SRE(charset)(state, pattern + 1, *ptr))
750
63.7M
                RETURN_FAILURE;
751
80.6M
            pattern += pattern[0];
752
80.6M
            ptr++;
753
80.6M
            DISPATCH;
754
755
80.6M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.72M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.72M
                   pattern, ptr, pattern[0]));
758
3.72M
            if (ptr >= end ||
759
3.72M
                sre_lower_ascii(*ptr) != *pattern)
760
20.7k
                RETURN_FAILURE;
761
3.70M
            pattern++;
762
3.70M
            ptr++;
763
3.70M
            DISPATCH;
764
765
3.70M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.8M
        TARGET(SRE_OP_JUMP):
845
19.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.8M
                   ptr, pattern[0]));
850
19.8M
            pattern += pattern[0];
851
19.8M
            DISPATCH;
852
853
25.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
25.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
25.6M
            LASTMARK_SAVE();
858
25.6M
            if (state->repeat)
859
19.0M
                MARK_PUSH(ctx->lastmark);
860
55.1M
            for (; pattern[0]; pattern += pattern[0]) {
861
48.9M
                if (pattern[1] == SRE_OP_LITERAL &&
862
21.5M
                    (ptr >= end ||
863
21.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
13.6M
                    continue;
865
35.2M
                if (pattern[1] == SRE_OP_IN &&
866
16.2M
                    (ptr >= end ||
867
16.2M
                     !SRE(charset)(state, pattern + 3,
868
16.2M
                                   (SRE_CODE) *ptr)))
869
8.65M
                    continue;
870
26.6M
                state->ptr = ptr;
871
26.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
26.6M
                if (ret) {
873
19.4M
                    if (state->repeat)
874
16.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
19.4M
                    RETURN_ON_ERROR(ret);
876
19.4M
                    RETURN_SUCCESS;
877
19.4M
                }
878
7.18M
                if (state->repeat)
879
2.36k
                    MARK_POP_KEEP(ctx->lastmark);
880
7.18M
                LASTMARK_RESTORE();
881
7.18M
            }
882
6.17M
            if (state->repeat)
883
2.99M
                MARK_POP_DISCARD(ctx->lastmark);
884
6.17M
            RETURN_FAILURE;
885
886
243M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
243M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
243M
                   pattern[1], pattern[2]));
898
899
243M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
159k
                RETURN_FAILURE; /* cannot match */
901
902
243M
            state->ptr = ptr;
903
904
243M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
243M
            RETURN_ON_ERROR(ret);
906
243M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
243M
            ctx->count = ret;
908
243M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
243M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
153M
                RETURN_FAILURE;
917
918
89.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.85M
                ptr == state->end &&
920
17.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
17.8k
            {
922
                /* tail is empty.  we're finished */
923
17.8k
                state->ptr = ptr;
924
17.8k
                RETURN_SUCCESS;
925
17.8k
            }
926
927
89.9M
            LASTMARK_SAVE();
928
89.9M
            if (state->repeat)
929
26.7M
                MARK_PUSH(ctx->lastmark);
930
931
89.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.09M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.09M
                for (;;) {
936
9.65M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
7.87M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
4.56M
                        ptr--;
939
4.56M
                        ctx->count--;
940
4.56M
                    }
941
5.09M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.77M
                        break;
943
3.31M
                    state->ptr = ptr;
944
3.31M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.31M
                            pattern+pattern[0]);
946
3.31M
                    if (ret) {
947
3.31M
                        if (state->repeat)
948
3.27M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.31M
                        RETURN_ON_ERROR(ret);
950
3.31M
                        RETURN_SUCCESS;
951
3.31M
                    }
952
230
                    if (state->repeat)
953
230
                        MARK_POP_KEEP(ctx->lastmark);
954
230
                    LASTMARK_RESTORE();
955
956
230
                    ptr--;
957
230
                    ctx->count--;
958
230
                }
959
1.77M
                if (state->repeat)
960
1.76M
                    MARK_POP_DISCARD(ctx->lastmark);
961
84.8M
            } else {
962
                /* general case */
963
147M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
102M
                    state->ptr = ptr;
965
102M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
102M
                            pattern+pattern[0]);
967
102M
                    if (ret) {
968
40.0M
                        if (state->repeat)
969
21.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
40.0M
                        RETURN_ON_ERROR(ret);
971
40.0M
                        RETURN_SUCCESS;
972
40.0M
                    }
973
62.4M
                    if (state->repeat)
974
202k
                        MARK_POP_KEEP(ctx->lastmark);
975
62.4M
                    LASTMARK_RESTORE();
976
977
62.4M
                    ptr--;
978
62.4M
                    ctx->count--;
979
62.4M
                }
980
44.8M
                if (state->repeat)
981
130k
                    MARK_POP_DISCARD(ctx->lastmark);
982
44.8M
            }
983
46.6M
            RETURN_FAILURE;
984
985
1.26M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
1.26M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
1.26M
                   pattern[1], pattern[2]));
997
998
1.26M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
1.26M
            state->ptr = ptr;
1002
1003
1.26M
            if (pattern[1] == 0)
1004
1.26M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
1.26M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
1.26M
            } else {
1028
                /* general case */
1029
1.26M
                LASTMARK_SAVE();
1030
1.26M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
45.9M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
45.9M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
45.9M
                    state->ptr = ptr;
1036
45.9M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
45.9M
                            pattern+pattern[0]);
1038
45.9M
                    if (ret) {
1039
1.26M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
1.26M
                        RETURN_ON_ERROR(ret);
1042
1.26M
                        RETURN_SUCCESS;
1043
1.26M
                    }
1044
44.6M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
44.6M
                    LASTMARK_RESTORE();
1047
1048
44.6M
                    state->ptr = ptr;
1049
44.6M
                    ret = SRE(count)(state, pattern+3, 1);
1050
44.6M
                    RETURN_ON_ERROR(ret);
1051
44.6M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
44.6M
                    if (ret == 0)
1053
0
                        break;
1054
44.6M
                    assert(ret == 1);
1055
44.6M
                    ptr++;
1056
44.6M
                    ctx->count++;
1057
44.6M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
65.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
65.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
65.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
65.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
65.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
65.8M
            ctx->u.rep->count = -1;
1131
65.8M
            ctx->u.rep->pattern = pattern;
1132
65.8M
            ctx->u.rep->prev = state->repeat;
1133
65.8M
            ctx->u.rep->last_ptr = NULL;
1134
65.8M
            state->repeat = ctx->u.rep;
1135
1136
65.8M
            state->ptr = ptr;
1137
65.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
65.8M
            state->repeat = ctx->u.rep->prev;
1139
65.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
65.8M
            if (ret) {
1142
10.8M
                RETURN_ON_ERROR(ret);
1143
10.8M
                RETURN_SUCCESS;
1144
10.8M
            }
1145
55.0M
            RETURN_FAILURE;
1146
1147
94.4M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
94.4M
            ctx->u.rep = state->repeat;
1155
94.4M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
94.4M
            state->ptr = ptr;
1159
1160
94.4M
            ctx->count = ctx->u.rep->count+1;
1161
1162
94.4M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
94.4M
                   ptr, ctx->count));
1164
1165
94.4M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
94.4M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.17M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
91.2M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
91.2M
                ctx->u.rep->count = ctx->count;
1185
91.2M
                LASTMARK_SAVE();
1186
91.2M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
91.2M
                LAST_PTR_PUSH();
1189
91.2M
                ctx->u.rep->last_ptr = state->ptr;
1190
91.2M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
91.2M
                        ctx->u.rep->pattern+3);
1192
91.2M
                LAST_PTR_POP();
1193
91.2M
                if (ret) {
1194
28.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
28.4M
                    RETURN_ON_ERROR(ret);
1196
28.4M
                    RETURN_SUCCESS;
1197
28.4M
                }
1198
62.7M
                MARK_POP(ctx->lastmark);
1199
62.7M
                LASTMARK_RESTORE();
1200
62.7M
                ctx->u.rep->count = ctx->count-1;
1201
62.7M
                state->ptr = ptr;
1202
62.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
65.9M
            state->repeat = ctx->u.rep->prev;
1207
65.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
65.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
65.9M
            RETURN_ON_SUCCESS(ret);
1211
55.0M
            state->ptr = ptr;
1212
55.0M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
8.31M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
8.31M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
8.31M
                   ptr, pattern[1]));
1565
8.31M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
8.31M
            state->ptr = ptr - pattern[1];
1568
8.31M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
8.31M
            RETURN_ON_FAILURE(ret);
1570
5.27M
            pattern += pattern[0];
1571
5.27M
            DISPATCH;
1572
1573
8.32M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
8.32M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
8.32M
                   ptr, pattern[1]));
1578
8.32M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
8.32M
                state->ptr = ptr - pattern[1];
1580
8.32M
                LASTMARK_SAVE();
1581
8.32M
                if (state->repeat)
1582
8.32M
                    MARK_PUSH(ctx->lastmark);
1583
1584
16.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
16.6M
                if (ret) {
1586
2.08k
                    if (state->repeat)
1587
2.08k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.08k
                    RETURN_ON_ERROR(ret);
1589
2.08k
                    RETURN_FAILURE;
1590
2.08k
                }
1591
8.32M
                if (state->repeat)
1592
8.32M
                    MARK_POP(ctx->lastmark);
1593
8.32M
                LASTMARK_RESTORE();
1594
8.32M
            }
1595
8.32M
            pattern += pattern[0];
1596
8.32M
            DISPATCH;
1597
1598
8.32M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
639M
exit:
1620
639M
    ctx_pos = ctx->last_ctx_pos;
1621
639M
    jump = ctx->jump;
1622
639M
    DATA_POP_DISCARD(ctx);
1623
639M
    if (ctx_pos == -1) {
1624
221M
        state->sigcount = sigcount;
1625
221M
        return ret;
1626
221M
    }
1627
418M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
418M
    switch (jump) {
1630
91.2M
        case JUMP_MAX_UNTIL_2:
1631
91.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
91.2M
            goto jump_max_until_2;
1633
65.9M
        case JUMP_MAX_UNTIL_3:
1634
65.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
65.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
26.6M
        case JUMP_BRANCH:
1643
26.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
26.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
65.8M
        case JUMP_REPEAT:
1658
65.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
65.8M
            goto jump_repeat;
1660
3.31M
        case JUMP_REPEAT_ONE_1:
1661
3.31M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.31M
            goto jump_repeat_one_1;
1663
102M
        case JUMP_REPEAT_ONE_2:
1664
102M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
102M
            goto jump_repeat_one_2;
1666
45.9M
        case JUMP_MIN_REPEAT_ONE:
1667
45.9M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
45.9M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
8.31M
        case JUMP_ASSERT:
1673
8.31M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
8.31M
            goto jump_assert;
1675
8.32M
        case JUMP_ASSERT_NOT:
1676
8.32M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
8.32M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
418M
    }
1683
1684
0
    return ret; /* should never get here */
1685
418M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
63.2M
{
601
63.2M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
63.2M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
63.2M
    Py_ssize_t ret = 0;
604
63.2M
    int jump;
605
63.2M
    unsigned int sigcount = state->sigcount;
606
607
63.2M
    SRE(match_context)* ctx;
608
63.2M
    SRE(match_context)* nextctx;
609
63.2M
    INIT_TRACE(state);
610
611
63.2M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
63.2M
    DATA_ALLOC(SRE(match_context), ctx);
614
63.2M
    ctx->last_ctx_pos = -1;
615
63.2M
    ctx->jump = JUMP_NONE;
616
63.2M
    ctx->toplevel = toplevel;
617
63.2M
    ctx_pos = alloc_pos;
618
619
63.2M
#if USE_COMPUTED_GOTOS
620
63.2M
#include "sre_targets.h"
621
63.2M
#endif
622
623
280M
entrance:
624
625
280M
    ;  // Fashion statement.
626
280M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
280M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.78k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.78k
                   end - ptr, (size_t) pattern[3]));
634
3.78k
            RETURN_FAILURE;
635
3.78k
        }
636
10.8M
        pattern += pattern[1] + 1;
637
10.8M
    }
638
639
280M
#if USE_COMPUTED_GOTOS
640
280M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
280M
    {
647
648
280M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
86.2M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
86.2M
                   ptr, pattern[0]));
653
86.2M
            {
654
86.2M
                int i = pattern[0];
655
86.2M
                if (i & 1)
656
23.4M
                    state->lastindex = i/2 + 1;
657
86.2M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
84.1M
                    int j = state->lastmark + 1;
663
86.2M
                    while (j < i)
664
2.02M
                        state->mark[j++] = NULL;
665
84.1M
                    state->lastmark = i;
666
84.1M
                }
667
86.2M
                state->mark[i] = ptr;
668
86.2M
            }
669
86.2M
            pattern++;
670
86.2M
            DISPATCH;
671
672
86.2M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal character */
674
            /* <LITERAL> <code> */
675
30.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
30.8M
                   ptr, *pattern));
677
30.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
13.2M
                RETURN_FAILURE;
679
17.5M
            pattern++;
680
17.5M
            ptr++;
681
17.5M
            DISPATCH;
682
683
17.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
25.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
25.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
25.1M
            if (ctx->toplevel &&
698
8.03M
                ((state->match_all && ptr != state->end) ||
699
8.03M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
25.1M
            state->ptr = ptr;
704
25.1M
            RETURN_SUCCESS;
705
706
11.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
11.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
11.1M
            if (!SRE(at)(state, ptr, *pattern))
711
11.1M
                RETURN_FAILURE;
712
28.5k
            pattern++;
713
28.5k
            DISPATCH;
714
715
28.5k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
63.2M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
63.2M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
63.2M
            if (ptr >= end ||
749
63.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
16.3M
                RETURN_FAILURE;
751
46.8M
            pattern += pattern[0];
752
46.8M
            ptr++;
753
46.8M
            DISPATCH;
754
755
46.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.40M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.40M
                   pattern, ptr, pattern[0]));
758
3.40M
            if (ptr >= end ||
759
3.40M
                sre_lower_ascii(*ptr) != *pattern)
760
20.0k
                RETURN_FAILURE;
761
3.38M
            pattern++;
762
3.38M
            ptr++;
763
3.38M
            DISPATCH;
764
765
3.38M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
23.4M
        TARGET(SRE_OP_JUMP):
845
23.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
23.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
23.4M
                   ptr, pattern[0]));
850
23.4M
            pattern += pattern[0];
851
23.4M
            DISPATCH;
852
853
28.5M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
28.5M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
28.5M
            LASTMARK_SAVE();
858
28.5M
            if (state->repeat)
859
25.9M
                MARK_PUSH(ctx->lastmark);
860
59.2M
            for (; pattern[0]; pattern += pattern[0]) {
861
53.7M
                if (pattern[1] == SRE_OP_LITERAL &&
862
26.6M
                    (ptr >= end ||
863
26.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
18.6M
                    continue;
865
35.0M
                if (pattern[1] == SRE_OP_IN &&
866
20.9M
                    (ptr >= end ||
867
20.9M
                     !SRE(charset)(state, pattern + 3,
868
20.9M
                                   (SRE_CODE) *ptr)))
869
11.2M
                    continue;
870
23.7M
                state->ptr = ptr;
871
23.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.7M
                if (ret) {
873
22.9M
                    if (state->repeat)
874
21.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
22.9M
                    RETURN_ON_ERROR(ret);
876
22.9M
                    RETURN_SUCCESS;
877
22.9M
                }
878
842k
                if (state->repeat)
879
5.65k
                    MARK_POP_KEEP(ctx->lastmark);
880
842k
                LASTMARK_RESTORE();
881
842k
            }
882
5.56M
            if (state->repeat)
883
4.93M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.56M
            RETURN_FAILURE;
885
886
100M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
100M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
100M
                   pattern[1], pattern[2]));
898
899
100M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
12.8k
                RETURN_FAILURE; /* cannot match */
901
902
100M
            state->ptr = ptr;
903
904
100M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
100M
            RETURN_ON_ERROR(ret);
906
100M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
100M
            ctx->count = ret;
908
100M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
100M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
41.4M
                RETURN_FAILURE;
917
918
59.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.95M
                ptr == state->end &&
920
3.37k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.37k
            {
922
                /* tail is empty.  we're finished */
923
3.37k
                state->ptr = ptr;
924
3.37k
                RETURN_SUCCESS;
925
3.37k
            }
926
927
59.1M
            LASTMARK_SAVE();
928
59.1M
            if (state->repeat)
929
40.5M
                MARK_PUSH(ctx->lastmark);
930
931
59.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
10.2M
                ctx->u.chr = pattern[pattern[0]+1];
935
10.2M
                for (;;) {
936
25.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
21.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
15.5M
                        ptr--;
939
15.5M
                        ctx->count--;
940
15.5M
                    }
941
10.2M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
4.44M
                        break;
943
5.75M
                    state->ptr = ptr;
944
5.75M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.75M
                            pattern+pattern[0]);
946
5.75M
                    if (ret) {
947
5.75M
                        if (state->repeat)
948
5.63M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.75M
                        RETURN_ON_ERROR(ret);
950
5.75M
                        RETURN_SUCCESS;
951
5.75M
                    }
952
281
                    if (state->repeat)
953
281
                        MARK_POP_KEEP(ctx->lastmark);
954
281
                    LASTMARK_RESTORE();
955
956
281
                    ptr--;
957
281
                    ctx->count--;
958
281
                }
959
4.44M
                if (state->repeat)
960
4.44M
                    MARK_POP_DISCARD(ctx->lastmark);
961
48.9M
            } else {
962
                /* general case */
963
63.2M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
58.0M
                    state->ptr = ptr;
965
58.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
58.0M
                            pattern+pattern[0]);
967
58.0M
                    if (ret) {
968
43.7M
                        if (state->repeat)
969
30.4M
                            MARK_POP_DISCARD(ctx->lastmark);
970
43.7M
                        RETURN_ON_ERROR(ret);
971
43.7M
                        RETURN_SUCCESS;
972
43.7M
                    }
973
14.3M
                    if (state->repeat)
974
95.5k
                        MARK_POP_KEEP(ctx->lastmark);
975
14.3M
                    LASTMARK_RESTORE();
976
977
14.3M
                    ptr--;
978
14.3M
                    ctx->count--;
979
14.3M
                }
980
5.14M
                if (state->repeat)
981
61.0k
                    MARK_POP_DISCARD(ctx->lastmark);
982
5.14M
            }
983
9.59M
            RETURN_FAILURE;
984
985
10.5k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
10.5k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
10.5k
                   pattern[1], pattern[2]));
997
998
10.5k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
10.5k
            state->ptr = ptr;
1002
1003
10.5k
            if (pattern[1] == 0)
1004
10.5k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
10.5k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
10.5k
            } else {
1028
                /* general case */
1029
10.5k
                LASTMARK_SAVE();
1030
10.5k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
5.08M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
5.08M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
5.08M
                    state->ptr = ptr;
1036
5.08M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
5.08M
                            pattern+pattern[0]);
1038
5.08M
                    if (ret) {
1039
10.5k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
10.5k
                        RETURN_ON_ERROR(ret);
1042
10.5k
                        RETURN_SUCCESS;
1043
10.5k
                    }
1044
5.07M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
5.07M
                    LASTMARK_RESTORE();
1047
1048
5.07M
                    state->ptr = ptr;
1049
5.07M
                    ret = SRE(count)(state, pattern+3, 1);
1050
5.07M
                    RETURN_ON_ERROR(ret);
1051
5.07M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
5.07M
                    if (ret == 0)
1053
0
                        break;
1054
5.07M
                    assert(ret == 1);
1055
5.07M
                    ptr++;
1056
5.07M
                    ctx->count++;
1057
5.07M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
23.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
23.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
23.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
23.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
23.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
23.3M
            ctx->u.rep->count = -1;
1131
23.3M
            ctx->u.rep->pattern = pattern;
1132
23.3M
            ctx->u.rep->prev = state->repeat;
1133
23.3M
            ctx->u.rep->last_ptr = NULL;
1134
23.3M
            state->repeat = ctx->u.rep;
1135
1136
23.3M
            state->ptr = ptr;
1137
23.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
23.3M
            state->repeat = ctx->u.rep->prev;
1139
23.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
23.3M
            if (ret) {
1142
15.9M
                RETURN_ON_ERROR(ret);
1143
15.9M
                RETURN_SUCCESS;
1144
15.9M
            }
1145
7.41M
            RETURN_FAILURE;
1146
1147
59.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
59.2M
            ctx->u.rep = state->repeat;
1155
59.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
59.2M
            state->ptr = ptr;
1159
1160
59.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
59.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
59.2M
                   ptr, ctx->count));
1164
1165
59.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
59.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.12M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
54.1M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
54.1M
                ctx->u.rep->count = ctx->count;
1185
54.1M
                LASTMARK_SAVE();
1186
54.1M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
54.1M
                LAST_PTR_PUSH();
1189
54.1M
                ctx->u.rep->last_ptr = state->ptr;
1190
54.1M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
54.1M
                        ctx->u.rep->pattern+3);
1192
54.1M
                LAST_PTR_POP();
1193
54.1M
                if (ret) {
1194
35.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
35.8M
                    RETURN_ON_ERROR(ret);
1196
35.8M
                    RETURN_SUCCESS;
1197
35.8M
                }
1198
18.2M
                MARK_POP(ctx->lastmark);
1199
18.2M
                LASTMARK_RESTORE();
1200
18.2M
                ctx->u.rep->count = ctx->count-1;
1201
18.2M
                state->ptr = ptr;
1202
18.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
23.3M
            state->repeat = ctx->u.rep->prev;
1207
23.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
23.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
23.3M
            RETURN_ON_SUCCESS(ret);
1211
7.44M
            state->ptr = ptr;
1212
7.44M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.5M
                   ptr, pattern[1]));
1565
12.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.5M
            state->ptr = ptr - pattern[1];
1568
12.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.5M
            RETURN_ON_FAILURE(ret);
1570
9.98M
            pattern += pattern[0];
1571
9.98M
            DISPATCH;
1572
1573
10.8M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
10.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
10.8M
                   ptr, pattern[1]));
1578
10.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
10.8M
                state->ptr = ptr - pattern[1];
1580
10.8M
                LASTMARK_SAVE();
1581
10.8M
                if (state->repeat)
1582
10.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
21.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
21.6M
                if (ret) {
1586
5.34k
                    if (state->repeat)
1587
5.34k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.34k
                    RETURN_ON_ERROR(ret);
1589
5.34k
                    RETURN_FAILURE;
1590
5.34k
                }
1591
10.8M
                if (state->repeat)
1592
10.8M
                    MARK_POP(ctx->lastmark);
1593
10.8M
                LASTMARK_RESTORE();
1594
10.8M
            }
1595
10.8M
            pattern += pattern[0];
1596
10.8M
            DISPATCH;
1597
1598
10.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
280M
exit:
1620
280M
    ctx_pos = ctx->last_ctx_pos;
1621
280M
    jump = ctx->jump;
1622
280M
    DATA_POP_DISCARD(ctx);
1623
280M
    if (ctx_pos == -1) {
1624
63.2M
        state->sigcount = sigcount;
1625
63.2M
        return ret;
1626
63.2M
    }
1627
216M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
216M
    switch (jump) {
1630
54.1M
        case JUMP_MAX_UNTIL_2:
1631
54.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
54.1M
            goto jump_max_until_2;
1633
23.3M
        case JUMP_MAX_UNTIL_3:
1634
23.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
23.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.7M
        case JUMP_BRANCH:
1643
23.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.7M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
23.3M
        case JUMP_REPEAT:
1658
23.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
23.3M
            goto jump_repeat;
1660
5.75M
        case JUMP_REPEAT_ONE_1:
1661
5.75M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.75M
            goto jump_repeat_one_1;
1663
58.0M
        case JUMP_REPEAT_ONE_2:
1664
58.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
58.0M
            goto jump_repeat_one_2;
1666
5.08M
        case JUMP_MIN_REPEAT_ONE:
1667
5.08M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
5.08M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.5M
        case JUMP_ASSERT:
1673
12.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.5M
            goto jump_assert;
1675
10.8M
        case JUMP_ASSERT_NOT:
1676
10.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
10.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
216M
    }
1683
1684
0
    return ret; /* should never get here */
1685
216M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
322M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
116M
{
1694
116M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
116M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
116M
    Py_ssize_t status = 0;
1697
116M
    Py_ssize_t prefix_len = 0;
1698
116M
    Py_ssize_t prefix_skip = 0;
1699
116M
    SRE_CODE* prefix = NULL;
1700
116M
    SRE_CODE* charset = NULL;
1701
116M
    SRE_CODE* overlap = NULL;
1702
116M
    int flags = 0;
1703
116M
    INIT_TRACE(state);
1704
1705
116M
    if (ptr > end)
1706
0
        return 0;
1707
1708
116M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
116M
        flags = pattern[2];
1713
1714
116M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.65M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.65M
                   end - ptr, (size_t) pattern[3]));
1717
5.65M
            return 0;
1718
5.65M
        }
1719
111M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.62M
            end -= pattern[3] - 1;
1723
8.62M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.62M
        }
1726
1727
111M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.64M
            prefix_len = pattern[5];
1731
8.64M
            prefix_skip = pattern[6];
1732
8.64M
            prefix = pattern + 7;
1733
8.64M
            overlap = prefix + prefix_len - 1;
1734
102M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
93.9M
            charset = pattern + 5;
1738
1739
111M
        pattern += 1 + pattern[1];
1740
111M
    }
1741
1742
111M
    TRACE(("prefix = %p %zd %zd\n",
1743
111M
           prefix, prefix_len, prefix_skip));
1744
111M
    TRACE(("charset = %p\n", charset));
1745
1746
111M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.53M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.65M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.65M
#endif
1753
5.65M
        end = (SRE_CHAR *)state->end;
1754
5.65M
        state->must_advance = 0;
1755
8.12M
        while (ptr < end) {
1756
122M
            while (*ptr != c) {
1757
115M
                if (++ptr >= end)
1758
600k
                    return 0;
1759
115M
            }
1760
7.39M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.39M
            state->start = ptr;
1762
7.39M
            state->ptr = ptr + prefix_skip;
1763
7.39M
            if (flags & SRE_INFO_LITERAL)
1764
19.2k
                return 1; /* we got all of it */
1765
7.37M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.37M
            if (status != 0)
1767
6.78M
                return status;
1768
590k
            ++ptr;
1769
590k
            RESET_CAPTURE_GROUP();
1770
590k
        }
1771
129k
        return 0;
1772
5.65M
    }
1773
1774
103M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
1.11M
        Py_ssize_t i = 0;
1778
1779
1.11M
        end = (SRE_CHAR *)state->end;
1780
1.11M
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.60M
        for (i = 0; i < prefix_len; i++)
1784
1.07M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
536k
#endif
1787
1.78M
        while (ptr < end) {
1788
1.78M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
15.0M
            while (*ptr++ != c) {
1790
13.2M
                if (ptr >= end)
1791
336
                    return 0;
1792
13.2M
            }
1793
1.78M
            if (ptr >= end)
1794
62
                return 0;
1795
1796
1.78M
            i = 1;
1797
1.78M
            state->must_advance = 0;
1798
1.78M
            do {
1799
1.78M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.67M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.67M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.67M
                    state->start = ptr - (prefix_len - 1);
1808
1.67M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.67M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.67M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.67M
                    if (status != 0)
1813
1.11M
                        return status;
1814
                    /* close but no cigar -- try again */
1815
556k
                    if (++ptr >= end)
1816
65
                        return 0;
1817
556k
                    RESET_CAPTURE_GROUP();
1818
556k
                }
1819
669k
                i = overlap[i];
1820
669k
            } while (i != 0);
1821
1.78M
        }
1822
0
        return 0;
1823
1.11M
    }
1824
1825
102M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
93.9M
        end = (SRE_CHAR *)state->end;
1828
93.9M
        state->must_advance = 0;
1829
96.1M
        for (;;) {
1830
379M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
283M
                ptr++;
1832
96.1M
            if (ptr >= end)
1833
3.62M
                return 0;
1834
92.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
92.4M
            state->start = ptr;
1836
92.4M
            state->ptr = ptr;
1837
92.4M
            status = SRE(match)(state, pattern, 0);
1838
92.4M
            if (status != 0)
1839
90.3M
                break;
1840
2.14M
            ptr++;
1841
2.14M
            RESET_CAPTURE_GROUP();
1842
2.14M
        }
1843
93.9M
    } else {
1844
        /* general case */
1845
8.54M
        assert(ptr <= end);
1846
8.54M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.54M
        state->start = state->ptr = ptr;
1848
8.54M
        status = SRE(match)(state, pattern, 1);
1849
8.54M
        state->must_advance = 0;
1850
8.54M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.05M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
80
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.05M
        {
1854
4.05M
            state->start = state->ptr = ptr = end;
1855
4.05M
            return 0;
1856
4.05M
        }
1857
324M
        while (status == 0 && ptr < end) {
1858
319M
            ptr++;
1859
319M
            RESET_CAPTURE_GROUP();
1860
319M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
319M
            state->start = state->ptr = ptr;
1862
319M
            status = SRE(match)(state, pattern, 0);
1863
319M
        }
1864
4.49M
    }
1865
1866
94.8M
    return status;
1867
102M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
53.1M
{
1694
53.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
53.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
53.1M
    Py_ssize_t status = 0;
1697
53.1M
    Py_ssize_t prefix_len = 0;
1698
53.1M
    Py_ssize_t prefix_skip = 0;
1699
53.1M
    SRE_CODE* prefix = NULL;
1700
53.1M
    SRE_CODE* charset = NULL;
1701
53.1M
    SRE_CODE* overlap = NULL;
1702
53.1M
    int flags = 0;
1703
53.1M
    INIT_TRACE(state);
1704
1705
53.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
53.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
53.1M
        flags = pattern[2];
1713
1714
53.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.55M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.55M
                   end - ptr, (size_t) pattern[3]));
1717
5.55M
            return 0;
1718
5.55M
        }
1719
47.6M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.41M
            end -= pattern[3] - 1;
1723
2.41M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.41M
        }
1726
1727
47.6M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.41M
            prefix_len = pattern[5];
1731
2.41M
            prefix_skip = pattern[6];
1732
2.41M
            prefix = pattern + 7;
1733
2.41M
            overlap = prefix + prefix_len - 1;
1734
45.2M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
39.3M
            charset = pattern + 5;
1738
1739
47.6M
        pattern += 1 + pattern[1];
1740
47.6M
    }
1741
1742
47.6M
    TRACE(("prefix = %p %zd %zd\n",
1743
47.6M
           prefix, prefix_len, prefix_skip));
1744
47.6M
    TRACE(("charset = %p\n", charset));
1745
1746
47.6M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.39M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.39M
#if SIZEOF_SRE_CHAR < 4
1750
2.39M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.39M
#endif
1753
2.39M
        end = (SRE_CHAR *)state->end;
1754
2.39M
        state->must_advance = 0;
1755
2.77M
        while (ptr < end) {
1756
29.4M
            while (*ptr != c) {
1757
27.3M
                if (++ptr >= end)
1758
508k
                    return 0;
1759
27.3M
            }
1760
2.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.14M
            state->start = ptr;
1762
2.14M
            state->ptr = ptr + prefix_skip;
1763
2.14M
            if (flags & SRE_INFO_LITERAL)
1764
1.59k
                return 1; /* we got all of it */
1765
2.13M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.13M
            if (status != 0)
1767
1.75M
                return status;
1768
378k
            ++ptr;
1769
378k
            RESET_CAPTURE_GROUP();
1770
378k
        }
1771
125k
        return 0;
1772
2.39M
    }
1773
1774
45.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
16.6k
        Py_ssize_t i = 0;
1778
1779
16.6k
        end = (SRE_CHAR *)state->end;
1780
16.6k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
16.6k
#if SIZEOF_SRE_CHAR < 4
1783
49.8k
        for (i = 0; i < prefix_len; i++)
1784
33.2k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
16.6k
#endif
1787
102k
        while (ptr < end) {
1788
102k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.82M
            while (*ptr++ != c) {
1790
2.72M
                if (ptr >= end)
1791
62
                    return 0;
1792
2.72M
            }
1793
102k
            if (ptr >= end)
1794
22
                return 0;
1795
1796
102k
            i = 1;
1797
102k
            state->must_advance = 0;
1798
102k
            do {
1799
102k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
82.4k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
82.4k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
82.4k
                    state->start = ptr - (prefix_len - 1);
1808
82.4k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
82.4k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
82.4k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
82.4k
                    if (status != 0)
1813
16.5k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
65.9k
                    if (++ptr >= end)
1816
27
                        return 0;
1817
65.9k
                    RESET_CAPTURE_GROUP();
1818
65.9k
                }
1819
86.2k
                i = overlap[i];
1820
86.2k
            } while (i != 0);
1821
102k
        }
1822
0
        return 0;
1823
16.6k
    }
1824
1825
45.2M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
39.3M
        end = (SRE_CHAR *)state->end;
1828
39.3M
        state->must_advance = 0;
1829
40.5M
        for (;;) {
1830
103M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
63.2M
                ptr++;
1832
40.5M
            if (ptr >= end)
1833
2.59M
                return 0;
1834
37.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
37.9M
            state->start = ptr;
1836
37.9M
            state->ptr = ptr;
1837
37.9M
            status = SRE(match)(state, pattern, 0);
1838
37.9M
            if (status != 0)
1839
36.7M
                break;
1840
1.22M
            ptr++;
1841
1.22M
            RESET_CAPTURE_GROUP();
1842
1.22M
        }
1843
39.3M
    } else {
1844
        /* general case */
1845
5.92M
        assert(ptr <= end);
1846
5.92M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.92M
        state->start = state->ptr = ptr;
1848
5.92M
        status = SRE(match)(state, pattern, 1);
1849
5.92M
        state->must_advance = 0;
1850
5.92M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.63M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
29
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.63M
        {
1854
2.63M
            state->start = state->ptr = ptr = end;
1855
2.63M
            return 0;
1856
2.63M
        }
1857
126M
        while (status == 0 && ptr < end) {
1858
123M
            ptr++;
1859
123M
            RESET_CAPTURE_GROUP();
1860
123M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
123M
            state->start = state->ptr = ptr;
1862
123M
            status = SRE(match)(state, pattern, 0);
1863
123M
        }
1864
3.28M
    }
1865
1866
39.9M
    return status;
1867
45.2M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
56.4M
{
1694
56.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
56.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
56.4M
    Py_ssize_t status = 0;
1697
56.4M
    Py_ssize_t prefix_len = 0;
1698
56.4M
    Py_ssize_t prefix_skip = 0;
1699
56.4M
    SRE_CODE* prefix = NULL;
1700
56.4M
    SRE_CODE* charset = NULL;
1701
56.4M
    SRE_CODE* overlap = NULL;
1702
56.4M
    int flags = 0;
1703
56.4M
    INIT_TRACE(state);
1704
1705
56.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
56.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
56.4M
        flags = pattern[2];
1713
1714
56.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
97.0k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
97.0k
                   end - ptr, (size_t) pattern[3]));
1717
97.0k
            return 0;
1718
97.0k
        }
1719
56.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.77M
            end -= pattern[3] - 1;
1723
3.77M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.77M
        }
1726
1727
56.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.78M
            prefix_len = pattern[5];
1731
3.78M
            prefix_skip = pattern[6];
1732
3.78M
            prefix = pattern + 7;
1733
3.78M
            overlap = prefix + prefix_len - 1;
1734
52.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
50.0M
            charset = pattern + 5;
1738
1739
56.3M
        pattern += 1 + pattern[1];
1740
56.3M
    }
1741
1742
56.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
56.3M
           prefix, prefix_len, prefix_skip));
1744
56.3M
    TRACE(("charset = %p\n", charset));
1745
1746
56.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.26M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.26M
#if SIZEOF_SRE_CHAR < 4
1750
3.26M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.26M
#endif
1753
3.26M
        end = (SRE_CHAR *)state->end;
1754
3.26M
        state->must_advance = 0;
1755
3.40M
        while (ptr < end) {
1756
65.0M
            while (*ptr != c) {
1757
61.7M
                if (++ptr >= end)
1758
85.9k
                    return 0;
1759
61.7M
            }
1760
3.31M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.31M
            state->start = ptr;
1762
3.31M
            state->ptr = ptr + prefix_skip;
1763
3.31M
            if (flags & SRE_INFO_LITERAL)
1764
10.6k
                return 1; /* we got all of it */
1765
3.30M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.30M
            if (status != 0)
1767
3.16M
                return status;
1768
144k
            ++ptr;
1769
144k
            RESET_CAPTURE_GROUP();
1770
144k
        }
1771
3.81k
        return 0;
1772
3.26M
    }
1773
1774
53.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
519k
        Py_ssize_t i = 0;
1778
1779
519k
        end = (SRE_CHAR *)state->end;
1780
519k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
519k
#if SIZEOF_SRE_CHAR < 4
1783
1.55M
        for (i = 0; i < prefix_len; i++)
1784
1.03M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
519k
#endif
1787
797k
        while (ptr < end) {
1788
797k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.92M
            while (*ptr++ != c) {
1790
3.12M
                if (ptr >= end)
1791
133
                    return 0;
1792
3.12M
            }
1793
797k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
797k
            i = 1;
1797
797k
            state->must_advance = 0;
1798
798k
            do {
1799
798k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
776k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
776k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
776k
                    state->start = ptr - (prefix_len - 1);
1808
776k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
776k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
776k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
776k
                    if (status != 0)
1813
519k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
257k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
257k
                    RESET_CAPTURE_GROUP();
1818
257k
                }
1819
278k
                i = overlap[i];
1820
278k
            } while (i != 0);
1821
797k
        }
1822
0
        return 0;
1823
519k
    }
1824
1825
52.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
50.0M
        end = (SRE_CHAR *)state->end;
1828
50.0M
        state->must_advance = 0;
1829
50.4M
        for (;;) {
1830
203M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
152M
                ptr++;
1832
50.4M
            if (ptr >= end)
1833
979k
                return 0;
1834
49.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
49.4M
            state->start = ptr;
1836
49.4M
            state->ptr = ptr;
1837
49.4M
            status = SRE(match)(state, pattern, 0);
1838
49.4M
            if (status != 0)
1839
49.1M
                break;
1840
388k
            ptr++;
1841
388k
            RESET_CAPTURE_GROUP();
1842
388k
        }
1843
50.0M
    } else {
1844
        /* general case */
1845
2.46M
        assert(ptr <= end);
1846
2.46M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.46M
        state->start = state->ptr = ptr;
1848
2.46M
        status = SRE(match)(state, pattern, 1);
1849
2.46M
        state->must_advance = 0;
1850
2.46M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
29
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.40M
        {
1854
1.40M
            state->start = state->ptr = ptr = end;
1855
1.40M
            return 0;
1856
1.40M
        }
1857
153M
        while (status == 0 && ptr < end) {
1858
152M
            ptr++;
1859
152M
            RESET_CAPTURE_GROUP();
1860
152M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
152M
            state->start = state->ptr = ptr;
1862
152M
            status = SRE(match)(state, pattern, 0);
1863
152M
        }
1864
1.05M
    }
1865
1866
50.1M
    return status;
1867
52.5M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.18M
{
1694
7.18M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.18M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.18M
    Py_ssize_t status = 0;
1697
7.18M
    Py_ssize_t prefix_len = 0;
1698
7.18M
    Py_ssize_t prefix_skip = 0;
1699
7.18M
    SRE_CODE* prefix = NULL;
1700
7.18M
    SRE_CODE* charset = NULL;
1701
7.18M
    SRE_CODE* overlap = NULL;
1702
7.18M
    int flags = 0;
1703
7.18M
    INIT_TRACE(state);
1704
1705
7.18M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.18M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.18M
        flags = pattern[2];
1713
1714
7.18M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.52k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.52k
                   end - ptr, (size_t) pattern[3]));
1717
5.52k
            return 0;
1718
5.52k
        }
1719
7.17M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.44M
            end -= pattern[3] - 1;
1723
2.44M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.44M
        }
1726
1727
7.17M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.45M
            prefix_len = pattern[5];
1731
2.45M
            prefix_skip = pattern[6];
1732
2.45M
            prefix = pattern + 7;
1733
2.45M
            overlap = prefix + prefix_len - 1;
1734
4.72M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.56M
            charset = pattern + 5;
1738
1739
7.17M
        pattern += 1 + pattern[1];
1740
7.17M
    }
1741
1742
7.17M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.17M
           prefix, prefix_len, prefix_skip));
1744
7.17M
    TRACE(("charset = %p\n", charset));
1745
1746
7.17M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.87M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
1.87M
        end = (SRE_CHAR *)state->end;
1754
1.87M
        state->must_advance = 0;
1755
1.93M
        while (ptr < end) {
1756
28.0M
            while (*ptr != c) {
1757
26.1M
                if (++ptr >= end)
1758
5.50k
                    return 0;
1759
26.1M
            }
1760
1.93M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.93M
            state->start = ptr;
1762
1.93M
            state->ptr = ptr + prefix_skip;
1763
1.93M
            if (flags & SRE_INFO_LITERAL)
1764
7.08k
                return 1; /* we got all of it */
1765
1.92M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.92M
            if (status != 0)
1767
1.85M
                return status;
1768
67.1k
            ++ptr;
1769
67.1k
            RESET_CAPTURE_GROUP();
1770
67.1k
        }
1771
863
        return 0;
1772
1.87M
    }
1773
1774
5.30M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
582k
        Py_ssize_t i = 0;
1778
1779
582k
        end = (SRE_CHAR *)state->end;
1780
582k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
885k
        while (ptr < end) {
1788
885k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.30M
            while (*ptr++ != c) {
1790
7.41M
                if (ptr >= end)
1791
141
                    return 0;
1792
7.41M
            }
1793
884k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
884k
            i = 1;
1797
884k
            state->must_advance = 0;
1798
886k
            do {
1799
886k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
815k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
815k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
815k
                    state->start = ptr - (prefix_len - 1);
1808
815k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
815k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
815k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
815k
                    if (status != 0)
1813
582k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
233k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
233k
                    RESET_CAPTURE_GROUP();
1818
233k
                }
1819
304k
                i = overlap[i];
1820
304k
            } while (i != 0);
1821
884k
        }
1822
0
        return 0;
1823
582k
    }
1824
1825
4.72M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.56M
        end = (SRE_CHAR *)state->end;
1828
4.56M
        state->must_advance = 0;
1829
5.09M
        for (;;) {
1830
72.0M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
66.9M
                ptr++;
1832
5.09M
            if (ptr >= end)
1833
48.2k
                return 0;
1834
5.04M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
5.04M
            state->start = ptr;
1836
5.04M
            state->ptr = ptr;
1837
5.04M
            status = SRE(match)(state, pattern, 0);
1838
5.04M
            if (status != 0)
1839
4.51M
                break;
1840
530k
            ptr++;
1841
530k
            RESET_CAPTURE_GROUP();
1842
530k
        }
1843
4.56M
    } else {
1844
        /* general case */
1845
161k
        assert(ptr <= end);
1846
161k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
161k
        state->start = state->ptr = ptr;
1848
161k
        status = SRE(match)(state, pattern, 1);
1849
161k
        state->must_advance = 0;
1850
161k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.8k
        {
1854
13.8k
            state->start = state->ptr = ptr = end;
1855
13.8k
            return 0;
1856
13.8k
        }
1857
44.5M
        while (status == 0 && ptr < end) {
1858
44.3M
            ptr++;
1859
44.3M
            RESET_CAPTURE_GROUP();
1860
44.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
44.3M
            state->start = state->ptr = ptr;
1862
44.3M
            status = SRE(match)(state, pattern, 0);
1863
44.3M
        }
1864
147k
    }
1865
1866
4.66M
    return status;
1867
4.72M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/