Coverage Report

Created: 2026-01-17 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
90.9M
{
18
    /* check if pointer is at given position */
19
20
90.9M
    Py_ssize_t thisp, thatp;
21
22
90.9M
    switch (at) {
23
24
11.6M
    case SRE_AT_BEGINNING:
25
11.6M
    case SRE_AT_BEGINNING_STRING:
26
11.6M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
74.4M
    case SRE_AT_END:
33
74.4M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.29M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
74.4M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
4.77M
    case SRE_AT_END_STRING:
42
4.77M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
90.9M
    }
87
88
0
    return 0;
89
90.9M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
31.7M
{
18
    /* check if pointer is at given position */
19
20
31.7M
    Py_ssize_t thisp, thatp;
21
22
31.7M
    switch (at) {
23
24
10.4M
    case SRE_AT_BEGINNING:
25
10.4M
    case SRE_AT_BEGINNING_STRING:
26
10.4M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
19.7M
    case SRE_AT_END:
33
19.7M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
313k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
19.7M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.56M
    case SRE_AT_END_STRING:
42
1.56M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
31.7M
    }
87
88
0
    return 0;
89
31.7M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
50.4M
{
18
    /* check if pointer is at given position */
19
20
50.4M
    Py_ssize_t thisp, thatp;
21
22
50.4M
    switch (at) {
23
24
1.23M
    case SRE_AT_BEGINNING:
25
1.23M
    case SRE_AT_BEGINNING_STRING:
26
1.23M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
47.0M
    case SRE_AT_END:
33
47.0M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
978k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
47.0M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.15M
    case SRE_AT_END_STRING:
42
2.15M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
50.4M
    }
87
88
0
    return 0;
89
50.4M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
8.75M
{
18
    /* check if pointer is at given position */
19
20
8.75M
    Py_ssize_t thisp, thatp;
21
22
8.75M
    switch (at) {
23
24
17.6k
    case SRE_AT_BEGINNING:
25
17.6k
    case SRE_AT_BEGINNING_STRING:
26
17.6k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.67M
    case SRE_AT_END:
33
7.67M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
7.95k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.67M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.05M
    case SRE_AT_END_STRING:
42
1.05M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
8.75M
    }
87
88
0
    return 0;
89
8.75M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.58G
{
94
    /* check if character is a member of the given set */
95
96
1.58G
    int ok = 1;
97
98
3.64G
    for (;;) {
99
3.64G
        switch (*set++) {
100
101
1.08G
        case SRE_OP_FAILURE:
102
1.08G
            return !ok;
103
104
1.29G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.29G
            if (ch == set[0])
107
9.99M
                return ok;
108
1.28G
            set++;
109
1.28G
            break;
110
111
93.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
93.2M
            if (sre_category(set[0], (int) ch))
114
39.9M
                return ok;
115
53.2M
            set++;
116
53.2M
            break;
117
118
524M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
524M
            if (ch < 256 &&
121
499M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
211M
                return ok;
123
312M
            set += 256/SRE_CODE_BITS;
124
312M
            break;
125
126
379M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
379M
            if (set[0] <= ch && ch <= set[1])
129
236M
                return ok;
130
142M
            set += 2;
131
142M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
272M
        case SRE_OP_NEGATE:
148
272M
            ok = !ok;
149
272M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.64G
        }
175
3.64G
    }
176
1.58G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
445M
{
94
    /* check if character is a member of the given set */
95
96
445M
    int ok = 1;
97
98
927M
    for (;;) {
99
927M
        switch (*set++) {
100
101
254M
        case SRE_OP_FAILURE:
102
254M
            return !ok;
103
104
293M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
293M
            if (ch == set[0])
107
5.67M
                return ok;
108
288M
            set++;
109
288M
            break;
110
111
35.7M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
35.7M
            if (sre_category(set[0], (int) ch))
114
19.1M
                return ok;
115
16.6M
            set++;
116
16.6M
            break;
117
118
107M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
107M
            if (ch < 256 &&
121
107M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
53.2M
                return ok;
123
54.4M
            set += 256/SRE_CODE_BITS;
124
54.4M
            break;
125
126
188M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
188M
            if (set[0] <= ch && ch <= set[1])
129
112M
                return ok;
130
75.5M
            set += 2;
131
75.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
46.7M
        case SRE_OP_NEGATE:
148
46.7M
            ok = !ok;
149
46.7M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
927M
        }
175
927M
    }
176
445M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
732M
{
94
    /* check if character is a member of the given set */
95
96
732M
    int ok = 1;
97
98
1.76G
    for (;;) {
99
1.76G
        switch (*set++) {
100
101
538M
        case SRE_OP_FAILURE:
102
538M
            return !ok;
103
104
718M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
718M
            if (ch == set[0])
107
2.92M
                return ok;
108
715M
            set++;
109
715M
            break;
110
111
49.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
49.1M
            if (sre_category(set[0], (int) ch))
114
17.5M
                return ok;
115
31.6M
            set++;
116
31.6M
            break;
117
118
189M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
189M
            if (ch < 256 &&
121
178M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
68.9M
                return ok;
123
120M
            set += 256/SRE_CODE_BITS;
124
120M
            break;
125
126
161M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
161M
            if (set[0] <= ch && ch <= set[1])
129
104M
                return ok;
130
57.1M
            set += 2;
131
57.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
108M
        case SRE_OP_NEGATE:
148
108M
            ok = !ok;
149
108M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.76G
        }
175
1.76G
    }
176
732M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
401M
{
94
    /* check if character is a member of the given set */
95
96
401M
    int ok = 1;
97
98
953M
    for (;;) {
99
953M
        switch (*set++) {
100
101
288M
        case SRE_OP_FAILURE:
102
288M
            return !ok;
103
104
282M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
282M
            if (ch == set[0])
107
1.39M
                return ok;
108
281M
            set++;
109
281M
            break;
110
111
8.33M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
8.33M
            if (sre_category(set[0], (int) ch))
114
3.36M
                return ok;
115
4.96M
            set++;
116
4.96M
            break;
117
118
226M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
226M
            if (ch < 256 &&
121
213M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
89.2M
                return ok;
123
137M
            set += 256/SRE_CODE_BITS;
124
137M
            break;
125
126
29.4M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
29.4M
            if (set[0] <= ch && ch <= set[1])
129
19.1M
                return ok;
130
10.2M
            set += 2;
131
10.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
117M
        case SRE_OP_NEGATE:
148
117M
            ok = !ok;
149
117M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
953M
        }
175
953M
    }
176
401M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
654M
{
195
654M
    SRE_CODE chr;
196
654M
    SRE_CHAR c;
197
654M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
654M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
654M
    Py_ssize_t i;
200
654M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
654M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
88.1M
        end = ptr + maxcount;
205
206
654M
    switch (pattern[0]) {
207
208
514M
    case SRE_OP_IN:
209
        /* repeated set */
210
514M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
898M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
384M
            ptr++;
213
514M
        break;
214
215
47.2M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
47.2M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
123M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
76.4M
            ptr++;
220
47.2M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
91.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
91.0M
        chr = pattern[1];
232
91.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
91.0M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
79.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
79.0M
        else
238
79.0M
#endif
239
96.0M
        while (ptr < end && *ptr == c)
240
4.94M
            ptr++;
241
91.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.89M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.89M
        chr = pattern[1];
270
1.89M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.89M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
903k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
903k
        else
276
903k
#endif
277
48.6M
        while (ptr < end && *ptr != c)
278
46.7M
            ptr++;
279
1.89M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
654M
    }
319
320
654M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
654M
           ptr - (SRE_CHAR*) state->ptr));
322
654M
    return ptr - (SRE_CHAR*) state->ptr;
323
654M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
222M
{
195
222M
    SRE_CODE chr;
196
222M
    SRE_CHAR c;
197
222M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
222M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
222M
    Py_ssize_t i;
200
222M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
222M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
24.6M
        end = ptr + maxcount;
205
206
222M
    switch (pattern[0]) {
207
208
144M
    case SRE_OP_IN:
209
        /* repeated set */
210
144M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
271M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
126M
            ptr++;
213
144M
        break;
214
215
11.6M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
11.6M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
26.9M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
15.3M
            ptr++;
220
11.6M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
65.1M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
65.1M
        chr = pattern[1];
232
65.1M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
65.1M
        c = (SRE_CHAR) chr;
234
65.1M
#if SIZEOF_SRE_CHAR < 4
235
65.1M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
65.1M
        else
238
65.1M
#endif
239
67.0M
        while (ptr < end && *ptr == c)
240
1.89M
            ptr++;
241
65.1M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
560k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
560k
        chr = pattern[1];
270
560k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
560k
        c = (SRE_CHAR) chr;
272
560k
#if SIZEOF_SRE_CHAR < 4
273
560k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
560k
        else
276
560k
#endif
277
6.99M
        while (ptr < end && *ptr != c)
278
6.43M
            ptr++;
279
560k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
222M
    }
319
320
222M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
222M
           ptr - (SRE_CHAR*) state->ptr));
322
222M
    return ptr - (SRE_CHAR*) state->ptr;
323
222M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
289M
{
195
289M
    SRE_CODE chr;
196
289M
    SRE_CHAR c;
197
289M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
289M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
289M
    Py_ssize_t i;
200
289M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
289M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
43.2M
        end = ptr + maxcount;
205
206
289M
    switch (pattern[0]) {
207
208
244M
    case SRE_OP_IN:
209
        /* repeated set */
210
244M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
380M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
135M
            ptr++;
213
244M
        break;
214
215
30.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
30.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
75.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
44.5M
            ptr++;
220
30.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
13.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
13.9M
        chr = pattern[1];
232
13.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
13.9M
        c = (SRE_CHAR) chr;
234
13.9M
#if SIZEOF_SRE_CHAR < 4
235
13.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
13.9M
        else
238
13.9M
#endif
239
15.8M
        while (ptr < end && *ptr == c)
240
1.92M
            ptr++;
241
13.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
343k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
343k
        chr = pattern[1];
270
343k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
343k
        c = (SRE_CHAR) chr;
272
343k
#if SIZEOF_SRE_CHAR < 4
273
343k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
343k
        else
276
343k
#endif
277
12.5M
        while (ptr < end && *ptr != c)
278
12.1M
            ptr++;
279
343k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
289M
    }
319
320
289M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
289M
           ptr - (SRE_CHAR*) state->ptr));
322
289M
    return ptr - (SRE_CHAR*) state->ptr;
323
289M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
142M
{
195
142M
    SRE_CODE chr;
196
142M
    SRE_CHAR c;
197
142M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
142M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
142M
    Py_ssize_t i;
200
142M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
142M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
20.2M
        end = ptr + maxcount;
205
206
142M
    switch (pattern[0]) {
207
208
124M
    case SRE_OP_IN:
209
        /* repeated set */
210
124M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
247M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
122M
            ptr++;
213
124M
        break;
214
215
4.69M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
4.69M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
21.2M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.5M
            ptr++;
220
4.69M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
12.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
12.0M
        chr = pattern[1];
232
12.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
12.0M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
13.1M
        while (ptr < end && *ptr == c)
240
1.11M
            ptr++;
241
12.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
993k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
993k
        chr = pattern[1];
270
993k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
993k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
29.1M
        while (ptr < end && *ptr != c)
278
28.1M
            ptr++;
279
993k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
142M
    }
319
320
142M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
142M
           ptr - (SRE_CHAR*) state->ptr));
322
142M
    return ptr - (SRE_CHAR*) state->ptr;
323
142M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (in other words, those
327
 * that will backtrack). The cases are:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outer 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
563M
    do { \
355
563M
        ctx->lastmark = state->lastmark; \
356
563M
        ctx->lastindex = state->lastindex; \
357
563M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
335M
    do { \
360
335M
        state->lastmark = ctx->lastmark; \
361
335M
        state->lastindex = ctx->lastindex; \
362
335M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
195M
    do { \
366
195M
        TRACE(("push last_ptr: %zd", \
367
195M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
195M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
195M
    } while (0)
370
#define LAST_PTR_POP()  \
371
195M
    do { \
372
195M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
195M
        TRACE(("pop last_ptr: %zd", \
374
195M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
195M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
871M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
588M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.17G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
110M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
28.0M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.45G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.45G
do { \
390
1.45G
    alloc_pos = state->data_stack_base; \
391
1.45G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.45G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.45G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
162M
        int j = data_stack_grow(state, sizeof(type)); \
395
162M
        if (j < 0) return j; \
396
162M
        if (ctx_pos != -1) \
397
162M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
162M
    } \
399
1.45G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.45G
    state->data_stack_base += sizeof(type); \
401
1.45G
} while (0)
402
403
1.58G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.58G
do { \
405
1.58G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.58G
    ptr = (type*)(state->data_stack+pos); \
407
1.58G
} while (0)
408
409
487M
#define DATA_STACK_PUSH(state, data, size) \
410
487M
do { \
411
487M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
487M
           data, state->data_stack_base, size)); \
413
487M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
96.0k
        int j = data_stack_grow(state, size); \
415
96.0k
        if (j < 0) return j; \
416
96.0k
        if (ctx_pos != -1) \
417
96.0k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
96.0k
    } \
419
487M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
487M
    state->data_stack_base += size; \
421
487M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`, see bpo-39943 for details. */
426
302M
#define DATA_STACK_POP(state, data, size, discard) \
427
302M
do { \
428
302M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
302M
           data, state->data_stack_base-size, size)); \
430
302M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
302M
    if (discard) \
432
302M
        state->data_stack_base -= size; \
433
302M
} while (0)
434
435
1.64G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.64G
do { \
437
1.64G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.64G
           state->data_stack_base-size, size)); \
439
1.64G
    state->data_stack_base -= size; \
440
1.64G
} while(0)
441
442
#define DATA_PUSH(x) \
443
195M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
195M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.45G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.45G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.57G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
402M
    do if (lastmark >= 0) { \
473
292M
        MARK_TRACE("push", (lastmark)); \
474
292M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
292M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
402M
    } while (0)
477
#define MARK_POP(lastmark) \
478
121M
    do if (lastmark >= 0) { \
479
105M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
105M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
105M
        MARK_TRACE("pop", (lastmark)); \
482
121M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.12M
    do if (lastmark >= 0) { \
485
1.83M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.83M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.83M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.12M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
281M
    do if (lastmark >= 0) { \
491
186M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
186M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
186M
        MARK_TRACE("pop discard", (lastmark)); \
494
281M
    } while (0)
495
496
534M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
195M
#define JUMP_MAX_UNTIL_2     2
499
110M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
110M
#define JUMP_REPEAT          7
504
14.2M
#define JUMP_REPEAT_ONE_1    8
505
223M
#define JUMP_REPEAT_ONE_2    9
506
48.2M
#define JUMP_MIN_REPEAT_ONE  10
507
166M
#define JUMP_BRANCH          11
508
28.0M
#define JUMP_ASSERT          12
509
28.8M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
925M
    ctx->pattern = pattern; \
516
925M
    ctx->ptr = ptr; \
517
925M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
925M
    nextctx->pattern = nextpattern; \
519
925M
    nextctx->toplevel = toplevel_; \
520
925M
    nextctx->jump = jumpvalue; \
521
925M
    nextctx->last_ctx_pos = ctx_pos; \
522
925M
    pattern = nextpattern; \
523
925M
    ctx_pos = alloc_pos; \
524
925M
    ctx = nextctx; \
525
925M
    goto entrance; \
526
925M
    jumplabel: \
527
925M
    pattern = ctx->pattern; \
528
925M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
868M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
56.8M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.52G
    do {                                                           \
553
2.52G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.52G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.52G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.60G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.52G
        do {                               \
588
2.52G
            MAYBE_CHECK_SIGNALS;           \
589
2.52G
            goto *sre_targets[*pattern++]; \
590
2.52G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
534M
{
601
534M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
534M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
534M
    Py_ssize_t ret = 0;
604
534M
    int jump;
605
534M
    unsigned int sigcount = state->sigcount;
606
607
534M
    SRE(match_context)* ctx;
608
534M
    SRE(match_context)* nextctx;
609
534M
    INIT_TRACE(state);
610
611
534M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
534M
    DATA_ALLOC(SRE(match_context), ctx);
614
534M
    ctx->last_ctx_pos = -1;
615
534M
    ctx->jump = JUMP_NONE;
616
534M
    ctx->toplevel = toplevel;
617
534M
    ctx_pos = alloc_pos;
618
619
534M
#if USE_COMPUTED_GOTOS
620
534M
#include "sre_targets.h"
621
534M
#endif
622
623
1.45G
entrance:
624
625
1.45G
    ;  // Fashion statement.
626
1.45G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.45G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
66.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.80M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.80M
                   end - ptr, (size_t) pattern[3]));
634
4.80M
            RETURN_FAILURE;
635
4.80M
        }
636
61.4M
        pattern += pattern[1] + 1;
637
61.4M
    }
638
639
1.45G
#if USE_COMPUTED_GOTOS
640
1.45G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.45G
    {
647
648
1.45G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
630M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
630M
                   ptr, pattern[0]));
653
630M
            {
654
630M
                int i = pattern[0];
655
630M
                if (i & 1)
656
119M
                    state->lastindex = i/2 + 1;
657
630M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
622M
                    int j = state->lastmark + 1;
663
640M
                    while (j < i)
664
17.2M
                        state->mark[j++] = NULL;
665
622M
                    state->lastmark = i;
666
622M
                }
667
630M
                state->mark[i] = ptr;
668
630M
            }
669
630M
            pattern++;
670
630M
            DISPATCH;
671
672
630M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
154M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
154M
                   ptr, *pattern));
677
154M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
67.1M
                RETURN_FAILURE;
679
87.8M
            pattern++;
680
87.8M
            ptr++;
681
87.8M
            DISPATCH;
682
683
87.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
158M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
158M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
158M
            if (ctx->toplevel &&
698
46.3M
                ((state->match_all && ptr != state->end) ||
699
46.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
158M
            state->ptr = ptr;
704
158M
            RETURN_SUCCESS;
705
706
90.9M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
90.9M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
90.9M
            if (!SRE(at)(state, ptr, *pattern))
711
71.3M
                RETURN_FAILURE;
712
19.6M
            pattern++;
713
19.6M
            DISPATCH;
714
715
19.6M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
273M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
273M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
273M
            if (ptr >= end ||
749
272M
                !SRE(charset)(state, pattern + 1, *ptr))
750
86.4M
                RETURN_FAILURE;
751
187M
            pattern += pattern[0];
752
187M
            ptr++;
753
187M
            DISPATCH;
754
755
187M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.27M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.27M
                   pattern, ptr, pattern[0]));
758
6.27M
            if (ptr >= end ||
759
6.27M
                sre_lower_ascii(*ptr) != *pattern)
760
82.7k
                RETURN_FAILURE;
761
6.19M
            pattern++;
762
6.19M
            ptr++;
763
6.19M
            DISPATCH;
764
765
6.19M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
84.9M
        TARGET(SRE_OP_JUMP):
845
84.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
84.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
84.9M
                   ptr, pattern[0]));
850
84.9M
            pattern += pattern[0];
851
84.9M
            DISPATCH;
852
853
125M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
125M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
125M
            LASTMARK_SAVE();
858
125M
            if (state->repeat)
859
66.2M
                MARK_PUSH(ctx->lastmark);
860
311M
            for (; pattern[0]; pattern += pattern[0]) {
861
267M
                if (pattern[1] == SRE_OP_LITERAL &&
862
130M
                    (ptr >= end ||
863
130M
                     (SRE_CODE) *ptr != pattern[2]))
864
69.3M
                    continue;
865
198M
                if (pattern[1] == SRE_OP_IN &&
866
57.3M
                    (ptr >= end ||
867
57.2M
                     !SRE(charset)(state, pattern + 3,
868
57.2M
                                   (SRE_CODE) *ptr)))
869
32.0M
                    continue;
870
166M
                state->ptr = ptr;
871
166M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
166M
                if (ret) {
873
81.2M
                    if (state->repeat)
874
55.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
81.2M
                    RETURN_ON_ERROR(ret);
876
81.2M
                    RETURN_SUCCESS;
877
81.2M
                }
878
84.8M
                if (state->repeat)
879
27.0k
                    MARK_POP_KEEP(ctx->lastmark);
880
84.8M
                LASTMARK_RESTORE();
881
84.8M
            }
882
43.9M
            if (state->repeat)
883
10.6M
                MARK_POP_DISCARD(ctx->lastmark);
884
43.9M
            RETURN_FAILURE;
885
886
612M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
612M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
612M
                   pattern[1], pattern[2]));
898
899
612M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.24M
                RETURN_FAILURE; /* cannot match */
901
902
610M
            state->ptr = ptr;
903
904
610M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
610M
            RETURN_ON_ERROR(ret);
906
610M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
610M
            ctx->count = ret;
908
610M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
610M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
401M
                RETURN_FAILURE;
917
918
209M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.29M
                ptr == state->end &&
920
87.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
87.5k
            {
922
                /* tail is empty.  we're finished */
923
87.5k
                state->ptr = ptr;
924
87.5k
                RETURN_SUCCESS;
925
87.5k
            }
926
927
209M
            LASTMARK_SAVE();
928
209M
            if (state->repeat)
929
112M
                MARK_PUSH(ctx->lastmark);
930
931
209M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
26.2M
                ctx->u.chr = pattern[pattern[0]+1];
935
26.2M
                for (;;) {
936
63.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
51.5M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
37.3M
                        ptr--;
939
37.3M
                        ctx->count--;
940
37.3M
                    }
941
26.2M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
12.0M
                        break;
943
14.2M
                    state->ptr = ptr;
944
14.2M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
14.2M
                            pattern+pattern[0]);
946
14.2M
                    if (ret) {
947
14.2M
                        if (state->repeat)
948
12.9M
                            MARK_POP_DISCARD(ctx->lastmark);
949
14.2M
                        RETURN_ON_ERROR(ret);
950
14.2M
                        RETURN_SUCCESS;
951
14.2M
                    }
952
699
                    if (state->repeat)
953
699
                        MARK_POP_KEEP(ctx->lastmark);
954
699
                    LASTMARK_RESTORE();
955
956
699
                    ptr--;
957
699
                    ctx->count--;
958
699
                }
959
12.0M
                if (state->repeat)
960
10.6M
                    MARK_POP_DISCARD(ctx->lastmark);
961
183M
            } else {
962
                /* general case */
963
268M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
223M
                    state->ptr = ptr;
965
223M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
223M
                            pattern+pattern[0]);
967
223M
                    if (ret) {
968
138M
                        if (state->repeat)
969
87.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
138M
                        RETURN_ON_ERROR(ret);
971
138M
                        RETURN_SUCCESS;
972
138M
                    }
973
85.3M
                    if (state->repeat)
974
2.09M
                        MARK_POP_KEEP(ctx->lastmark);
975
85.3M
                    LASTMARK_RESTORE();
976
977
85.3M
                    ptr--;
978
85.3M
                    ctx->count--;
979
85.3M
                }
980
45.2M
                if (state->repeat)
981
1.33M
                    MARK_POP_DISCARD(ctx->lastmark);
982
45.2M
            }
983
57.2M
            RETURN_FAILURE;
984
985
4.76M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
4.76M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
4.76M
                   pattern[1], pattern[2]));
997
998
4.76M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
4.76M
            state->ptr = ptr;
1002
1003
4.76M
            if (pattern[1] == 0)
1004
4.76M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
4.76M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
4.76M
            } else {
1028
                /* general case */
1029
4.76M
                LASTMARK_SAVE();
1030
4.76M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
48.2M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
48.2M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
48.2M
                    state->ptr = ptr;
1036
48.2M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
48.2M
                            pattern+pattern[0]);
1038
48.2M
                    if (ret) {
1039
4.76M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
4.76M
                        RETURN_ON_ERROR(ret);
1042
4.76M
                        RETURN_SUCCESS;
1043
4.76M
                    }
1044
43.4M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
43.4M
                    LASTMARK_RESTORE();
1047
1048
43.4M
                    state->ptr = ptr;
1049
43.4M
                    ret = SRE(count)(state, pattern+3, 1);
1050
43.4M
                    RETURN_ON_ERROR(ret);
1051
43.4M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
43.4M
                    if (ret == 0)
1053
0
                        break;
1054
43.4M
                    assert(ret == 1);
1055
43.4M
                    ptr++;
1056
43.4M
                    ctx->count++;
1057
43.4M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
110M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
110M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
110M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
110M
            ctx->u.rep = repeat_pool_malloc(state);
1127
110M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
110M
            ctx->u.rep->count = -1;
1131
110M
            ctx->u.rep->pattern = pattern;
1132
110M
            ctx->u.rep->prev = state->repeat;
1133
110M
            ctx->u.rep->last_ptr = NULL;
1134
110M
            state->repeat = ctx->u.rep;
1135
1136
110M
            state->ptr = ptr;
1137
110M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
110M
            state->repeat = ctx->u.rep->prev;
1139
110M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
110M
            if (ret) {
1142
44.1M
                RETURN_ON_ERROR(ret);
1143
44.1M
                RETURN_SUCCESS;
1144
44.1M
            }
1145
65.8M
            RETURN_FAILURE;
1146
1147
213M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
213M
            ctx->u.rep = state->repeat;
1155
213M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
213M
            state->ptr = ptr;
1159
1160
213M
            ctx->count = ctx->u.rep->count+1;
1161
1162
213M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
213M
                   ptr, ctx->count));
1164
1165
213M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
213M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
18.1M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
195M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
195M
                ctx->u.rep->count = ctx->count;
1185
195M
                LASTMARK_SAVE();
1186
195M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
195M
                LAST_PTR_PUSH();
1189
195M
                ctx->u.rep->last_ptr = state->ptr;
1190
195M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
195M
                        ctx->u.rep->pattern+3);
1192
195M
                LAST_PTR_POP();
1193
195M
                if (ret) {
1194
102M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
102M
                    RETURN_ON_ERROR(ret);
1196
102M
                    RETURN_SUCCESS;
1197
102M
                }
1198
92.7M
                MARK_POP(ctx->lastmark);
1199
92.7M
                LASTMARK_RESTORE();
1200
92.7M
                ctx->u.rep->count = ctx->count-1;
1201
92.7M
                state->ptr = ptr;
1202
92.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
110M
            state->repeat = ctx->u.rep->prev;
1207
110M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
110M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
110M
            RETURN_ON_SUCCESS(ret);
1211
66.6M
            state->ptr = ptr;
1212
66.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
28.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
28.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
28.0M
                   ptr, pattern[1]));
1565
28.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
28.0M
            state->ptr = ptr - pattern[1];
1568
28.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
28.0M
            RETURN_ON_FAILURE(ret);
1570
22.4M
            pattern += pattern[0];
1571
22.4M
            DISPATCH;
1572
1573
28.8M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
28.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
28.8M
                   ptr, pattern[1]));
1578
28.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
28.8M
                state->ptr = ptr - pattern[1];
1580
28.8M
                LASTMARK_SAVE();
1581
28.8M
                if (state->repeat)
1582
28.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
57.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
57.7M
                if (ret) {
1586
22.4k
                    if (state->repeat)
1587
22.4k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
22.4k
                    RETURN_ON_ERROR(ret);
1589
22.4k
                    RETURN_FAILURE;
1590
22.4k
                }
1591
28.8M
                if (state->repeat)
1592
28.8M
                    MARK_POP(ctx->lastmark);
1593
28.8M
                LASTMARK_RESTORE();
1594
28.8M
            }
1595
28.8M
            pattern += pattern[0];
1596
28.8M
            DISPATCH;
1597
1598
28.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.45G
exit:
1620
1.45G
    ctx_pos = ctx->last_ctx_pos;
1621
1.45G
    jump = ctx->jump;
1622
1.45G
    DATA_POP_DISCARD(ctx);
1623
1.45G
    if (ctx_pos == -1) {
1624
534M
        state->sigcount = sigcount;
1625
534M
        return ret;
1626
534M
    }
1627
925M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
925M
    switch (jump) {
1630
195M
        case JUMP_MAX_UNTIL_2:
1631
195M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
195M
            goto jump_max_until_2;
1633
110M
        case JUMP_MAX_UNTIL_3:
1634
110M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
110M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
166M
        case JUMP_BRANCH:
1643
166M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
166M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
110M
        case JUMP_REPEAT:
1658
110M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
110M
            goto jump_repeat;
1660
14.2M
        case JUMP_REPEAT_ONE_1:
1661
14.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
14.2M
            goto jump_repeat_one_1;
1663
223M
        case JUMP_REPEAT_ONE_2:
1664
223M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
223M
            goto jump_repeat_one_2;
1666
48.2M
        case JUMP_MIN_REPEAT_ONE:
1667
48.2M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
48.2M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
28.0M
        case JUMP_ASSERT:
1673
28.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
28.0M
            goto jump_assert;
1675
28.8M
        case JUMP_ASSERT_NOT:
1676
28.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
28.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
925M
    }
1683
1684
0
    return ret; /* should never get here */
1685
925M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
196M
{
601
196M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
196M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
196M
    Py_ssize_t ret = 0;
604
196M
    int jump;
605
196M
    unsigned int sigcount = state->sigcount;
606
607
196M
    SRE(match_context)* ctx;
608
196M
    SRE(match_context)* nextctx;
609
196M
    INIT_TRACE(state);
610
611
196M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
196M
    DATA_ALLOC(SRE(match_context), ctx);
614
196M
    ctx->last_ctx_pos = -1;
615
196M
    ctx->jump = JUMP_NONE;
616
196M
    ctx->toplevel = toplevel;
617
196M
    ctx_pos = alloc_pos;
618
619
196M
#if USE_COMPUTED_GOTOS
620
196M
#include "sre_targets.h"
621
196M
#endif
622
623
499M
entrance:
624
625
499M
    ;  // Fashion statement.
626
499M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
499M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
40.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.66M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.66M
                   end - ptr, (size_t) pattern[3]));
634
4.66M
            RETURN_FAILURE;
635
4.66M
        }
636
35.5M
        pattern += pattern[1] + 1;
637
35.5M
    }
638
639
494M
#if USE_COMPUTED_GOTOS
640
494M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
494M
    {
647
648
494M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
212M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
212M
                   ptr, pattern[0]));
653
212M
            {
654
212M
                int i = pattern[0];
655
212M
                if (i & 1)
656
42.8M
                    state->lastindex = i/2 + 1;
657
212M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
208M
                    int j = state->lastmark + 1;
663
220M
                    while (j < i)
664
11.5M
                        state->mark[j++] = NULL;
665
208M
                    state->lastmark = i;
666
208M
                }
667
212M
                state->mark[i] = ptr;
668
212M
            }
669
212M
            pattern++;
670
212M
            DISPATCH;
671
672
212M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
86.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
86.0M
                   ptr, *pattern));
677
86.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
35.4M
                RETURN_FAILURE;
679
50.5M
            pattern++;
680
50.5M
            ptr++;
681
50.5M
            DISPATCH;
682
683
50.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
64.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
64.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
64.5M
            if (ctx->toplevel &&
698
27.0M
                ((state->match_all && ptr != state->end) ||
699
27.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
64.5M
            state->ptr = ptr;
704
64.5M
            RETURN_SUCCESS;
705
706
31.7M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
31.7M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
31.7M
            if (!SRE(at)(state, ptr, *pattern))
711
14.4M
                RETURN_FAILURE;
712
17.2M
            pattern++;
713
17.2M
            DISPATCH;
714
715
17.2M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
75.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
75.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
75.6M
            if (ptr >= end ||
749
75.3M
                !SRE(charset)(state, pattern + 1, *ptr))
750
15.9M
                RETURN_FAILURE;
751
59.6M
            pattern += pattern[0];
752
59.6M
            ptr++;
753
59.6M
            DISPATCH;
754
755
59.6M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
492k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
492k
                   pattern, ptr, pattern[0]));
758
492k
            if (ptr >= end ||
759
492k
                sre_lower_ascii(*ptr) != *pattern)
760
4.77k
                RETURN_FAILURE;
761
487k
            pattern++;
762
487k
            ptr++;
763
487k
            DISPATCH;
764
765
487k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
34.1M
        TARGET(SRE_OP_JUMP):
845
34.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
34.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
34.1M
                   ptr, pattern[0]));
850
34.1M
            pattern += pattern[0];
851
34.1M
            DISPATCH;
852
853
61.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
61.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
61.4M
            LASTMARK_SAVE();
858
61.4M
            if (state->repeat)
859
13.9M
                MARK_PUSH(ctx->lastmark);
860
173M
            for (; pattern[0]; pattern += pattern[0]) {
861
144M
                if (pattern[1] == SRE_OP_LITERAL &&
862
69.8M
                    (ptr >= end ||
863
69.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
28.7M
                    continue;
865
115M
                if (pattern[1] == SRE_OP_IN &&
866
13.3M
                    (ptr >= end ||
867
13.2M
                     !SRE(charset)(state, pattern + 3,
868
13.2M
                                   (SRE_CODE) *ptr)))
869
7.26M
                    continue;
870
108M
                state->ptr = ptr;
871
108M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
108M
                if (ret) {
873
32.5M
                    if (state->repeat)
874
13.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
32.5M
                    RETURN_ON_ERROR(ret);
876
32.5M
                    RETURN_SUCCESS;
877
32.5M
                }
878
75.8M
                if (state->repeat)
879
5.41k
                    MARK_POP_KEEP(ctx->lastmark);
880
75.8M
                LASTMARK_RESTORE();
881
75.8M
            }
882
28.9M
            if (state->repeat)
883
767k
                MARK_POP_DISCARD(ctx->lastmark);
884
28.9M
            RETURN_FAILURE;
885
886
215M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
215M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
215M
                   pattern[1], pattern[2]));
898
899
215M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.01M
                RETURN_FAILURE; /* cannot match */
901
902
214M
            state->ptr = ptr;
903
904
214M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
214M
            RETURN_ON_ERROR(ret);
906
214M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
214M
            ctx->count = ret;
908
214M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
214M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
147M
                RETURN_FAILURE;
917
918
66.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
722k
                ptr == state->end &&
920
65.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
65.1k
            {
922
                /* tail is empty.  we're finished */
923
65.1k
                state->ptr = ptr;
924
65.1k
                RETURN_SUCCESS;
925
65.1k
            }
926
927
66.1M
            LASTMARK_SAVE();
928
66.1M
            if (state->repeat)
929
39.6M
                MARK_PUSH(ctx->lastmark);
930
931
66.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.69M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.69M
                for (;;) {
936
15.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.04M
                        ptr--;
939
9.04M
                        ctx->count--;
940
9.04M
                    }
941
6.69M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.40M
                        break;
943
4.29M
                    state->ptr = ptr;
944
4.29M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.29M
                            pattern+pattern[0]);
946
4.29M
                    if (ret) {
947
4.29M
                        if (state->repeat)
948
2.98M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.29M
                        RETURN_ON_ERROR(ret);
950
4.29M
                        RETURN_SUCCESS;
951
4.29M
                    }
952
152
                    if (state->repeat)
953
152
                        MARK_POP_KEEP(ctx->lastmark);
954
152
                    LASTMARK_RESTORE();
955
956
152
                    ptr--;
957
152
                    ctx->count--;
958
152
                }
959
2.40M
                if (state->repeat)
960
1.00M
                    MARK_POP_DISCARD(ctx->lastmark);
961
59.4M
            } else {
962
                /* general case */
963
77.7M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
68.4M
                    state->ptr = ptr;
965
68.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
68.4M
                            pattern+pattern[0]);
967
68.4M
                    if (ret) {
968
50.2M
                        if (state->repeat)
969
34.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
50.2M
                        RETURN_ON_ERROR(ret);
971
50.2M
                        RETURN_SUCCESS;
972
50.2M
                    }
973
18.2M
                    if (state->repeat)
974
1.14M
                        MARK_POP_KEEP(ctx->lastmark);
975
18.2M
                    LASTMARK_RESTORE();
976
977
18.2M
                    ptr--;
978
18.2M
                    ctx->count--;
979
18.2M
                }
980
9.24M
                if (state->repeat)
981
751k
                    MARK_POP_DISCARD(ctx->lastmark);
982
9.24M
            }
983
11.6M
            RETURN_FAILURE;
984
985
3.67M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.67M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.67M
                   pattern[1], pattern[2]));
997
998
3.67M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.67M
            state->ptr = ptr;
1002
1003
3.67M
            if (pattern[1] == 0)
1004
3.67M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.67M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.67M
            } else {
1028
                /* general case */
1029
3.67M
                LASTMARK_SAVE();
1030
3.67M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
11.7M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
11.7M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
11.7M
                    state->ptr = ptr;
1036
11.7M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
11.7M
                            pattern+pattern[0]);
1038
11.7M
                    if (ret) {
1039
3.67M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.67M
                        RETURN_ON_ERROR(ret);
1042
3.67M
                        RETURN_SUCCESS;
1043
3.67M
                    }
1044
8.11M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
8.11M
                    LASTMARK_RESTORE();
1047
1048
8.11M
                    state->ptr = ptr;
1049
8.11M
                    ret = SRE(count)(state, pattern+3, 1);
1050
8.11M
                    RETURN_ON_ERROR(ret);
1051
8.11M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
8.11M
                    if (ret == 0)
1053
0
                        break;
1054
8.11M
                    assert(ret == 1);
1055
8.11M
                    ptr++;
1056
8.11M
                    ctx->count++;
1057
8.11M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
24.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
24.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
24.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
24.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
24.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
24.2M
            ctx->u.rep->count = -1;
1131
24.2M
            ctx->u.rep->pattern = pattern;
1132
24.2M
            ctx->u.rep->prev = state->repeat;
1133
24.2M
            ctx->u.rep->last_ptr = NULL;
1134
24.2M
            state->repeat = ctx->u.rep;
1135
1136
24.2M
            state->ptr = ptr;
1137
24.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
24.2M
            state->repeat = ctx->u.rep->prev;
1139
24.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
24.2M
            if (ret) {
1142
11.7M
                RETURN_ON_ERROR(ret);
1143
11.7M
                RETURN_SUCCESS;
1144
11.7M
            }
1145
12.5M
            RETURN_FAILURE;
1146
1147
59.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
59.7M
            ctx->u.rep = state->repeat;
1155
59.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
59.7M
            state->ptr = ptr;
1159
1160
59.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
59.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
59.7M
                   ptr, ctx->count));
1164
1165
59.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
59.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
8.94M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
50.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
50.7M
                ctx->u.rep->count = ctx->count;
1185
50.7M
                LASTMARK_SAVE();
1186
50.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
50.7M
                LAST_PTR_PUSH();
1189
50.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
50.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
50.7M
                        ctx->u.rep->pattern+3);
1192
50.7M
                LAST_PTR_POP();
1193
50.7M
                if (ret) {
1194
35.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
35.0M
                    RETURN_ON_ERROR(ret);
1196
35.0M
                    RETURN_SUCCESS;
1197
35.0M
                }
1198
15.7M
                MARK_POP(ctx->lastmark);
1199
15.7M
                LASTMARK_RESTORE();
1200
15.7M
                ctx->u.rep->count = ctx->count-1;
1201
15.7M
                state->ptr = ptr;
1202
15.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
24.7M
            state->repeat = ctx->u.rep->prev;
1207
24.7M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
24.7M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
24.7M
            RETURN_ON_SUCCESS(ret);
1211
13.0M
            state->ptr = ptr;
1212
13.0M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference, comparing via sre_lower_ascii() */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference, comparing via sre_lower_unicode() */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference, comparing via sre_lower_locale() */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.70M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.70M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.70M
                   ptr, pattern[1]));
1565
3.70M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.70M
            state->ptr = ptr - pattern[1];
1568
3.70M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.70M
            RETURN_ON_FAILURE(ret);
1570
3.48M
            pattern += pattern[0];
1571
3.48M
            DISPATCH;
1572
1573
6.86M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
6.86M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
6.86M
                   ptr, pattern[1]));
1578
6.86M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
6.86M
                state->ptr = ptr - pattern[1];
1580
6.86M
                LASTMARK_SAVE();
1581
6.86M
                if (state->repeat)
1582
6.86M
                    MARK_PUSH(ctx->lastmark);
1583
1584
13.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
13.7M
                if (ret) {
1586
1.39k
                    if (state->repeat)
1587
1.39k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.39k
                    RETURN_ON_ERROR(ret);
1589
1.39k
                    RETURN_FAILURE;
1590
1.39k
                }
1591
6.86M
                if (state->repeat)
1592
6.86M
                    MARK_POP(ctx->lastmark);
1593
6.86M
                LASTMARK_RESTORE();
1594
6.86M
            }
1595
6.86M
            pattern += pattern[0];
1596
6.86M
            DISPATCH;
1597
1598
6.86M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
499M
exit:
1620
499M
    ctx_pos = ctx->last_ctx_pos;
1621
499M
    jump = ctx->jump;
1622
499M
    DATA_POP_DISCARD(ctx);
1623
499M
    if (ctx_pos == -1) {
1624
196M
        state->sigcount = sigcount;
1625
196M
        return ret;
1626
196M
    }
1627
303M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
303M
    switch (jump) {
1630
50.7M
        case JUMP_MAX_UNTIL_2:
1631
50.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
50.7M
            goto jump_max_until_2;
1633
24.7M
        case JUMP_MAX_UNTIL_3:
1634
24.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
24.7M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
108M
        case JUMP_BRANCH:
1643
108M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
108M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
24.2M
        case JUMP_REPEAT:
1658
24.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
24.2M
            goto jump_repeat;
1660
4.29M
        case JUMP_REPEAT_ONE_1:
1661
4.29M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.29M
            goto jump_repeat_one_1;
1663
68.4M
        case JUMP_REPEAT_ONE_2:
1664
68.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
68.4M
            goto jump_repeat_one_2;
1666
11.7M
        case JUMP_MIN_REPEAT_ONE:
1667
11.7M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
11.7M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.70M
        case JUMP_ASSERT:
1673
3.70M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.70M
            goto jump_assert;
1675
6.86M
        case JUMP_ASSERT_NOT:
1676
6.86M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
6.86M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
303M
    }
1683
1684
0
    return ret; /* should never get here */
1685
303M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
250M
{
601
250M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
250M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
250M
    Py_ssize_t ret = 0;
604
250M
    int jump;
605
250M
    unsigned int sigcount = state->sigcount;
606
607
250M
    SRE(match_context)* ctx;
608
250M
    SRE(match_context)* nextctx;
609
250M
    INIT_TRACE(state);
610
611
250M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
250M
    DATA_ALLOC(SRE(match_context), ctx);
614
250M
    ctx->last_ctx_pos = -1;
615
250M
    ctx->jump = JUMP_NONE;
616
250M
    ctx->toplevel = toplevel;
617
250M
    ctx_pos = alloc_pos;
618
619
250M
#if USE_COMPUTED_GOTOS
620
250M
#include "sre_targets.h"
621
250M
#endif
622
623
629M
entrance:
624
625
629M
    ;  // Fashion statement.
626
629M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
629M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
15.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
137k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
137k
                   end - ptr, (size_t) pattern[3]));
634
137k
            RETURN_FAILURE;
635
137k
        }
636
15.2M
        pattern += pattern[1] + 1;
637
15.2M
    }
638
639
628M
#if USE_COMPUTED_GOTOS
640
628M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
628M
    {
647
648
628M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
297M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
297M
                   ptr, pattern[0]));
653
297M
            {
654
297M
                int i = pattern[0];
655
297M
                if (i & 1)
656
51.7M
                    state->lastindex = i/2 + 1;
657
297M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
296M
                    int j = state->lastmark + 1;
663
299M
                    while (j < i)
664
3.46M
                        state->mark[j++] = NULL;
665
296M
                    state->lastmark = i;
666
296M
                }
667
297M
                state->mark[i] = ptr;
668
297M
            }
669
297M
            pattern++;
670
297M
            DISPATCH;
671
672
297M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
38.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
38.3M
                   ptr, *pattern));
677
38.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
19.4M
                RETURN_FAILURE;
679
18.8M
            pattern++;
680
18.8M
            ptr++;
681
18.8M
            DISPATCH;
682
683
18.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
65.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
65.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
65.2M
            if (ctx->toplevel &&
698
10.4M
                ((state->match_all && ptr != state->end) ||
699
10.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
65.2M
            state->ptr = ptr;
704
65.2M
            RETURN_SUCCESS;
705
706
50.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
50.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
50.4M
            if (!SRE(at)(state, ptr, *pattern))
711
48.1M
                RETURN_FAILURE;
712
2.31M
            pattern++;
713
2.31M
            DISPATCH;
714
715
2.31M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
133M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
133M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
133M
            if (ptr >= end ||
749
132M
                !SRE(charset)(state, pattern + 1, *ptr))
750
55.2M
                RETURN_FAILURE;
751
77.9M
            pattern += pattern[0];
752
77.9M
            ptr++;
753
77.9M
            DISPATCH;
754
755
77.9M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.91M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.91M
                   pattern, ptr, pattern[0]));
758
2.91M
            if (ptr >= end ||
759
2.91M
                sre_lower_ascii(*ptr) != *pattern)
760
31.2k
                RETURN_FAILURE;
761
2.88M
            pattern++;
762
2.88M
            ptr++;
763
2.88M
            DISPATCH;
764
765
2.88M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
24.4M
        TARGET(SRE_OP_JUMP):
845
24.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
24.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
24.4M
                   ptr, pattern[0]));
850
24.4M
            pattern += pattern[0];
851
24.4M
            DISPATCH;
852
853
30.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
30.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
30.8M
            LASTMARK_SAVE();
858
30.8M
            if (state->repeat)
859
23.7M
                MARK_PUSH(ctx->lastmark);
860
67.3M
            for (; pattern[0]; pattern += pattern[0]) {
861
59.2M
                if (pattern[1] == SRE_OP_LITERAL &&
862
27.9M
                    (ptr >= end ||
863
27.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
17.2M
                    continue;
865
42.0M
                if (pattern[1] == SRE_OP_IN &&
866
20.8M
                    (ptr >= end ||
867
20.8M
                     !SRE(charset)(state, pattern + 3,
868
20.8M
                                   (SRE_CODE) *ptr)))
869
11.2M
                    continue;
870
30.7M
                state->ptr = ptr;
871
30.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
30.7M
                if (ret) {
873
22.8M
                    if (state->repeat)
874
20.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
22.8M
                    RETURN_ON_ERROR(ret);
876
22.8M
                    RETURN_SUCCESS;
877
22.8M
                }
878
7.94M
                if (state->repeat)
879
14.6k
                    MARK_POP_KEEP(ctx->lastmark);
880
7.94M
                LASTMARK_RESTORE();
881
7.94M
            }
882
8.05M
            if (state->repeat)
883
3.57M
                MARK_POP_DISCARD(ctx->lastmark);
884
8.05M
            RETURN_FAILURE;
885
886
259M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
259M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
259M
                   pattern[1], pattern[2]));
898
899
259M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
203k
                RETURN_FAILURE; /* cannot match */
901
902
258M
            state->ptr = ptr;
903
904
258M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
258M
            RETURN_ON_ERROR(ret);
906
258M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
258M
            ctx->count = ret;
908
258M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
258M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
183M
                RETURN_FAILURE;
917
918
74.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.87M
                ptr == state->end &&
920
19.0k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
19.0k
            {
922
                /* tail is empty.  we're finished */
923
19.0k
                state->ptr = ptr;
924
19.0k
                RETURN_SUCCESS;
925
19.0k
            }
926
927
74.9M
            LASTMARK_SAVE();
928
74.9M
            if (state->repeat)
929
26.1M
                MARK_PUSH(ctx->lastmark);
930
931
74.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.87M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.87M
                for (;;) {
936
12.3M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
9.89M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
6.43M
                        ptr--;
939
6.43M
                        ctx->count--;
940
6.43M
                    }
941
5.87M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.42M
                        break;
943
3.45M
                    state->ptr = ptr;
944
3.45M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.45M
                            pattern+pattern[0]);
946
3.45M
                    if (ret) {
947
3.45M
                        if (state->repeat)
948
3.43M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.45M
                        RETURN_ON_ERROR(ret);
950
3.45M
                        RETURN_SUCCESS;
951
3.45M
                    }
952
254
                    if (state->repeat)
953
254
                        MARK_POP_KEEP(ctx->lastmark);
954
254
                    LASTMARK_RESTORE();
955
956
254
                    ptr--;
957
254
                    ctx->count--;
958
254
                }
959
2.42M
                if (state->repeat)
960
2.41M
                    MARK_POP_DISCARD(ctx->lastmark);
961
69.0M
            } else {
962
                /* general case */
963
123M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
92.4M
                    state->ptr = ptr;
965
92.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
92.4M
                            pattern+pattern[0]);
967
92.4M
                    if (ret) {
968
37.9M
                        if (state->repeat)
969
19.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
37.9M
                        RETURN_ON_ERROR(ret);
971
37.9M
                        RETURN_SUCCESS;
972
37.9M
                    }
973
54.4M
                    if (state->repeat)
974
706k
                        MARK_POP_KEEP(ctx->lastmark);
975
54.4M
                    LASTMARK_RESTORE();
976
977
54.4M
                    ptr--;
978
54.4M
                    ctx->count--;
979
54.4M
                }
980
31.1M
                if (state->repeat)
981
415k
                    MARK_POP_DISCARD(ctx->lastmark);
982
31.1M
            }
983
33.5M
            RETURN_FAILURE;
984
985
1.08M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
1.08M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
1.08M
                   pattern[1], pattern[2]));
997
998
1.08M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
1.08M
            state->ptr = ptr;
1002
1003
1.08M
            if (pattern[1] == 0)
1004
1.08M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
1.08M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
1.08M
            } else {
1028
                /* general case */
1029
1.08M
                LASTMARK_SAVE();
1030
1.08M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
31.7M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
31.7M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
31.7M
                    state->ptr = ptr;
1036
31.7M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
31.7M
                            pattern+pattern[0]);
1038
31.7M
                    if (ret) {
1039
1.08M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
1.08M
                        RETURN_ON_ERROR(ret);
1042
1.08M
                        RETURN_SUCCESS;
1043
1.08M
                    }
1044
30.6M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
30.6M
                    LASTMARK_RESTORE();
1047
1048
30.6M
                    state->ptr = ptr;
1049
30.6M
                    ret = SRE(count)(state, pattern+3, 1);
1050
30.6M
                    RETURN_ON_ERROR(ret);
1051
30.6M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
30.6M
                    if (ret == 0)
1053
0
                        break;
1054
30.6M
                    assert(ret == 1);
1055
30.6M
                    ptr++;
1056
30.6M
                    ctx->count++;
1057
30.6M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
57.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
57.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
57.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
57.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
57.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
57.8M
            ctx->u.rep->count = -1;
1131
57.8M
            ctx->u.rep->pattern = pattern;
1132
57.8M
            ctx->u.rep->prev = state->repeat;
1133
57.8M
            ctx->u.rep->last_ptr = NULL;
1134
57.8M
            state->repeat = ctx->u.rep;
1135
1136
57.8M
            state->ptr = ptr;
1137
57.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
57.8M
            state->repeat = ctx->u.rep->prev;
1139
57.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
57.8M
            if (ret) {
1142
12.1M
                RETURN_ON_ERROR(ret);
1143
12.1M
                RETURN_SUCCESS;
1144
12.1M
            }
1145
45.6M
            RETURN_FAILURE;
1146
1147
88.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
88.6M
            ctx->u.rep = state->repeat;
1155
88.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
88.6M
            state->ptr = ptr;
1159
1160
88.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
88.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
88.6M
                   ptr, ctx->count));
1164
1165
88.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
88.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.53M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
85.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
85.0M
                ctx->u.rep->count = ctx->count;
1185
85.0M
                LASTMARK_SAVE();
1186
85.0M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
85.0M
                LAST_PTR_PUSH();
1189
85.0M
                ctx->u.rep->last_ptr = state->ptr;
1190
85.0M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
85.0M
                        ctx->u.rep->pattern+3);
1192
85.0M
                LAST_PTR_POP();
1193
85.0M
                if (ret) {
1194
30.5M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
30.5M
                    RETURN_ON_ERROR(ret);
1196
30.5M
                    RETURN_SUCCESS;
1197
30.5M
                }
1198
54.5M
                MARK_POP(ctx->lastmark);
1199
54.5M
                LASTMARK_RESTORE();
1200
54.5M
                ctx->u.rep->count = ctx->count-1;
1201
54.5M
                state->ptr = ptr;
1202
54.5M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
58.0M
            state->repeat = ctx->u.rep->prev;
1207
58.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
58.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
58.0M
            RETURN_ON_SUCCESS(ret);
1211
45.9M
            state->ptr = ptr;
1212
45.9M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
9.15M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
9.15M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
9.15M
                   ptr, pattern[1]));
1565
9.15M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
9.15M
            state->ptr = ptr - pattern[1];
1568
9.15M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
9.15M
            RETURN_ON_FAILURE(ret);
1570
5.91M
            pattern += pattern[0];
1571
5.91M
            DISPATCH;
1572
1573
10.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
10.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
10.3M
                   ptr, pattern[1]));
1578
10.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
10.3M
                state->ptr = ptr - pattern[1];
1580
10.3M
                LASTMARK_SAVE();
1581
10.3M
                if (state->repeat)
1582
10.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
20.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
20.7M
                if (ret) {
1586
14.3k
                    if (state->repeat)
1587
14.3k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.3k
                    RETURN_ON_ERROR(ret);
1589
14.3k
                    RETURN_FAILURE;
1590
14.3k
                }
1591
10.3M
                if (state->repeat)
1592
10.3M
                    MARK_POP(ctx->lastmark);
1593
10.3M
                LASTMARK_RESTORE();
1594
10.3M
            }
1595
10.3M
            pattern += pattern[0];
1596
10.3M
            DISPATCH;
1597
1598
10.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
629M
exit:
1620
629M
    ctx_pos = ctx->last_ctx_pos;
1621
629M
    jump = ctx->jump;
1622
629M
    DATA_POP_DISCARD(ctx);
1623
629M
    if (ctx_pos == -1) {
1624
250M
        state->sigcount = sigcount;
1625
250M
        return ret;
1626
250M
    }
1627
378M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
378M
    switch (jump) {
1630
85.0M
        case JUMP_MAX_UNTIL_2:
1631
85.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
85.0M
            goto jump_max_until_2;
1633
58.0M
        case JUMP_MAX_UNTIL_3:
1634
58.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
58.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
30.7M
        case JUMP_BRANCH:
1643
30.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
30.7M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
57.8M
        case JUMP_REPEAT:
1658
57.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
57.8M
            goto jump_repeat;
1660
3.45M
        case JUMP_REPEAT_ONE_1:
1661
3.45M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.45M
            goto jump_repeat_one_1;
1663
92.4M
        case JUMP_REPEAT_ONE_2:
1664
92.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
92.4M
            goto jump_repeat_one_2;
1666
31.7M
        case JUMP_MIN_REPEAT_ONE:
1667
31.7M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
31.7M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
9.15M
        case JUMP_ASSERT:
1673
9.15M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
9.15M
            goto jump_assert;
1675
10.3M
        case JUMP_ASSERT_NOT:
1676
10.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
10.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
378M
    }
1683
1684
0
    return ret; /* should never get here */
1685
378M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
88.3M
{
601
88.3M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
88.3M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
88.3M
    Py_ssize_t ret = 0;
604
88.3M
    int jump;
605
88.3M
    unsigned int sigcount = state->sigcount;
606
607
88.3M
    SRE(match_context)* ctx;
608
88.3M
    SRE(match_context)* nextctx;
609
88.3M
    INIT_TRACE(state);
610
611
88.3M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
88.3M
    DATA_ALLOC(SRE(match_context), ctx);
614
88.3M
    ctx->last_ctx_pos = -1;
615
88.3M
    ctx->jump = JUMP_NONE;
616
88.3M
    ctx->toplevel = toplevel;
617
88.3M
    ctx_pos = alloc_pos;
618
619
88.3M
#if USE_COMPUTED_GOTOS
620
88.3M
#include "sre_targets.h"
621
88.3M
#endif
622
623
331M
entrance:
624
625
331M
    ;  // Fashion statement.
626
331M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
331M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.73k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.73k
                   end - ptr, (size_t) pattern[3]));
634
4.73k
            RETURN_FAILURE;
635
4.73k
        }
636
10.6M
        pattern += pattern[1] + 1;
637
10.6M
    }
638
639
331M
#if USE_COMPUTED_GOTOS
640
331M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
331M
    {
647
648
331M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
120M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
120M
                   ptr, pattern[0]));
653
120M
            {
654
120M
                int i = pattern[0];
655
120M
                if (i & 1)
656
24.7M
                    state->lastindex = i/2 + 1;
657
120M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
118M
                    int j = state->lastmark + 1;
663
120M
                    while (j < i)
664
2.21M
                        state->mark[j++] = NULL;
665
118M
                    state->lastmark = i;
666
118M
                }
667
120M
                state->mark[i] = ptr;
668
120M
            }
669
120M
            pattern++;
670
120M
            DISPATCH;
671
672
120M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
30.6M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
30.6M
                   ptr, *pattern));
677
30.6M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
12.1M
                RETURN_FAILURE;
679
18.4M
            pattern++;
680
18.4M
            ptr++;
681
18.4M
            DISPATCH;
682
683
18.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
29.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
29.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
29.1M
            if (ctx->toplevel &&
698
8.86M
                ((state->match_all && ptr != state->end) ||
699
8.86M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
29.1M
            state->ptr = ptr;
704
29.1M
            RETURN_SUCCESS;
705
706
8.75M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
8.75M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
8.75M
            if (!SRE(at)(state, ptr, *pattern))
711
8.72M
                RETURN_FAILURE;
712
29.3k
            pattern++;
713
29.3k
            DISPATCH;
714
715
29.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
64.9M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
64.9M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
64.9M
            if (ptr >= end ||
749
64.9M
                !SRE(charset)(state, pattern + 1, *ptr))
750
15.2M
                RETURN_FAILURE;
751
49.7M
            pattern += pattern[0];
752
49.7M
            ptr++;
753
49.7M
            DISPATCH;
754
755
49.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.86M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.86M
                   pattern, ptr, pattern[0]));
758
2.86M
            if (ptr >= end ||
759
2.86M
                sre_lower_ascii(*ptr) != *pattern)
760
46.6k
                RETURN_FAILURE;
761
2.81M
            pattern++;
762
2.81M
            ptr++;
763
2.81M
            DISPATCH;
764
765
2.81M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
26.3M
        TARGET(SRE_OP_JUMP):
845
26.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
26.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
26.3M
                   ptr, pattern[0]));
850
26.3M
            pattern += pattern[0];
851
26.3M
            DISPATCH;
852
853
32.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
32.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
32.8M
            LASTMARK_SAVE();
858
32.8M
            if (state->repeat)
859
28.4M
                MARK_PUSH(ctx->lastmark);
860
70.7M
            for (; pattern[0]; pattern += pattern[0]) {
861
63.7M
                if (pattern[1] == SRE_OP_LITERAL &&
862
32.9M
                    (ptr >= end ||
863
32.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
23.3M
                    continue;
865
40.4M
                if (pattern[1] == SRE_OP_IN &&
866
23.0M
                    (ptr >= end ||
867
23.0M
                     !SRE(charset)(state, pattern + 3,
868
23.0M
                                   (SRE_CODE) *ptr)))
869
13.4M
                    continue;
870
26.9M
                state->ptr = ptr;
871
26.9M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
26.9M
                if (ret) {
873
25.8M
                    if (state->repeat)
874
22.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
25.8M
                    RETURN_ON_ERROR(ret);
876
25.8M
                    RETURN_SUCCESS;
877
25.8M
                }
878
1.11M
                if (state->repeat)
879
6.97k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.11M
                LASTMARK_RESTORE();
881
1.11M
            }
882
7.02M
            if (state->repeat)
883
6.30M
                MARK_POP_DISCARD(ctx->lastmark);
884
7.02M
            RETURN_FAILURE;
885
886
137M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
137M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
137M
                   pattern[1], pattern[2]));
898
899
137M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
19.2k
                RETURN_FAILURE; /* cannot match */
901
902
137M
            state->ptr = ptr;
903
904
137M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
137M
            RETURN_ON_ERROR(ret);
906
137M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
137M
            ctx->count = ret;
908
137M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
137M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
69.4M
                RETURN_FAILURE;
917
918
68.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.69M
                ptr == state->end &&
920
3.43k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.43k
            {
922
                /* tail is empty.  we're finished */
923
3.43k
                state->ptr = ptr;
924
3.43k
                RETURN_SUCCESS;
925
3.43k
            }
926
927
68.4M
            LASTMARK_SAVE();
928
68.4M
            if (state->repeat)
929
46.4M
                MARK_PUSH(ctx->lastmark);
930
931
68.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
13.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
13.7M
                for (;;) {
936
35.5M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
28.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
21.8M
                        ptr--;
939
21.8M
                        ctx->count--;
940
21.8M
                    }
941
13.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
7.20M
                        break;
943
6.50M
                    state->ptr = ptr;
944
6.50M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.50M
                            pattern+pattern[0]);
946
6.50M
                    if (ret) {
947
6.50M
                        if (state->repeat)
948
6.50M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.50M
                        RETURN_ON_ERROR(ret);
950
6.50M
                        RETURN_SUCCESS;
951
6.50M
                    }
952
293
                    if (state->repeat)
953
293
                        MARK_POP_KEEP(ctx->lastmark);
954
293
                    LASTMARK_RESTORE();
955
956
293
                    ptr--;
957
293
                    ctx->count--;
958
293
                }
959
7.20M
                if (state->repeat)
960
7.20M
                    MARK_POP_DISCARD(ctx->lastmark);
961
54.7M
            } else {
962
                /* general case */
963
67.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
62.5M
                    state->ptr = ptr;
965
62.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
62.5M
                            pattern+pattern[0]);
967
62.5M
                    if (ret) {
968
49.8M
                        if (state->repeat)
969
32.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
49.8M
                        RETURN_ON_ERROR(ret);
971
49.8M
                        RETURN_SUCCESS;
972
49.8M
                    }
973
12.7M
                    if (state->repeat)
974
244k
                        MARK_POP_KEEP(ctx->lastmark);
975
12.7M
                    LASTMARK_RESTORE();
976
977
12.7M
                    ptr--;
978
12.7M
                    ctx->count--;
979
12.7M
                }
980
4.88M
                if (state->repeat)
981
166k
                    MARK_POP_DISCARD(ctx->lastmark);
982
4.88M
            }
983
12.0M
            RETURN_FAILURE;
984
985
11.1k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
11.1k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
11.1k
                   pattern[1], pattern[2]));
997
998
11.1k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
11.1k
            state->ptr = ptr;
1002
1003
11.1k
            if (pattern[1] == 0)
1004
11.1k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
11.1k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
11.1k
            } else {
1028
                /* general case */
1029
11.1k
                LASTMARK_SAVE();
1030
11.1k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
4.69M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
4.69M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
4.69M
                    state->ptr = ptr;
1036
4.69M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
4.69M
                            pattern+pattern[0]);
1038
4.69M
                    if (ret) {
1039
11.1k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
11.1k
                        RETURN_ON_ERROR(ret);
1042
11.1k
                        RETURN_SUCCESS;
1043
11.1k
                    }
1044
4.68M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
4.68M
                    LASTMARK_RESTORE();
1047
1048
4.68M
                    state->ptr = ptr;
1049
4.68M
                    ret = SRE(count)(state, pattern+3, 1);
1050
4.68M
                    RETURN_ON_ERROR(ret);
1051
4.68M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
4.68M
                    if (ret == 0)
1053
0
                        break;
1054
4.68M
                    assert(ret == 1);
1055
4.68M
                    ptr++;
1056
4.68M
                    ctx->count++;
1057
4.68M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
27.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
27.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
27.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
27.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
27.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
27.9M
            ctx->u.rep->count = -1;
1131
27.9M
            ctx->u.rep->pattern = pattern;
1132
27.9M
            ctx->u.rep->prev = state->repeat;
1133
27.9M
            ctx->u.rep->last_ptr = NULL;
1134
27.9M
            state->repeat = ctx->u.rep;
1135
1136
27.9M
            state->ptr = ptr;
1137
27.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
27.9M
            state->repeat = ctx->u.rep->prev;
1139
27.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
27.9M
            if (ret) {
1142
20.3M
                RETURN_ON_ERROR(ret);
1143
20.3M
                RETURN_SUCCESS;
1144
20.3M
            }
1145
7.59M
            RETURN_FAILURE;
1146
1147
65.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
65.2M
            ctx->u.rep = state->repeat;
1155
65.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
65.2M
            state->ptr = ptr;
1159
1160
65.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
65.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
65.2M
                   ptr, ctx->count));
1164
1165
65.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
65.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.62M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
59.5M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
59.5M
                ctx->u.rep->count = ctx->count;
1185
59.5M
                LASTMARK_SAVE();
1186
59.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
59.5M
                LAST_PTR_PUSH();
1189
59.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
59.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
59.5M
                        ctx->u.rep->pattern+3);
1192
59.5M
                LAST_PTR_POP();
1193
59.5M
                if (ret) {
1194
37.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
37.2M
                    RETURN_ON_ERROR(ret);
1196
37.2M
                    RETURN_SUCCESS;
1197
37.2M
                }
1198
22.3M
                MARK_POP(ctx->lastmark);
1199
22.3M
                LASTMARK_RESTORE();
1200
22.3M
                ctx->u.rep->count = ctx->count-1;
1201
22.3M
                state->ptr = ptr;
1202
22.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
28.0M
            state->repeat = ctx->u.rep->prev;
1207
28.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
28.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
28.0M
            RETURN_ON_SUCCESS(ret);
1211
7.66M
            state->ptr = ptr;
1212
7.66M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
15.1M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
15.1M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
15.1M
                   ptr, pattern[1]));
1565
15.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
15.1M
            state->ptr = ptr - pattern[1];
1568
15.1M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
15.1M
            RETURN_ON_FAILURE(ret);
1570
13.0M
            pattern += pattern[0];
1571
13.0M
            DISPATCH;
1572
1573
13.0M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
11.6M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
11.6M
                   ptr, pattern[1]));
1578
11.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
11.6M
                state->ptr = ptr - pattern[1];
1580
11.6M
                LASTMARK_SAVE();
1581
11.6M
                if (state->repeat)
1582
11.6M
                    MARK_PUSH(ctx->lastmark);
1583
1584
23.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
23.2M
                if (ret) {
1586
6.64k
                    if (state->repeat)
1587
6.64k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
6.64k
                    RETURN_ON_ERROR(ret);
1589
6.64k
                    RETURN_FAILURE;
1590
6.64k
                }
1591
11.6M
                if (state->repeat)
1592
11.6M
                    MARK_POP(ctx->lastmark);
1593
11.6M
                LASTMARK_RESTORE();
1594
11.6M
            }
1595
11.6M
            pattern += pattern[0];
1596
11.6M
            DISPATCH;
1597
1598
11.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
331M
exit:
1620
331M
    ctx_pos = ctx->last_ctx_pos;
1621
331M
    jump = ctx->jump;
1622
331M
    DATA_POP_DISCARD(ctx);
1623
331M
    if (ctx_pos == -1) {
1624
88.3M
        state->sigcount = sigcount;
1625
88.3M
        return ret;
1626
88.3M
    }
1627
242M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
242M
    switch (jump) {
1630
59.5M
        case JUMP_MAX_UNTIL_2:
1631
59.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
59.5M
            goto jump_max_until_2;
1633
28.0M
        case JUMP_MAX_UNTIL_3:
1634
28.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
28.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
26.9M
        case JUMP_BRANCH:
1643
26.9M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
26.9M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
27.9M
        case JUMP_REPEAT:
1658
27.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
27.9M
            goto jump_repeat;
1660
6.50M
        case JUMP_REPEAT_ONE_1:
1661
6.50M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.50M
            goto jump_repeat_one_1;
1663
62.5M
        case JUMP_REPEAT_ONE_2:
1664
62.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
62.5M
            goto jump_repeat_one_2;
1666
4.69M
        case JUMP_MIN_REPEAT_ONE:
1667
4.69M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
4.69M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
15.1M
        case JUMP_ASSERT:
1673
15.1M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
15.1M
            goto jump_assert;
1675
11.6M
        case JUMP_ASSERT_NOT:
1676
11.6M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
11.6M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
242M
    }
1683
1684
0
    return ret; /* should never get here */
1685
242M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
372M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
107M
{
1694
107M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
107M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
107M
    Py_ssize_t status = 0;
1697
107M
    Py_ssize_t prefix_len = 0;
1698
107M
    Py_ssize_t prefix_skip = 0;
1699
107M
    SRE_CODE* prefix = NULL;
1700
107M
    SRE_CODE* charset = NULL;
1701
107M
    SRE_CODE* overlap = NULL;
1702
107M
    int flags = 0;
1703
107M
    INIT_TRACE(state);
1704
1705
107M
    if (ptr > end)
1706
0
        return 0;
1707
1708
107M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
107M
        flags = pattern[2];
1713
1714
107M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.22M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.22M
                   end - ptr, (size_t) pattern[3]));
1717
6.22M
            return 0;
1718
6.22M
        }
1719
101M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
9.42M
            end -= pattern[3] - 1;
1723
9.42M
            if (end <= ptr)
1724
0
                end = ptr;
1725
9.42M
        }
1726
1727
101M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
9.43M
            prefix_len = pattern[5];
1731
9.43M
            prefix_skip = pattern[6];
1732
9.43M
            prefix = pattern + 7;
1733
9.43M
            overlap = prefix + prefix_len - 1;
1734
92.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
82.0M
            charset = pattern + 5;
1738
1739
101M
        pattern += 1 + pattern[1];
1740
101M
    }
1741
1742
101M
    TRACE(("prefix = %p %zd %zd\n",
1743
101M
           prefix, prefix_len, prefix_skip));
1744
101M
    TRACE(("charset = %p\n", charset));
1745
1746
101M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
8.73M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.18M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.18M
#endif
1753
5.18M
        end = (SRE_CHAR *)state->end;
1754
5.18M
        state->must_advance = 0;
1755
9.74M
        while (ptr < end) {
1756
105M
            while (*ptr != c) {
1757
97.3M
                if (++ptr >= end)
1758
1.22M
                    return 0;
1759
97.3M
            }
1760
8.42M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
8.42M
            state->start = ptr;
1762
8.42M
            state->ptr = ptr + prefix_skip;
1763
8.42M
            if (flags & SRE_INFO_LITERAL)
1764
11.9k
                return 1; /* we got all of it */
1765
8.40M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
8.40M
            if (status != 0)
1767
7.39M
                return status;
1768
1.00M
            ++ptr;
1769
1.00M
            RESET_CAPTURE_GROUP();
1770
1.00M
        }
1771
97.3k
        return 0;
1772
5.18M
    }
1773
1774
92.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
701k
        Py_ssize_t i = 0;
1778
1779
701k
        end = (SRE_CHAR *)state->end;
1780
701k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.18M
        for (i = 0; i < prefix_len; i++)
1784
788k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
394k
#endif
1787
1.41M
        while (ptr < end) {
1788
1.41M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.63M
            while (*ptr++ != c) {
1790
7.21M
                if (ptr >= end)
1791
302
                    return 0;
1792
7.21M
            }
1793
1.41M
            if (ptr >= end)
1794
60
                return 0;
1795
1796
1.41M
            i = 1;
1797
1.41M
            state->must_advance = 0;
1798
1.41M
            do {
1799
1.41M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.31M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.31M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.31M
                    state->start = ptr - (prefix_len - 1);
1808
1.31M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.31M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.31M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.31M
                    if (status != 0)
1813
701k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
616k
                    if (++ptr >= end)
1816
65
                        return 0;
1817
616k
                    RESET_CAPTURE_GROUP();
1818
616k
                }
1819
717k
                i = overlap[i];
1820
717k
            } while (i != 0);
1821
1.41M
        }
1822
0
        return 0;
1823
701k
    }
1824
1825
92.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
82.0M
        end = (SRE_CHAR *)state->end;
1828
82.0M
        state->must_advance = 0;
1829
85.7M
        for (;;) {
1830
364M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
278M
                ptr++;
1832
85.7M
            if (ptr >= end)
1833
4.17M
                return 0;
1834
81.6M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
81.6M
            state->start = ptr;
1836
81.6M
            state->ptr = ptr;
1837
81.6M
            status = SRE(match)(state, pattern, 0);
1838
81.6M
            if (status != 0)
1839
77.8M
                break;
1840
3.72M
            ptr++;
1841
3.72M
            RESET_CAPTURE_GROUP();
1842
3.72M
        }
1843
82.0M
    } else {
1844
        /* general case */
1845
10.0M
        assert(ptr <= end);
1846
10.0M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
10.0M
        state->start = state->ptr = ptr;
1848
10.0M
        status = SRE(match)(state, pattern, 1);
1849
10.0M
        state->must_advance = 0;
1850
10.0M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
5.02M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
71
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
5.02M
        {
1854
5.02M
            state->start = state->ptr = ptr = end;
1855
5.02M
            return 0;
1856
5.02M
        }
1857
372M
        while (status == 0 && ptr < end) {
1858
367M
            ptr++;
1859
367M
            RESET_CAPTURE_GROUP();
1860
367M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
367M
            state->start = state->ptr = ptr;
1862
367M
            status = SRE(match)(state, pattern, 0);
1863
367M
        }
1864
5.07M
    }
1865
1866
82.9M
    return status;
1867
92.1M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
48.7M
{
1694
48.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.7M
    Py_ssize_t status = 0;
1697
48.7M
    Py_ssize_t prefix_len = 0;
1698
48.7M
    Py_ssize_t prefix_skip = 0;
1699
48.7M
    SRE_CODE* prefix = NULL;
1700
48.7M
    SRE_CODE* charset = NULL;
1701
48.7M
    SRE_CODE* overlap = NULL;
1702
48.7M
    int flags = 0;
1703
48.7M
    INIT_TRACE(state);
1704
1705
48.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.7M
        flags = pattern[2];
1713
1714
48.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.08M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.08M
                   end - ptr, (size_t) pattern[3]));
1717
6.08M
            return 0;
1718
6.08M
        }
1719
42.6M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.73M
            end -= pattern[3] - 1;
1723
2.73M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.73M
        }
1726
1727
42.6M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.73M
            prefix_len = pattern[5];
1731
2.73M
            prefix_skip = pattern[6];
1732
2.73M
            prefix = pattern + 7;
1733
2.73M
            overlap = prefix + prefix_len - 1;
1734
39.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
32.5M
            charset = pattern + 5;
1738
1739
42.6M
        pattern += 1 + pattern[1];
1740
42.6M
    }
1741
1742
42.6M
    TRACE(("prefix = %p %zd %zd\n",
1743
42.6M
           prefix, prefix_len, prefix_skip));
1744
42.6M
    TRACE(("charset = %p\n", charset));
1745
1746
42.6M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.68M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.68M
#if SIZEOF_SRE_CHAR < 4
1750
2.68M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.68M
#endif
1753
2.68M
        end = (SRE_CHAR *)state->end;
1754
2.68M
        state->must_advance = 0;
1755
3.04M
        while (ptr < end) {
1756
23.9M
            while (*ptr != c) {
1757
22.1M
                if (++ptr >= end)
1758
1.14M
                    return 0;
1759
22.1M
            }
1760
1.79M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.79M
            state->start = ptr;
1762
1.79M
            state->ptr = ptr + prefix_skip;
1763
1.79M
            if (flags & SRE_INFO_LITERAL)
1764
447
                return 1; /* we got all of it */
1765
1.79M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.79M
            if (status != 0)
1767
1.44M
                return status;
1768
355k
            ++ptr;
1769
355k
            RESET_CAPTURE_GROUP();
1770
355k
        }
1771
94.1k
        return 0;
1772
2.68M
    }
1773
1774
39.9M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
51.3k
        Py_ssize_t i = 0;
1778
1779
51.3k
        end = (SRE_CHAR *)state->end;
1780
51.3k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
51.3k
#if SIZEOF_SRE_CHAR < 4
1783
154k
        for (i = 0; i < prefix_len; i++)
1784
102k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
51.3k
#endif
1787
109k
        while (ptr < end) {
1788
109k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
935k
            while (*ptr++ != c) {
1790
825k
                if (ptr >= end)
1791
62
                    return 0;
1792
825k
            }
1793
109k
            if (ptr >= end)
1794
20
                return 0;
1795
1796
109k
            i = 1;
1797
109k
            state->must_advance = 0;
1798
110k
            do {
1799
110k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
101k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
101k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
101k
                    state->start = ptr - (prefix_len - 1);
1808
101k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
101k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
101k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
101k
                    if (status != 0)
1813
51.2k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
50.0k
                    if (++ptr >= end)
1816
26
                        return 0;
1817
49.9k
                    RESET_CAPTURE_GROUP();
1818
49.9k
                }
1819
58.9k
                i = overlap[i];
1820
58.9k
            } while (i != 0);
1821
109k
        }
1822
0
        return 0;
1823
51.3k
    }
1824
1825
39.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
32.5M
        end = (SRE_CHAR *)state->end;
1828
32.5M
        state->must_advance = 0;
1829
34.0M
        for (;;) {
1830
95.7M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
61.7M
                ptr++;
1832
34.0M
            if (ptr >= end)
1833
2.91M
                return 0;
1834
31.1M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
31.1M
            state->start = ptr;
1836
31.1M
            state->ptr = ptr;
1837
31.1M
            status = SRE(match)(state, pattern, 0);
1838
31.1M
            if (status != 0)
1839
29.6M
                break;
1840
1.54M
            ptr++;
1841
1.54M
            RESET_CAPTURE_GROUP();
1842
1.54M
        }
1843
32.5M
    } else {
1844
        /* general case */
1845
7.36M
        assert(ptr <= end);
1846
7.36M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
7.36M
        state->start = state->ptr = ptr;
1848
7.36M
        status = SRE(match)(state, pattern, 1);
1849
7.36M
        state->must_advance = 0;
1850
7.36M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.80M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
20
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
3.80M
        {
1854
3.80M
            state->start = state->ptr = ptr = end;
1855
3.80M
            return 0;
1856
3.80M
        }
1857
119M
        while (status == 0 && ptr < end) {
1858
115M
            ptr++;
1859
115M
            RESET_CAPTURE_GROUP();
1860
115M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
115M
            state->start = state->ptr = ptr;
1862
115M
            status = SRE(match)(state, pattern, 0);
1863
115M
        }
1864
3.55M
    }
1865
1866
33.1M
    return status;
1867
39.8M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
51.7M
{
1694
51.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
51.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
51.7M
    Py_ssize_t status = 0;
1697
51.7M
    Py_ssize_t prefix_len = 0;
1698
51.7M
    Py_ssize_t prefix_skip = 0;
1699
51.7M
    SRE_CODE* prefix = NULL;
1700
51.7M
    SRE_CODE* charset = NULL;
1701
51.7M
    SRE_CODE* overlap = NULL;
1702
51.7M
    int flags = 0;
1703
51.7M
    INIT_TRACE(state);
1704
1705
51.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
51.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
51.7M
        flags = pattern[2];
1713
1714
51.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
122k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
122k
                   end - ptr, (size_t) pattern[3]));
1717
122k
            return 0;
1718
122k
        }
1719
51.6M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.83M
            end -= pattern[3] - 1;
1723
2.83M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.83M
        }
1726
1727
51.6M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.84M
            prefix_len = pattern[5];
1731
2.84M
            prefix_skip = pattern[6];
1732
2.84M
            prefix = pattern + 7;
1733
2.84M
            overlap = prefix + prefix_len - 1;
1734
48.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
46.2M
            charset = pattern + 5;
1738
1739
51.6M
        pattern += 1 + pattern[1];
1740
51.6M
    }
1741
1742
51.6M
    TRACE(("prefix = %p %zd %zd\n",
1743
51.6M
           prefix, prefix_len, prefix_skip));
1744
51.6M
    TRACE(("charset = %p\n", charset));
1745
1746
51.6M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.50M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.50M
#if SIZEOF_SRE_CHAR < 4
1750
2.50M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.50M
#endif
1753
2.50M
        end = (SRE_CHAR *)state->end;
1754
2.50M
        state->must_advance = 0;
1755
3.05M
        while (ptr < end) {
1756
49.7M
            while (*ptr != c) {
1757
46.7M
                if (++ptr >= end)
1758
72.1k
                    return 0;
1759
46.7M
            }
1760
2.97M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.97M
            state->start = ptr;
1762
2.97M
            state->ptr = ptr + prefix_skip;
1763
2.97M
            if (flags & SRE_INFO_LITERAL)
1764
8.45k
                return 1; /* we got all of it */
1765
2.97M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.97M
            if (status != 0)
1767
2.41M
                return status;
1768
551k
            ++ptr;
1769
551k
            RESET_CAPTURE_GROUP();
1770
551k
        }
1771
2.27k
        return 0;
1772
2.50M
    }
1773
1774
49.1M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
342k
        Py_ssize_t i = 0;
1778
1779
342k
        end = (SRE_CHAR *)state->end;
1780
342k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
342k
#if SIZEOF_SRE_CHAR < 4
1783
1.02M
        for (i = 0; i < prefix_len; i++)
1784
685k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
342k
#endif
1787
641k
        while (ptr < end) {
1788
641k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.15M
            while (*ptr++ != c) {
1790
2.51M
                if (ptr >= end)
1791
98
                    return 0;
1792
2.51M
            }
1793
641k
            if (ptr >= end)
1794
22
                return 0;
1795
1796
641k
            i = 1;
1797
641k
            state->must_advance = 0;
1798
642k
            do {
1799
642k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
612k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
612k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
612k
                    state->start = ptr - (prefix_len - 1);
1808
612k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
612k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
612k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
612k
                    if (status != 0)
1813
342k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
270k
                    if (++ptr >= end)
1816
24
                        return 0;
1817
270k
                    RESET_CAPTURE_GROUP();
1818
270k
                }
1819
299k
                i = overlap[i];
1820
299k
            } while (i != 0);
1821
641k
        }
1822
0
        return 0;
1823
342k
    }
1824
1825
48.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
46.2M
        end = (SRE_CHAR *)state->end;
1828
46.2M
        state->must_advance = 0;
1829
47.9M
        for (;;) {
1830
202M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
154M
                ptr++;
1832
47.9M
            if (ptr >= end)
1833
1.21M
                return 0;
1834
46.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
46.7M
            state->start = ptr;
1836
46.7M
            state->ptr = ptr;
1837
46.7M
            status = SRE(match)(state, pattern, 0);
1838
46.7M
            if (status != 0)
1839
45.0M
                break;
1840
1.64M
            ptr++;
1841
1.64M
            RESET_CAPTURE_GROUP();
1842
1.64M
        }
1843
46.2M
    } else {
1844
        /* general case */
1845
2.52M
        assert(ptr <= end);
1846
2.52M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.52M
        state->start = state->ptr = ptr;
1848
2.52M
        status = SRE(match)(state, pattern, 1);
1849
2.52M
        state->must_advance = 0;
1850
2.52M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.20M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
29
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.20M
        {
1854
1.20M
            state->start = state->ptr = ptr = end;
1855
1.20M
            return 0;
1856
1.20M
        }
1857
183M
        while (status == 0 && ptr < end) {
1858
181M
            ptr++;
1859
181M
            RESET_CAPTURE_GROUP();
1860
181M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
181M
            state->start = state->ptr = ptr;
1862
181M
            status = SRE(match)(state, pattern, 0);
1863
181M
        }
1864
1.31M
    }
1865
1866
46.3M
    return status;
1867
48.8M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.32M
{
1694
7.32M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.32M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.32M
    Py_ssize_t status = 0;
1697
7.32M
    Py_ssize_t prefix_len = 0;
1698
7.32M
    Py_ssize_t prefix_skip = 0;
1699
7.32M
    SRE_CODE* prefix = NULL;
1700
7.32M
    SRE_CODE* charset = NULL;
1701
7.32M
    SRE_CODE* overlap = NULL;
1702
7.32M
    int flags = 0;
1703
7.32M
    INIT_TRACE(state);
1704
1705
7.32M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.32M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.32M
        flags = pattern[2];
1713
1714
7.32M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
16.9k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
16.9k
                   end - ptr, (size_t) pattern[3]));
1717
16.9k
            return 0;
1718
16.9k
        }
1719
7.31M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.85M
            end -= pattern[3] - 1;
1723
3.85M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.85M
        }
1726
1727
7.31M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.85M
            prefix_len = pattern[5];
1731
3.85M
            prefix_skip = pattern[6];
1732
3.85M
            prefix = pattern + 7;
1733
3.85M
            overlap = prefix + prefix_len - 1;
1734
3.85M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.24M
            charset = pattern + 5;
1738
1739
7.31M
        pattern += 1 + pattern[1];
1740
7.31M
    }
1741
1742
7.31M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.31M
           prefix, prefix_len, prefix_skip));
1744
7.31M
    TRACE(("charset = %p\n", charset));
1745
1746
7.31M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.54M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
3.54M
        end = (SRE_CHAR *)state->end;
1754
3.54M
        state->must_advance = 0;
1755
3.64M
        while (ptr < end) {
1756
32.0M
            while (*ptr != c) {
1757
28.4M
                if (++ptr >= end)
1758
3.96k
                    return 0;
1759
28.4M
            }
1760
3.64M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.64M
            state->start = ptr;
1762
3.64M
            state->ptr = ptr + prefix_skip;
1763
3.64M
            if (flags & SRE_INFO_LITERAL)
1764
3.05k
                return 1; /* we got all of it */
1765
3.63M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.63M
            if (status != 0)
1767
3.53M
                return status;
1768
101k
            ++ptr;
1769
101k
            RESET_CAPTURE_GROUP();
1770
101k
        }
1771
945
        return 0;
1772
3.54M
    }
1773
1774
3.76M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
307k
        Py_ssize_t i = 0;
1778
1779
307k
        end = (SRE_CHAR *)state->end;
1780
307k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
666k
        while (ptr < end) {
1788
666k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.53M
            while (*ptr++ != c) {
1790
3.87M
                if (ptr >= end)
1791
142
                    return 0;
1792
3.87M
            }
1793
666k
            if (ptr >= end)
1794
18
                return 0;
1795
1796
665k
            i = 1;
1797
665k
            state->must_advance = 0;
1798
666k
            do {
1799
666k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
604k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
604k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
604k
                    state->start = ptr - (prefix_len - 1);
1808
604k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
604k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
604k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
604k
                    if (status != 0)
1813
307k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
296k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
296k
                    RESET_CAPTURE_GROUP();
1818
296k
                }
1819
358k
                i = overlap[i];
1820
358k
            } while (i != 0);
1821
665k
        }
1822
0
        return 0;
1823
307k
    }
1824
1825
3.45M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.24M
        end = (SRE_CHAR *)state->end;
1828
3.24M
        state->must_advance = 0;
1829
3.78M
        for (;;) {
1830
66.7M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
62.9M
                ptr++;
1832
3.78M
            if (ptr >= end)
1833
53.2k
                return 0;
1834
3.73M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.73M
            state->start = ptr;
1836
3.73M
            state->ptr = ptr;
1837
3.73M
            status = SRE(match)(state, pattern, 0);
1838
3.73M
            if (status != 0)
1839
3.19M
                break;
1840
536k
            ptr++;
1841
536k
            RESET_CAPTURE_GROUP();
1842
536k
        }
1843
3.24M
    } else {
1844
        /* general case */
1845
207k
        assert(ptr <= end);
1846
207k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
207k
        state->start = state->ptr = ptr;
1848
207k
        status = SRE(match)(state, pattern, 1);
1849
207k
        state->must_advance = 0;
1850
207k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.9k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.9k
        {
1854
13.9k
            state->start = state->ptr = ptr = end;
1855
13.9k
            return 0;
1856
13.9k
        }
1857
69.7M
        while (status == 0 && ptr < end) {
1858
69.5M
            ptr++;
1859
69.5M
            RESET_CAPTURE_GROUP();
1860
69.5M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
69.5M
            state->start = state->ptr = ptr;
1862
69.5M
            status = SRE(match)(state, pattern, 0);
1863
69.5M
        }
1864
193k
    }
1865
1866
3.38M
    return status;
1867
3.45M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/