Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
23.3M
{
18
    /* check if pointer is at given position */
19
20
23.3M
    Py_ssize_t thisp, thatp;
21
22
23.3M
    switch (at) {
23
24
10.3M
    case SRE_AT_BEGINNING:
25
10.3M
    case SRE_AT_BEGINNING_STRING:
26
10.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.11M
    case SRE_AT_END:
33
7.11M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
22.4k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.11M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
5.91M
    case SRE_AT_END_STRING:
42
5.91M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
23.3M
    }
87
88
0
    return 0;
89
23.3M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
17.2M
{
18
    /* check if pointer is at given position */
19
20
17.2M
    Py_ssize_t thisp, thatp;
21
22
17.2M
    switch (at) {
23
24
8.88M
    case SRE_AT_BEGINNING:
25
8.88M
    case SRE_AT_BEGINNING_STRING:
26
8.88M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
5.73M
    case SRE_AT_END:
33
5.73M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
22.3k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
5.73M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.64M
    case SRE_AT_END_STRING:
42
2.64M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
17.2M
    }
87
88
0
    return 0;
89
17.2M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
4.29M
{
18
    /* check if pointer is at given position */
19
20
4.29M
    Py_ssize_t thisp, thatp;
21
22
4.29M
    switch (at) {
23
24
1.44M
    case SRE_AT_BEGINNING:
25
1.44M
    case SRE_AT_BEGINNING_STRING:
26
1.44M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
1.33M
    case SRE_AT_END:
33
1.33M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
49
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
1.33M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.51M
    case SRE_AT_END_STRING:
42
1.51M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
4.29M
    }
87
88
0
    return 0;
89
4.29M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
1.81M
{
18
    /* check if pointer is at given position */
19
20
1.81M
    Py_ssize_t thisp, thatp;
21
22
1.81M
    switch (at) {
23
24
17.6k
    case SRE_AT_BEGINNING:
25
17.6k
    case SRE_AT_BEGINNING_STRING:
26
17.6k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
45.3k
    case SRE_AT_END:
33
45.3k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
81
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
45.3k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.75M
    case SRE_AT_END_STRING:
42
1.75M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.81M
    }
87
88
0
    return 0;
89
1.81M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.45G
{
94
    /* check if character is a member of the given set */
95
96
1.45G
    int ok = 1;
97
98
3.20G
    for (;;) {
99
3.20G
        switch (*set++) {
100
101
901M
        case SRE_OP_FAILURE:
102
901M
            return !ok;
103
104
989M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
989M
            if (ch == set[0])
107
10.2M
                return ok;
108
979M
            set++;
109
979M
            break;
110
111
86.7M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
86.7M
            if (sre_category(set[0], (int) ch))
114
75.8M
                return ok;
115
10.8M
            set++;
116
10.8M
            break;
117
118
526M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
526M
            if (ch < 256 &&
121
502M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
196M
                return ok;
123
330M
            set += 256/SRE_CODE_BITS;
124
330M
            break;
125
126
406M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
406M
            if (set[0] <= ch && ch <= set[1])
129
266M
                return ok;
130
140M
            set += 2;
131
140M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
292M
        case SRE_OP_NEGATE:
148
292M
            ok = !ok;
149
292M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.20G
        }
175
3.20G
    }
176
1.45G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
442M
{
94
    /* check if character is a member of the given set */
95
96
442M
    int ok = 1;
97
98
859M
    for (;;) {
99
859M
        switch (*set++) {
100
101
218M
        case SRE_OP_FAILURE:
102
218M
            return !ok;
103
104
195M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
195M
            if (ch == set[0])
107
7.02M
                return ok;
108
188M
            set++;
109
188M
            break;
110
111
32.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.0M
            if (sre_category(set[0], (int) ch))
114
22.4M
                return ok;
115
9.55M
            set++;
116
9.55M
            break;
117
118
131M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
131M
            if (ch < 256 &&
121
131M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
55.1M
                return ok;
123
76.0M
            set += 256/SRE_CODE_BITS;
124
76.0M
            break;
125
126
214M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
214M
            if (set[0] <= ch && ch <= set[1])
129
139M
                return ok;
130
74.9M
            set += 2;
131
74.9M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
68.1M
        case SRE_OP_NEGATE:
148
68.1M
            ok = !ok;
149
68.1M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
859M
        }
175
859M
    }
176
442M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
671M
{
94
    /* check if character is a member of the given set */
95
96
671M
    int ok = 1;
97
98
1.56G
    for (;;) {
99
1.56G
        switch (*set++) {
100
101
454M
        case SRE_OP_FAILURE:
102
454M
            return !ok;
103
104
600M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
600M
            if (ch == set[0])
107
2.36M
                return ok;
108
598M
            set++;
109
598M
            break;
110
111
48.8M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
48.8M
            if (sre_category(set[0], (int) ch))
114
47.9M
                return ok;
115
977k
            set++;
116
977k
            break;
117
118
187M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
187M
            if (ch < 256 &&
121
175M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
60.4M
                return ok;
123
126M
            set += 256/SRE_CODE_BITS;
124
126M
            break;
125
126
161M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
161M
            if (set[0] <= ch && ch <= set[1])
129
106M
                return ok;
130
55.6M
            set += 2;
131
55.6M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
114M
        case SRE_OP_NEGATE:
148
114M
            ok = !ok;
149
114M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.56G
        }
175
1.56G
    }
176
671M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
336M
{
94
    /* check if character is a member of the given set */
95
96
336M
    int ok = 1;
97
98
776M
    for (;;) {
99
776M
        switch (*set++) {
100
101
228M
        case SRE_OP_FAILURE:
102
228M
            return !ok;
103
104
193M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
193M
            if (ch == set[0])
107
851k
                return ok;
108
192M
            set++;
109
192M
            break;
110
111
5.77M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
5.77M
            if (sre_category(set[0], (int) ch))
114
5.47M
                return ok;
115
307k
            set++;
116
307k
            break;
117
118
208M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
208M
            if (ch < 256 &&
121
195M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
81.1M
                return ok;
123
127M
            set += 256/SRE_CODE_BITS;
124
127M
            break;
125
126
30.5M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
30.5M
            if (set[0] <= ch && ch <= set[1])
129
21.0M
                return ok;
130
9.50M
            set += 2;
131
9.50M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
109M
        case SRE_OP_NEGATE:
148
109M
            ok = !ok;
149
109M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
776M
        }
175
776M
    }
176
336M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
370M
{
195
370M
    SRE_CODE chr;
196
370M
    SRE_CHAR c;
197
370M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
370M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
370M
    Py_ssize_t i;
200
370M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
370M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
49.7M
        end = ptr + maxcount;
205
206
370M
    switch (pattern[0]) {
207
208
318M
    case SRE_OP_IN:
209
        /* repeated set */
210
318M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
704M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
385M
            ptr++;
213
318M
        break;
214
215
2.61M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.61M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
38.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
36.0M
            ptr++;
220
2.61M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
49.1M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
49.1M
        chr = pattern[1];
232
49.1M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
49.1M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
37.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
37.6M
        else
238
37.6M
#endif
239
51.9M
        while (ptr < end && *ptr == c)
240
2.81M
            ptr++;
241
49.1M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
724k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
724k
        chr = pattern[1];
270
724k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
724k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
389k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
389k
        else
276
389k
#endif
277
54.6M
        while (ptr < end && *ptr != c)
278
53.9M
            ptr++;
279
724k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
370M
    }
319
320
370M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
370M
           ptr - (SRE_CHAR*) state->ptr));
322
370M
    return ptr - (SRE_CHAR*) state->ptr;
323
370M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
111M
{
195
111M
    SRE_CODE chr;
196
111M
    SRE_CHAR c;
197
111M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
111M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
111M
    Py_ssize_t i;
200
111M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
111M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
21.5M
        end = ptr + maxcount;
205
206
111M
    switch (pattern[0]) {
207
208
84.4M
    case SRE_OP_IN:
209
        /* repeated set */
210
84.4M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
235M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
150M
            ptr++;
213
84.4M
        break;
214
215
2.37M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.37M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
11.9M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
9.54M
            ptr++;
220
2.37M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
24.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
24.6M
        chr = pattern[1];
232
24.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
24.6M
        c = (SRE_CHAR) chr;
234
24.6M
#if SIZEOF_SRE_CHAR < 4
235
24.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
24.6M
        else
238
24.6M
#endif
239
25.0M
        while (ptr < end && *ptr == c)
240
359k
            ptr++;
241
24.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
135k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
135k
        chr = pattern[1];
270
135k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
135k
        c = (SRE_CHAR) chr;
272
135k
#if SIZEOF_SRE_CHAR < 4
273
135k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
135k
        else
276
135k
#endif
277
11.0M
        while (ptr < end && *ptr != c)
278
10.8M
            ptr++;
279
135k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
111M
    }
319
320
111M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
111M
           ptr - (SRE_CHAR*) state->ptr));
322
111M
    return ptr - (SRE_CHAR*) state->ptr;
323
111M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
172M
{
195
172M
    SRE_CODE chr;
196
172M
    SRE_CHAR c;
197
172M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
172M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
172M
    Py_ssize_t i;
200
172M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
172M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
14.4M
        end = ptr + maxcount;
205
206
172M
    switch (pattern[0]) {
207
208
158M
    case SRE_OP_IN:
209
        /* repeated set */
210
158M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
283M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
124M
            ptr++;
213
158M
        break;
214
215
232k
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
232k
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
12.2M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
12.0M
            ptr++;
220
232k
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
12.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
12.9M
        chr = pattern[1];
232
12.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
12.9M
        c = (SRE_CHAR) chr;
234
12.9M
#if SIZEOF_SRE_CHAR < 4
235
12.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
12.9M
        else
238
12.9M
#endif
239
14.5M
        while (ptr < end && *ptr == c)
240
1.60M
            ptr++;
241
12.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
253k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
253k
        chr = pattern[1];
270
253k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
253k
        c = (SRE_CHAR) chr;
272
253k
#if SIZEOF_SRE_CHAR < 4
273
253k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
253k
        else
276
253k
#endif
277
12.4M
        while (ptr < end && *ptr != c)
278
12.2M
            ptr++;
279
253k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
172M
    }
319
320
172M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
172M
           ptr - (SRE_CHAR*) state->ptr));
322
172M
    return ptr - (SRE_CHAR*) state->ptr;
323
172M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
87.0M
{
195
87.0M
    SRE_CODE chr;
196
87.0M
    SRE_CHAR c;
197
87.0M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
87.0M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
87.0M
    Py_ssize_t i;
200
87.0M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
87.0M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
13.7M
        end = ptr + maxcount;
205
206
87.0M
    switch (pattern[0]) {
207
208
75.2M
    case SRE_OP_IN:
209
        /* repeated set */
210
75.2M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
185M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
110M
            ptr++;
213
75.2M
        break;
214
215
8.42k
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
8.42k
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
14.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
14.4M
            ptr++;
220
8.42k
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.4M
        chr = pattern[1];
232
11.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.4M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.3M
        while (ptr < end && *ptr == c)
240
855k
            ptr++;
241
11.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
335k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
335k
        chr = pattern[1];
270
335k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
335k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
31.1M
        while (ptr < end && *ptr != c)
278
30.8M
            ptr++;
279
335k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
87.0M
    }
319
320
87.0M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
87.0M
           ptr - (SRE_CHAR*) state->ptr));
322
87.0M
    return ptr - (SRE_CHAR*) state->ptr;
323
87.0M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (in other words,
327
 * those that will backtrack). The cases are as follows:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Snapshot state->lastmark and state->lastindex into the current match
   context.  Expects `ctx` and `state` to be in scope at the expansion
   site. */
#define LASTMARK_SAVE()     \
354
551M
    do { \
355
551M
        ctx->lastmark = state->lastmark; \
356
551M
        ctx->lastindex = state->lastindex; \
357
551M
    } while (0)
358
/* Inverse of LASTMARK_SAVE(): copy the lastmark/lastindex values stored in
   `ctx` back into `state`. */
#define LASTMARK_RESTORE()  \
359
176M
    do { \
360
176M
        state->lastmark = ctx->lastmark; \
361
176M
        state->lastindex = ctx->lastindex; \
362
176M
    } while (0)
363
364
/* Trace, then push ctx->u.rep->last_ptr onto the data stack via DATA_PUSH.
   Because DATA_PUSH expands to DATA_STACK_PUSH, this may return from the
   enclosing function if growing the stack fails. */
#define LAST_PTR_PUSH()     \
365
195M
    do { \
366
195M
        TRACE(("push last_ptr: %zd", \
367
195M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
195M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
195M
    } while (0)
370
/* Pop the top of the data stack back into ctx->u.rep->last_ptr (removing it
   from the stack), then trace the restored value. */
#define LAST_PTR_POP()  \
371
195M
    do { \
372
195M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
195M
        TRACE(("pop last_ptr: %zd", \
374
195M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
195M
    } while (0)
376
377
0
/* Exit helpers for the function these macros are expanded in.
   RETURN_ERROR returns `i` immediately with a plain `return`.
   RETURN_FAILURE / RETURN_SUCCESS set the local `ret` to 0 / 1 and jump to
   the `exit` label, which lies outside this excerpt. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
412M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
650M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Return `i` from the enclosing function when it is negative.
   Note: `i` is expanded unparenthesized and more than once, so pass a plain
   variable rather than an expression with side effects. */
#define RETURN_ON_ERROR(i) \
382
890M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
/* Propagate a negative `i` as an error; otherwise, if `i` is positive, take
   the RETURN_SUCCESS path.  Falls through when `i` is 0. */
#define RETURN_ON_SUCCESS(i) \
384
48.8M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
/* Propagate a negative `i` as an error; otherwise, if `i` is 0, take the
   RETURN_FAILURE path.  Falls through when `i` is positive. */
#define RETURN_ON_FAILURE(i) \
386
26.0M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.06G
/* Reserve sizeof(type) bytes at the current top of state's data stack and
   point `ptr` at them.
   - Records the offset of the new object in the caller's `alloc_pos`.
   - If fewer than sizeof(type) bytes remain, calls data_stack_grow(); a
     negative result is returned from the enclosing function.
   - After growing, `ctx` is looked up again from `ctx_pos` (unless ctx_pos
     is -1).  NOTE(review): presumably because data_stack_grow() may move
     state->data_stack, invalidating the old `ctx` pointer -- confirm.
   Requires `alloc_pos`, `ctx_pos` and `ctx` to be in scope. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.06G
do { \
390
1.06G
    alloc_pos = state->data_stack_base; \
391
1.06G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.06G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.06G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
162M
        int j = data_stack_grow(state, sizeof(type)); \
395
162M
        if (j < 0) return j; \
396
162M
        if (ctx_pos != -1) \
397
162M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
162M
    } \
399
1.06G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.06G
    state->data_stack_base += sizeof(type); \
401
1.06G
} while (0)
402
403
1.06G
/* Set `ptr` to the object of the given `type` located `pos` bytes from the
   start of state->data_stack.  Does not change the stack top. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.06G
do { \
405
1.06G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.06G
    ptr = (type*)(state->data_stack+pos); \
407
1.06G
} while (0)
408
409
545M
/* Copy `size` bytes from `data` onto the top of state's data stack and
   advance the stack top by `size`.
   If fewer than `size` bytes remain, data_stack_grow() is called first; a
   negative result is returned from the enclosing function, and `ctx` is
   looked up again from `ctx_pos` (unless ctx_pos is -1).
   Requires `ctx_pos` and `ctx` to be in scope. */
#define DATA_STACK_PUSH(state, data, size) \
410
545M
do { \
411
545M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
545M
           data, state->data_stack_base, size)); \
413
545M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
87.2k
        int j = data_stack_grow(state, size); \
415
87.2k
        if (j < 0) return j; \
416
87.2k
        if (ctx_pos != -1) \
417
87.2k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
87.2k
    } \
419
545M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
545M
    state->data_stack_base += size; \
421
545M
} while (0)
422
423
/* Copy the top `size` bytes of state's data stack into `data`; when
   `discard` is true, also lower the stack top by `size`.
   We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`, see bpo-39943 for details. */
426
301M
#define DATA_STACK_POP(state, data, size, discard) \
427
301M
do { \
428
301M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
301M
           data, state->data_stack_base-size, size)); \
430
301M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
301M
    if (discard) \
432
301M
        state->data_stack_base -= size; \
433
301M
} while (0)
434
435
1.30G
/* Drop the top `size` bytes of state's data stack without copying them
   anywhere: only the stack top is lowered. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.30G
do { \
437
1.30G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.30G
           state->data_stack_base-size, size)); \
439
1.30G
    state->data_stack_base -= size; \
440
1.30G
} while(0)
441
442
/* Shorthand wrappers around the DATA_STACK_* macros that use the `state`
   variable in scope.  For the push/pop forms, `x` is a pointer to the
   object and the byte count is taken from sizeof(*(x)). */
#define DATA_PUSH(x) \
443
195M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop into *x and remove the bytes from the stack (discard = 1). */
#define DATA_POP(x) \
445
195M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Remove sizeof(*(x)) bytes from the stack; `x` is used only for its type
   and nothing is copied. */
#define DATA_POP_DISCARD(x) \
447
1.06G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Allocate an object of type `t` on the data stack and store its address
   in `p`. */
#define DATA_ALLOC(t,p) \
449
1.06G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point `p` at the object of type `t` at byte offset `pos` on the data
   stack. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.06G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into the subject string to a character index: the byte
   distance from state->beginning divided by state->charsize.  A null
   pointer yields -1. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* Debug helper: when VERBOSE is set and DO_TRACE is true, print `label`,
   the number of marks (lastmark+1), and the index of every entry of
   state->mark[0..lastmark].  An extra space is emitted before each even
   position after the first, so the marks print in groups of two.
   Without VERBOSE the macro expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save/restore helpers for the state->mark array.  Each one is a no-op when
   `lastmark` is negative; otherwise it operates on the first lastmark+1
   pointer-sized entries of state->mark.

   MARK_PUSH: copy those entries onto the data stack.  May return from the
   enclosing function if growing the stack fails (see DATA_STACK_PUSH). */
#define MARK_PUSH(lastmark) \
472
466M
    do if (lastmark >= 0) { \
473
349M
        MARK_TRACE("push", (lastmark)); \
474
349M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
349M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
466M
    } while (0)
477
/* MARK_POP: copy the saved entries from the top of the data stack back into
   state->mark and remove them from the stack. */
#define MARK_POP(lastmark) \
478
121M
    do if (lastmark >= 0) { \
479
104M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
104M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
104M
        MARK_TRACE("pop", (lastmark)); \
482
121M
    } while (0)
483
/* MARK_POP_KEEP: copy the saved entries back into state->mark but leave
   them on the data stack (discard = 0), so they can be restored again. */
#define MARK_POP_KEEP(lastmark) \
484
1.77M
    do if (lastmark >= 0) { \
485
1.15M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.15M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.15M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.77M
    } while (0)
489
/* MARK_POP_DISCARD: remove the saved entries from the data stack without
   touching state->mark. */
#define MARK_POP_DISCARD(lastmark) \
490
345M
    do if (lastmark >= 0) { \
491
244M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
244M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
244M
        MARK_TRACE("pop discard", (lastmark)); \
494
345M
    } while (0)
495
496
367M
/* Values stored in the `jump` field of a match context.  JUMP_NONE is what
   SRE(match) assigns to the first context it creates; the other values are
   passed as the `jumpvalue` argument of DO_JUMP/DO_JUMP0 together with a
   label (for example JUMP_BRANCH with jump_branch).
   NOTE(review): presumably the exit path uses ctx->jump to pick the label
   to resume at; that code is outside this excerpt -- confirm. */
#define JUMP_NONE            0
497
145k
#define JUMP_MAX_UNTIL_1     1
498
195M
#define JUMP_MAX_UNTIL_2     2
499
48.8M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
48.5M
#define JUMP_REPEAT          7
504
14.3M
#define JUMP_REPEAT_ONE_1    8
505
161M
#define JUMP_REPEAT_ONE_2    9
506
832
#define JUMP_MIN_REPEAT_ONE  10
507
108M
#define JUMP_BRANCH          11
508
26.0M
#define JUMP_ASSERT          12
509
92.1M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without recursing
   in C.  Steps:
     1. save the current `pattern` and `ptr` in `ctx`;
     2. allocate `nextctx` on the data stack and fill in its pattern,
        toplevel flag, jump value and the position of the current context
        (last_ctx_pos = ctx_pos);
     3. make the new context current and `goto entrance`;
     4. at `jumplabel`, reload `pattern` and `ptr` from `ctx`.
   NOTE(review): the code that transfers control back to `jumplabel` (and
   resets `ctx` to the saved context) is outside this excerpt -- confirm.
   The expansion is a bare statement sequence containing a label, not a
   do { } while (0) block, so it must be used inside braces. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
695M
    ctx->pattern = pattern; \
516
695M
    ctx->ptr = ptr; \
517
695M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
695M
    nextctx->pattern = nextpattern; \
519
695M
    nextctx->toplevel = toplevel_; \
520
695M
    nextctx->jump = jumpvalue; \
521
695M
    nextctx->last_ctx_pos = ctx_pos; \
522
695M
    pattern = nextpattern; \
523
695M
    ctx_pos = alloc_pos; \
524
695M
    ctx = nextctx; \
525
695M
    goto entrance; \
526
695M
    jumplabel: \
527
695M
    pattern = ctx->pattern; \
528
695M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX variant in which the new context inherits the current context's
   toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
577M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX variant in which the new context always has toplevel = 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
118M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-invocation frame of the matcher.  Instances are allocated on the
   state's data stack by DATA_ALLOC (on entry to SRE(match) and in
   DO_JUMPX). */
typedef struct {
537
    Py_ssize_t count;  /* NOTE(review): not referenced in this excerpt; presumably a repeat counter -- confirm */
538
    union {
539
        SRE_CODE chr;  /* NOTE(review): use not visible in this excerpt */
540
        SRE_REPEAT* rep;  /* repeat record; its last_ptr is saved by LAST_PTR_PUSH/POP */
541
    } u;
542
    int lastmark;  /* copy of state->lastmark taken by LASTMARK_SAVE */
543
    int lastindex;  /* copy of state->lastindex taken by LASTMARK_SAVE */
544
    const SRE_CODE* pattern;  /* pattern position saved by DO_JUMPX before jumping */
545
    const SRE_CHAR* ptr;  /* string position saved by DO_JUMPX before jumping */
546
    int toplevel;  /* toplevel flag; consulted by the SRE_OP_SUCCESS handler */
547
    int jump;  /* one of the JUMP_* values */
548
    Py_ssize_t last_ctx_pos;  /* data-stack offset of the context that created this one, or -1 for the first */
549
} SRE(match_context);
550
551
/* Increment the local `sigcount` and, each time its low 12 bits become zero
   (once every 4096 increments), call PyErr_CheckSignals().  If that call
   returns non-zero, return SRE_ERROR_INTERRUPTED from the enclosing
   function. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
1.94G
    do {                                                           \
553
1.94G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
1.94G
    } while (0)
557
558
/* Public form of the signal check.  In Py_DEBUG builds it additionally
   supports failure injection: while state->fail_after_count is
   non-negative it is decremented on every check, and when it was 0 before
   the decrement the exception state->fail_after_exc is set and
   SRE_ERROR_INTERRUPTED is returned.  In other builds it is just
   _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
1.94G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalize USE_COMPUTED_GOTOS so that it is always defined as 0 or 1 below:
   - compiler support present (HAVE_COMPUTED_GOTOS): default to 1 unless the
     builder already defined USE_COMPUTED_GOTOS;
   - no support but USE_COMPUTED_GOTOS requested as non-zero: compile error;
   - otherwise: force it to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch for SRE(match).
   With computed gotos, TARGET(OP) names the label TARGET_<OP> and DISPATCH
   runs the signal check, then jumps through sre_targets[] indexed by the
   next opcode, advancing `pattern` past it.
   NOTE(review): sre_targets presumably comes from "sre_targets.h", which
   SRE(match) includes -- confirm.
   Without computed gotos, TARGET(OP) is a `case` label and DISPATCH jumps to
   the `dispatch` label, where SRE(match) performs the signal check and the
   switch on *pattern++. */
#if USE_COMPUTED_GOTOS
585
2.02G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
1.94G
        do {                               \
588
1.94G
            MAYBE_CHECK_SIGNALS;           \
589
1.94G
            goto *sre_targets[*pattern++]; \
590
1.94G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
367M
{
601
367M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
367M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
367M
    Py_ssize_t ret = 0;
604
367M
    int jump;
605
367M
    unsigned int sigcount = state->sigcount;
606
607
367M
    SRE(match_context)* ctx;
608
367M
    SRE(match_context)* nextctx;
609
367M
    INIT_TRACE(state);
610
611
367M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
367M
    DATA_ALLOC(SRE(match_context), ctx);
614
367M
    ctx->last_ctx_pos = -1;
615
367M
    ctx->jump = JUMP_NONE;
616
367M
    ctx->toplevel = toplevel;
617
367M
    ctx_pos = alloc_pos;
618
619
367M
#if USE_COMPUTED_GOTOS
620
367M
#include "sre_targets.h"
621
367M
#endif
622
623
1.06G
entrance:
624
625
1.06G
    ;  // Fashion statement.
626
1.06G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.06G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
58.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.78M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.78M
                   end - ptr, (size_t) pattern[3]));
634
3.78M
            RETURN_FAILURE;
635
3.78M
        }
636
54.8M
        pattern += pattern[1] + 1;
637
54.8M
    }
638
639
1.05G
#if USE_COMPUTED_GOTOS
640
1.05G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.05G
    {
647
648
1.05G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
335M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
335M
                   ptr, pattern[0]));
653
335M
            {
654
335M
                int i = pattern[0];
655
335M
                if (i & 1)
656
68.9M
                    state->lastindex = i/2 + 1;
657
335M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
329M
                    int j = state->lastmark + 1;
663
344M
                    while (j < i)
664
14.4M
                        state->mark[j++] = NULL;
665
329M
                    state->lastmark = i;
666
329M
                }
667
335M
                state->mark[i] = ptr;
668
335M
            }
669
335M
            pattern++;
670
335M
            DISPATCH;
671
672
335M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
145M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
145M
                   ptr, *pattern));
677
145M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
65.1M
                RETURN_FAILURE;
679
79.8M
            pattern++;
680
79.8M
            ptr++;
681
79.8M
            DISPATCH;
682
683
79.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
159M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
159M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
159M
            if (ctx->toplevel &&
698
40.2M
                ((state->match_all && ptr != state->end) ||
699
40.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
159M
            state->ptr = ptr;
704
159M
            RETURN_SUCCESS;
705
706
23.3M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
23.3M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
23.3M
            if (!SRE(at)(state, ptr, *pattern))
711
6.33M
                RETURN_FAILURE;
712
17.0M
            pattern++;
713
17.0M
            DISPATCH;
714
715
17.0M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
343M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
343M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
343M
            if (ptr >= end ||
749
338M
                !SRE(charset)(state, pattern + 1, *ptr))
750
86.1M
                RETURN_FAILURE;
751
256M
            pattern += pattern[0];
752
256M
            ptr++;
753
256M
            DISPATCH;
754
755
256M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.87M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.87M
                   pattern, ptr, pattern[0]));
758
7.87M
            if (ptr >= end ||
759
7.87M
                sre_lower_ascii(*ptr) != *pattern)
760
68.0k
                RETURN_FAILURE;
761
7.80M
            pattern++;
762
7.80M
            ptr++;
763
7.80M
            DISPATCH;
764
765
7.80M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
78.7M
        TARGET(SRE_OP_JUMP):
845
78.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
78.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
78.7M
                   ptr, pattern[0]));
850
78.7M
            pattern += pattern[0];
851
78.7M
            DISPATCH;
852
853
99.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
99.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
99.6M
            LASTMARK_SAVE();
858
99.6M
            if (state->repeat)
859
61.6M
                MARK_PUSH(ctx->lastmark);
860
224M
            for (; pattern[0]; pattern += pattern[0]) {
861
200M
                if (pattern[1] == SRE_OP_LITERAL &&
862
117M
                    (ptr >= end ||
863
117M
                     (SRE_CODE) *ptr != pattern[2]))
864
64.0M
                    continue;
865
136M
                if (pattern[1] == SRE_OP_IN &&
866
52.6M
                    (ptr >= end ||
867
52.4M
                     !SRE(charset)(state, pattern + 3,
868
52.4M
                                   (SRE_CODE) *ptr)))
869
28.4M
                    continue;
870
108M
                state->ptr = ptr;
871
108M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
108M
                if (ret) {
873
76.1M
                    if (state->repeat)
874
52.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
76.1M
                    RETURN_ON_ERROR(ret);
876
76.1M
                    RETURN_SUCCESS;
877
76.1M
                }
878
32.1M
                if (state->repeat)
879
16.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
32.1M
                LASTMARK_RESTORE();
881
32.1M
            }
882
23.4M
            if (state->repeat)
883
8.93M
                MARK_POP_DISCARD(ctx->lastmark);
884
23.4M
            RETURN_FAILURE;
885
886
373M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
373M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
373M
                   pattern[1], pattern[2]));
898
899
373M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
2.29M
                RETURN_FAILURE; /* cannot match */
901
902
370M
            state->ptr = ptr;
903
904
370M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
370M
            RETURN_ON_ERROR(ret);
906
370M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
370M
            ctx->count = ret;
908
370M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
370M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
206M
                RETURN_FAILURE;
917
918
164M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.77M
                ptr == state->end &&
920
83.6k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
83.6k
            {
922
                /* tail is empty.  we're finished */
923
83.6k
                state->ptr = ptr;
924
83.6k
                RETURN_SUCCESS;
925
83.6k
            }
926
927
164M
            LASTMARK_SAVE();
928
164M
            if (state->repeat)
929
117M
                MARK_PUSH(ctx->lastmark);
930
931
164M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
23.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
23.7M
                for (;;) {
936
60.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
50.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
36.3M
                        ptr--;
939
36.3M
                        ctx->count--;
940
36.3M
                    }
941
23.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
9.33M
                        break;
943
14.3M
                    state->ptr = ptr;
944
14.3M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
14.3M
                            pattern+pattern[0]);
946
14.3M
                    if (ret) {
947
14.3M
                        if (state->repeat)
948
12.8M
                            MARK_POP_DISCARD(ctx->lastmark);
949
14.3M
                        RETURN_ON_ERROR(ret);
950
14.3M
                        RETURN_SUCCESS;
951
14.3M
                    }
952
851
                    if (state->repeat)
953
835
                        MARK_POP_KEEP(ctx->lastmark);
954
851
                    LASTMARK_RESTORE();
955
956
851
                    ptr--;
957
851
                    ctx->count--;
958
851
                }
959
9.33M
                if (state->repeat)
960
8.21M
                    MARK_POP_DISCARD(ctx->lastmark);
961
140M
            } else {
962
                /* general case */
963
163M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
161M
                    state->ptr = ptr;
965
161M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
161M
                            pattern+pattern[0]);
967
161M
                    if (ret) {
968
138M
                        if (state->repeat)
969
94.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
138M
                        RETURN_ON_ERROR(ret);
971
138M
                        RETURN_SUCCESS;
972
138M
                    }
973
22.8M
                    if (state->repeat)
974
1.75M
                        MARK_POP_KEEP(ctx->lastmark);
975
22.8M
                    LASTMARK_RESTORE();
976
977
22.8M
                    ptr--;
978
22.8M
                    ctx->count--;
979
22.8M
                }
980
1.99M
                if (state->repeat)
981
1.45M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.99M
            }
983
11.3M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
48.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
48.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
48.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
48.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
48.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
48.5M
            ctx->u.rep->count = -1;
1131
48.5M
            ctx->u.rep->pattern = pattern;
1132
48.5M
            ctx->u.rep->prev = state->repeat;
1133
48.5M
            ctx->u.rep->last_ptr = NULL;
1134
48.5M
            state->repeat = ctx->u.rep;
1135
1136
48.5M
            state->ptr = ptr;
1137
48.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
48.5M
            state->repeat = ctx->u.rep->prev;
1139
48.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
48.5M
            if (ret) {
1142
48.4M
                RETURN_ON_ERROR(ret);
1143
48.4M
                RETURN_SUCCESS;
1144
48.4M
            }
1145
106k
            RETURN_FAILURE;
1146
1147
213M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
213M
            ctx->u.rep = state->repeat;
1155
213M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
213M
            state->ptr = ptr;
1159
1160
213M
            ctx->count = ctx->u.rep->count+1;
1161
1162
213M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
213M
                   ptr, ctx->count));
1164
1165
213M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
145k
                ctx->u.rep->count = ctx->count;
1168
145k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
145k
                        ctx->u.rep->pattern+3);
1170
145k
                if (ret) {
1171
129k
                    RETURN_ON_ERROR(ret);
1172
129k
                    RETURN_SUCCESS;
1173
129k
                }
1174
15.9k
                ctx->u.rep->count = ctx->count-1;
1175
15.9k
                state->ptr = ptr;
1176
15.9k
                RETURN_FAILURE;
1177
15.9k
            }
1178
1179
213M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
17.6M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
195M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
195M
                ctx->u.rep->count = ctx->count;
1185
195M
                LASTMARK_SAVE();
1186
195M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
195M
                LAST_PTR_PUSH();
1189
195M
                ctx->u.rep->last_ptr = state->ptr;
1190
195M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
195M
                        ctx->u.rep->pattern+3);
1192
195M
                LAST_PTR_POP();
1193
195M
                if (ret) {
1194
164M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
164M
                    RETURN_ON_ERROR(ret);
1196
164M
                    RETURN_SUCCESS;
1197
164M
                }
1198
31.2M
                MARK_POP(ctx->lastmark);
1199
31.2M
                LASTMARK_RESTORE();
1200
31.2M
                ctx->u.rep->count = ctx->count-1;
1201
31.2M
                state->ptr = ptr;
1202
31.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
48.8M
            state->repeat = ctx->u.rep->prev;
1207
48.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
48.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
48.8M
            RETURN_ON_SUCCESS(ret);
1211
431k
            state->ptr = ptr;
1212
431k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
26.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
26.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
26.0M
                   ptr, pattern[1]));
1565
26.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
26.0M
            state->ptr = ptr - pattern[1];
1568
26.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
26.0M
            RETURN_ON_FAILURE(ret);
1570
21.2M
            pattern += pattern[0];
1571
21.2M
            DISPATCH;
1572
1573
92.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
92.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
92.1M
                   ptr, pattern[1]));
1578
92.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
92.1M
                state->ptr = ptr - pattern[1];
1580
92.1M
                LASTMARK_SAVE();
1581
92.1M
                if (state->repeat)
1582
92.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
184M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
184M
                if (ret) {
1586
2.25M
                    if (state->repeat)
1587
2.25M
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.25M
                    RETURN_ON_ERROR(ret);
1589
2.25M
                    RETURN_FAILURE;
1590
2.25M
                }
1591
89.9M
                if (state->repeat)
1592
89.9M
                    MARK_POP(ctx->lastmark);
1593
89.9M
                LASTMARK_RESTORE();
1594
89.9M
            }
1595
89.9M
            pattern += pattern[0];
1596
89.9M
            DISPATCH;
1597
1598
89.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.06G
exit:
1620
1.06G
    ctx_pos = ctx->last_ctx_pos;
1621
1.06G
    jump = ctx->jump;
1622
1.06G
    DATA_POP_DISCARD(ctx);
1623
1.06G
    if (ctx_pos == -1) {
1624
367M
        state->sigcount = sigcount;
1625
367M
        return ret;
1626
367M
    }
1627
695M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
695M
    switch (jump) {
1630
195M
        case JUMP_MAX_UNTIL_2:
1631
195M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
195M
            goto jump_max_until_2;
1633
48.8M
        case JUMP_MAX_UNTIL_3:
1634
48.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
48.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
108M
        case JUMP_BRANCH:
1643
108M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
108M
            goto jump_branch;
1645
145k
        case JUMP_MAX_UNTIL_1:
1646
145k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
145k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
48.5M
        case JUMP_REPEAT:
1658
48.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
48.5M
            goto jump_repeat;
1660
14.3M
        case JUMP_REPEAT_ONE_1:
1661
14.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
14.3M
            goto jump_repeat_one_1;
1663
161M
        case JUMP_REPEAT_ONE_2:
1664
161M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
161M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
26.0M
        case JUMP_ASSERT:
1673
26.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
26.0M
            goto jump_assert;
1675
92.1M
        case JUMP_ASSERT_NOT:
1676
92.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
92.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
695M
    }
1683
1684
0
    return ret; /* should never get here */
1685
695M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
128M
{
601
128M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
128M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
128M
    Py_ssize_t ret = 0;
604
128M
    int jump;
605
128M
    unsigned int sigcount = state->sigcount;
606
607
128M
    SRE(match_context)* ctx;
608
128M
    SRE(match_context)* nextctx;
609
128M
    INIT_TRACE(state);
610
611
128M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
128M
    DATA_ALLOC(SRE(match_context), ctx);
614
128M
    ctx->last_ctx_pos = -1;
615
128M
    ctx->jump = JUMP_NONE;
616
128M
    ctx->toplevel = toplevel;
617
128M
    ctx_pos = alloc_pos;
618
619
128M
#if USE_COMPUTED_GOTOS
620
128M
#include "sre_targets.h"
621
128M
#endif
622
623
366M
entrance:
624
625
366M
    ;  // Fashion statement.
626
366M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
366M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
35.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.67M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.67M
                   end - ptr, (size_t) pattern[3]));
634
3.67M
            RETURN_FAILURE;
635
3.67M
        }
636
31.5M
        pattern += pattern[1] + 1;
637
31.5M
    }
638
639
363M
#if USE_COMPUTED_GOTOS
640
363M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
363M
    {
647
648
363M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
113M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
113M
                   ptr, pattern[0]));
653
113M
            {
654
113M
                int i = pattern[0];
655
113M
                if (i & 1)
656
31.6M
                    state->lastindex = i/2 + 1;
657
113M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
110M
                    int j = state->lastmark + 1;
663
118M
                    while (j < i)
664
8.60M
                        state->mark[j++] = NULL;
665
110M
                    state->lastmark = i;
666
110M
                }
667
113M
                state->mark[i] = ptr;
668
113M
            }
669
113M
            pattern++;
670
113M
            DISPATCH;
671
672
113M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
80.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
80.2M
                   ptr, *pattern));
677
80.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
34.0M
                RETURN_FAILURE;
679
46.2M
            pattern++;
680
46.2M
            ptr++;
681
46.2M
            DISPATCH;
682
683
46.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
68.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
68.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
68.9M
            if (ctx->toplevel &&
698
23.6M
                ((state->match_all && ptr != state->end) ||
699
23.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
68.9M
            state->ptr = ptr;
704
68.9M
            RETURN_SUCCESS;
705
706
17.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
17.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
17.2M
            if (!SRE(at)(state, ptr, *pattern))
711
2.96M
                RETURN_FAILURE;
712
14.3M
            pattern++;
713
14.3M
            DISPATCH;
714
715
14.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
98.3M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
98.3M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
98.3M
            if (ptr >= end ||
749
97.4M
                !SRE(charset)(state, pattern + 1, *ptr))
750
16.0M
                RETURN_FAILURE;
751
82.2M
            pattern += pattern[0];
752
82.2M
            ptr++;
753
82.2M
            DISPATCH;
754
755
82.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
659k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
659k
                   pattern, ptr, pattern[0]));
758
659k
            if (ptr >= end ||
759
659k
                sre_lower_ascii(*ptr) != *pattern)
760
3.73k
                RETURN_FAILURE;
761
656k
            pattern++;
762
656k
            ptr++;
763
656k
            DISPATCH;
764
765
656k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
33.7M
        TARGET(SRE_OP_JUMP):
845
33.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
33.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
33.7M
                   ptr, pattern[0]));
850
33.7M
            pattern += pattern[0];
851
33.7M
            DISPATCH;
852
853
44.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
44.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
44.0M
            LASTMARK_SAVE();
858
44.0M
            if (state->repeat)
859
14.6M
                MARK_PUSH(ctx->lastmark);
860
105M
            for (; pattern[0]; pattern += pattern[0]) {
861
93.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
64.4M
                    (ptr >= end ||
863
64.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
27.5M
                    continue;
865
65.9M
                if (pattern[1] == SRE_OP_IN &&
866
14.2M
                    (ptr >= end ||
867
14.0M
                     !SRE(charset)(state, pattern + 3,
868
14.0M
                                   (SRE_CODE) *ptr)))
869
7.23M
                    continue;
870
58.7M
                state->ptr = ptr;
871
58.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
58.7M
                if (ret) {
873
32.1M
                    if (state->repeat)
874
14.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
32.1M
                    RETURN_ON_ERROR(ret);
876
32.1M
                    RETURN_SUCCESS;
877
32.1M
                }
878
26.5M
                if (state->repeat)
879
5.71k
                    MARK_POP_KEEP(ctx->lastmark);
880
26.5M
                LASTMARK_RESTORE();
881
26.5M
            }
882
11.8M
            if (state->repeat)
883
486k
                MARK_POP_DISCARD(ctx->lastmark);
884
11.8M
            RETURN_FAILURE;
885
886
112M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
112M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
112M
                   pattern[1], pattern[2]));
898
899
112M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
964k
                RETURN_FAILURE; /* cannot match */
901
902
111M
            state->ptr = ptr;
903
904
111M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
111M
            RETURN_ON_ERROR(ret);
906
111M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
111M
            ctx->count = ret;
908
111M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
111M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
52.3M
                RETURN_FAILURE;
917
918
59.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
688k
                ptr == state->end &&
920
59.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
59.1k
            {
922
                /* tail is empty.  we're finished */
923
59.1k
                state->ptr = ptr;
924
59.1k
                RETURN_SUCCESS;
925
59.1k
            }
926
927
59.1M
            LASTMARK_SAVE();
928
59.1M
            if (state->repeat)
929
42.6M
                MARK_PUSH(ctx->lastmark);
930
931
59.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.97M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.97M
                for (;;) {
936
15.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.51M
                        ptr--;
939
9.51M
                        ctx->count--;
940
9.51M
                    }
941
5.97M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.51M
                        break;
943
4.46M
                    state->ptr = ptr;
944
4.46M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.46M
                            pattern+pattern[0]);
946
4.46M
                    if (ret) {
947
4.46M
                        if (state->repeat)
948
3.01M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.46M
                        RETURN_ON_ERROR(ret);
950
4.46M
                        RETURN_SUCCESS;
951
4.46M
                    }
952
237
                    if (state->repeat)
953
221
                        MARK_POP_KEEP(ctx->lastmark);
954
237
                    LASTMARK_RESTORE();
955
956
237
                    ptr--;
957
237
                    ctx->count--;
958
237
                }
959
1.51M
                if (state->repeat)
960
405k
                    MARK_POP_DISCARD(ctx->lastmark);
961
53.1M
            } else {
962
                /* general case */
963
61.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
59.9M
                    state->ptr = ptr;
965
59.9M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
59.9M
                            pattern+pattern[0]);
967
59.9M
                    if (ret) {
968
51.4M
                        if (state->repeat)
969
38.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
51.4M
                        RETURN_ON_ERROR(ret);
971
51.4M
                        RETURN_SUCCESS;
972
51.4M
                    }
973
8.45M
                    if (state->repeat)
974
1.42M
                        MARK_POP_KEEP(ctx->lastmark);
975
8.45M
                    LASTMARK_RESTORE();
976
977
8.45M
                    ptr--;
978
8.45M
                    ctx->count--;
979
8.45M
                }
980
1.71M
                if (state->repeat)
981
1.22M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.71M
            }
983
3.23M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
16.4M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
16.4M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
16.4M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
16.4M
            ctx->u.rep = repeat_pool_malloc(state);
1127
16.4M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
16.4M
            ctx->u.rep->count = -1;
1131
16.4M
            ctx->u.rep->pattern = pattern;
1132
16.4M
            ctx->u.rep->prev = state->repeat;
1133
16.4M
            ctx->u.rep->last_ptr = NULL;
1134
16.4M
            state->repeat = ctx->u.rep;
1135
1136
16.4M
            state->ptr = ptr;
1137
16.4M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
16.4M
            state->repeat = ctx->u.rep->prev;
1139
16.4M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
16.4M
            if (ret) {
1142
16.3M
                RETURN_ON_ERROR(ret);
1143
16.3M
                RETURN_SUCCESS;
1144
16.3M
            }
1145
102k
            RETURN_FAILURE;
1146
1147
65.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
65.7M
            ctx->u.rep = state->repeat;
1155
65.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
65.7M
            state->ptr = ptr;
1159
1160
65.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
65.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
65.7M
                   ptr, ctx->count));
1164
1165
65.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
16.7k
                ctx->u.rep->count = ctx->count;
1168
16.7k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
16.7k
                        ctx->u.rep->pattern+3);
1170
16.7k
                if (ret) {
1171
3.61k
                    RETURN_ON_ERROR(ret);
1172
3.61k
                    RETURN_SUCCESS;
1173
3.61k
                }
1174
13.1k
                ctx->u.rep->count = ctx->count-1;
1175
13.1k
                state->ptr = ptr;
1176
13.1k
                RETURN_FAILURE;
1177
13.1k
            }
1178
1179
65.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
8.13M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
57.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
57.6M
                ctx->u.rep->count = ctx->count;
1185
57.6M
                LASTMARK_SAVE();
1186
57.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
57.6M
                LAST_PTR_PUSH();
1189
57.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
57.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
57.6M
                        ctx->u.rep->pattern+3);
1192
57.6M
                LAST_PTR_POP();
1193
57.6M
                if (ret) {
1194
49.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
49.1M
                    RETURN_ON_ERROR(ret);
1196
49.1M
                    RETURN_SUCCESS;
1197
49.1M
                }
1198
8.49M
                MARK_POP(ctx->lastmark);
1199
8.49M
                LASTMARK_RESTORE();
1200
8.49M
                ctx->u.rep->count = ctx->count-1;
1201
8.49M
                state->ptr = ptr;
1202
8.49M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
16.6M
            state->repeat = ctx->u.rep->prev;
1207
16.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
16.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
16.6M
            RETURN_ON_SUCCESS(ret);
1211
323k
            state->ptr = ptr;
1212
323k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.66M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.66M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.66M
                   ptr, pattern[1]));
1565
3.66M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.66M
            state->ptr = ptr - pattern[1];
1568
3.66M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.66M
            RETURN_ON_FAILURE(ret);
1570
3.36M
            pattern += pattern[0];
1571
3.36M
            DISPATCH;
1572
1573
20.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
20.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
20.4M
                   ptr, pattern[1]));
1578
20.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
20.4M
                state->ptr = ptr - pattern[1];
1580
20.4M
                LASTMARK_SAVE();
1581
20.4M
                if (state->repeat)
1582
20.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
40.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
40.8M
                if (ret) {
1586
2.16M
                    if (state->repeat)
1587
2.16M
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.16M
                    RETURN_ON_ERROR(ret);
1589
2.16M
                    RETURN_FAILURE;
1590
2.16M
                }
1591
18.2M
                if (state->repeat)
1592
18.2M
                    MARK_POP(ctx->lastmark);
1593
18.2M
                LASTMARK_RESTORE();
1594
18.2M
            }
1595
18.2M
            pattern += pattern[0];
1596
18.2M
            DISPATCH;
1597
1598
18.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
366M
exit:
1620
366M
    ctx_pos = ctx->last_ctx_pos;
1621
366M
    jump = ctx->jump;
1622
366M
    DATA_POP_DISCARD(ctx);
1623
366M
    if (ctx_pos == -1) {
1624
128M
        state->sigcount = sigcount;
1625
128M
        return ret;
1626
128M
    }
1627
237M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
237M
    switch (jump) {
1630
57.6M
        case JUMP_MAX_UNTIL_2:
1631
57.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
57.6M
            goto jump_max_until_2;
1633
16.6M
        case JUMP_MAX_UNTIL_3:
1634
16.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
16.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
58.7M
        case JUMP_BRANCH:
1643
58.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
58.7M
            goto jump_branch;
1645
16.7k
        case JUMP_MAX_UNTIL_1:
1646
16.7k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
16.7k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
16.4M
        case JUMP_REPEAT:
1658
16.4M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
16.4M
            goto jump_repeat;
1660
4.46M
        case JUMP_REPEAT_ONE_1:
1661
4.46M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.46M
            goto jump_repeat_one_1;
1663
59.9M
        case JUMP_REPEAT_ONE_2:
1664
59.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
59.9M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.66M
        case JUMP_ASSERT:
1673
3.66M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.66M
            goto jump_assert;
1675
20.4M
        case JUMP_ASSERT_NOT:
1676
20.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
20.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
237M
    }
1683
1684
0
    return ret; /* should never get here */
1685
237M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
190M
{
601
190M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
190M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
190M
    Py_ssize_t ret = 0;
604
190M
    int jump;
605
190M
    unsigned int sigcount = state->sigcount;
606
607
190M
    SRE(match_context)* ctx;
608
190M
    SRE(match_context)* nextctx;
609
190M
    INIT_TRACE(state);
610
611
190M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
190M
    DATA_ALLOC(SRE(match_context), ctx);
614
190M
    ctx->last_ctx_pos = -1;
615
190M
    ctx->jump = JUMP_NONE;
616
190M
    ctx->toplevel = toplevel;
617
190M
    ctx_pos = alloc_pos;
618
619
190M
#if USE_COMPUTED_GOTOS
620
190M
#include "sre_targets.h"
621
190M
#endif
622
623
446M
entrance:
624
625
446M
    ;  // Fashion statement.
626
446M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
446M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
14.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
110k
                   end - ptr, (size_t) pattern[3]));
634
110k
            RETURN_FAILURE;
635
110k
        }
636
14.1M
        pattern += pattern[1] + 1;
637
14.1M
    }
638
639
446M
#if USE_COMPUTED_GOTOS
640
446M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
446M
    {
647
648
446M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
158M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
158M
                   ptr, pattern[0]));
653
158M
            {
654
158M
                int i = pattern[0];
655
158M
                if (i & 1)
656
17.6M
                    state->lastindex = i/2 + 1;
657
158M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
157M
                    int j = state->lastmark + 1;
663
160M
                    while (j < i)
664
3.49M
                        state->mark[j++] = NULL;
665
157M
                    state->lastmark = i;
666
157M
                }
667
158M
                state->mark[i] = ptr;
668
158M
            }
669
158M
            pattern++;
670
158M
            DISPATCH;
671
672
158M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
34.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
34.0M
                   ptr, *pattern));
677
34.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.5M
                RETURN_FAILURE;
679
16.4M
            pattern++;
680
16.4M
            ptr++;
681
16.4M
            DISPATCH;
682
683
16.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
65.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
65.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
65.5M
            if (ctx->toplevel &&
698
9.61M
                ((state->match_all && ptr != state->end) ||
699
9.61M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
65.5M
            state->ptr = ptr;
704
65.5M
            RETURN_SUCCESS;
705
706
4.29M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
4.29M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
4.29M
            if (!SRE(at)(state, ptr, *pattern))
711
1.58M
                RETURN_FAILURE;
712
2.70M
            pattern++;
713
2.70M
            DISPATCH;
714
715
2.70M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
178M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
178M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
178M
            if (ptr >= end ||
749
175M
                !SRE(charset)(state, pattern + 1, *ptr))
750
56.0M
                RETURN_FAILURE;
751
122M
            pattern += pattern[0];
752
122M
            ptr++;
753
122M
            DISPATCH;
754
755
122M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.34M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.34M
                   pattern, ptr, pattern[0]));
758
4.34M
            if (ptr >= end ||
759
4.34M
                sre_lower_ascii(*ptr) != *pattern)
760
29.8k
                RETURN_FAILURE;
761
4.31M
            pattern++;
762
4.31M
            ptr++;
763
4.31M
            DISPATCH;
764
765
4.31M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.7M
        TARGET(SRE_OP_JUMP):
845
19.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.7M
                   ptr, pattern[0]));
850
19.7M
            pattern += pattern[0];
851
19.7M
            DISPATCH;
852
853
25.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
25.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
25.2M
            LASTMARK_SAVE();
858
25.2M
            if (state->repeat)
859
19.7M
                MARK_PUSH(ctx->lastmark);
860
53.7M
            for (; pattern[0]; pattern += pattern[0]) {
861
47.8M
                if (pattern[1] == SRE_OP_LITERAL &&
862
22.6M
                    (ptr >= end ||
863
22.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
14.2M
                    continue;
865
33.6M
                if (pattern[1] == SRE_OP_IN &&
866
17.1M
                    (ptr >= end ||
867
17.1M
                     !SRE(charset)(state, pattern + 3,
868
17.1M
                                   (SRE_CODE) *ptr)))
869
9.55M
                    continue;
870
24.0M
                state->ptr = ptr;
871
24.0M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
24.0M
                if (ret) {
873
19.3M
                    if (state->repeat)
874
16.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
19.3M
                    RETURN_ON_ERROR(ret);
876
19.3M
                    RETURN_SUCCESS;
877
19.3M
                }
878
4.69M
                if (state->repeat)
879
2.71k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.69M
                LASTMARK_RESTORE();
881
4.69M
            }
882
5.91M
            if (state->repeat)
883
3.38M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.91M
            RETURN_FAILURE;
885
886
173M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
173M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
173M
                   pattern[1], pattern[2]));
898
899
173M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.31M
                RETURN_FAILURE; /* cannot match */
901
902
172M
            state->ptr = ptr;
903
904
172M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
172M
            RETURN_ON_ERROR(ret);
906
172M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
172M
            ctx->count = ret;
908
172M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
172M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
124M
                RETURN_FAILURE;
917
918
47.7M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.81M
                ptr == state->end &&
920
18.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.7k
            {
922
                /* tail is empty.  we're finished */
923
18.7k
                state->ptr = ptr;
924
18.7k
                RETURN_SUCCESS;
925
18.7k
            }
926
927
47.7M
            LASTMARK_SAVE();
928
47.7M
            if (state->repeat)
929
30.1M
                MARK_PUSH(ctx->lastmark);
930
931
47.7M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.54M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.54M
                for (;;) {
936
14.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
11.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
7.56M
                        ptr--;
939
7.56M
                        ctx->count--;
940
7.56M
                    }
941
6.54M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.03M
                        break;
943
3.50M
                    state->ptr = ptr;
944
3.50M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.50M
                            pattern+pattern[0]);
946
3.50M
                    if (ret) {
947
3.50M
                        if (state->repeat)
948
3.46M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.50M
                        RETURN_ON_ERROR(ret);
950
3.50M
                        RETURN_SUCCESS;
951
3.50M
                    }
952
310
                    if (state->repeat)
953
310
                        MARK_POP_KEEP(ctx->lastmark);
954
310
                    LASTMARK_RESTORE();
955
956
310
                    ptr--;
957
310
                    ctx->count--;
958
310
                }
959
3.03M
                if (state->repeat)
960
3.02M
                    MARK_POP_DISCARD(ctx->lastmark);
961
41.1M
            } else {
962
                /* general case */
963
48.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
48.4M
                    state->ptr = ptr;
965
48.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
48.4M
                            pattern+pattern[0]);
967
48.4M
                    if (ret) {
968
40.9M
                        if (state->repeat)
969
23.4M
                            MARK_POP_DISCARD(ctx->lastmark);
970
40.9M
                        RETURN_ON_ERROR(ret);
971
40.9M
                        RETURN_SUCCESS;
972
40.9M
                    }
973
7.44M
                    if (state->repeat)
974
232k
                        MARK_POP_KEEP(ctx->lastmark);
975
7.44M
                    LASTMARK_RESTORE();
976
977
7.44M
                    ptr--;
978
7.44M
                    ctx->count--;
979
7.44M
                }
980
197k
                if (state->repeat)
981
157k
                    MARK_POP_DISCARD(ctx->lastmark);
982
197k
            }
983
3.23M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
15.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
15.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
15.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
15.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
15.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
15.2M
            ctx->u.rep->count = -1;
1131
15.2M
            ctx->u.rep->pattern = pattern;
1132
15.2M
            ctx->u.rep->prev = state->repeat;
1133
15.2M
            ctx->u.rep->last_ptr = NULL;
1134
15.2M
            state->repeat = ctx->u.rep;
1135
1136
15.2M
            state->ptr = ptr;
1137
15.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
15.2M
            state->repeat = ctx->u.rep->prev;
1139
15.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
15.2M
            if (ret) {
1142
15.2M
                RETURN_ON_ERROR(ret);
1143
15.2M
                RETURN_SUCCESS;
1144
15.2M
            }
1145
3.71k
            RETURN_FAILURE;
1146
1147
88.4M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
88.4M
            ctx->u.rep = state->repeat;
1155
88.4M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
88.4M
            state->ptr = ptr;
1159
1160
88.4M
            ctx->count = ctx->u.rep->count+1;
1161
1162
88.4M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
88.4M
                   ptr, ctx->count));
1164
1165
88.4M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
125k
                ctx->u.rep->count = ctx->count;
1168
125k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
125k
                        ctx->u.rep->pattern+3);
1170
125k
                if (ret) {
1171
123k
                    RETURN_ON_ERROR(ret);
1172
123k
                    RETURN_SUCCESS;
1173
123k
                }
1174
2.72k
                ctx->u.rep->count = ctx->count-1;
1175
2.72k
                state->ptr = ptr;
1176
2.72k
                RETURN_FAILURE;
1177
2.72k
            }
1178
1179
88.3M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.39M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
84.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
84.9M
                ctx->u.rep->count = ctx->count;
1185
84.9M
                LASTMARK_SAVE();
1186
84.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
84.9M
                LAST_PTR_PUSH();
1189
84.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
84.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
84.9M
                        ctx->u.rep->pattern+3);
1192
84.9M
                LAST_PTR_POP();
1193
84.9M
                if (ret) {
1194
72.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
72.9M
                    RETURN_ON_ERROR(ret);
1196
72.9M
                    RETURN_SUCCESS;
1197
72.9M
                }
1198
11.9M
                MARK_POP(ctx->lastmark);
1199
11.9M
                LASTMARK_RESTORE();
1200
11.9M
                ctx->u.rep->count = ctx->count-1;
1201
11.9M
                state->ptr = ptr;
1202
11.9M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
15.3M
            state->repeat = ctx->u.rep->prev;
1207
15.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
15.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
15.3M
            RETURN_ON_SUCCESS(ret);
1211
73.0k
            state->ptr = ptr;
1212
73.0k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
9.63M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
9.63M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
9.63M
                   ptr, pattern[1]));
1565
9.63M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
9.63M
            state->ptr = ptr - pattern[1];
1568
9.63M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
9.63M
            RETURN_ON_FAILURE(ret);
1570
6.69M
            pattern += pattern[0];
1571
6.69M
            DISPATCH;
1572
1573
54.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
54.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
54.3M
                   ptr, pattern[1]));
1578
54.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
54.3M
                state->ptr = ptr - pattern[1];
1580
54.3M
                LASTMARK_SAVE();
1581
54.3M
                if (state->repeat)
1582
54.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
108M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
108M
                if (ret) {
1586
84.2k
                    if (state->repeat)
1587
84.2k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
84.2k
                    RETURN_ON_ERROR(ret);
1589
84.2k
                    RETURN_FAILURE;
1590
84.2k
                }
1591
54.2M
                if (state->repeat)
1592
54.2M
                    MARK_POP(ctx->lastmark);
1593
54.2M
                LASTMARK_RESTORE();
1594
54.2M
            }
1595
54.2M
            pattern += pattern[0];
1596
54.2M
            DISPATCH;
1597
1598
54.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
446M
exit:
1620
446M
    ctx_pos = ctx->last_ctx_pos;
1621
446M
    jump = ctx->jump;
1622
446M
    DATA_POP_DISCARD(ctx);
1623
446M
    if (ctx_pos == -1) {
1624
190M
        state->sigcount = sigcount;
1625
190M
        return ret;
1626
190M
    }
1627
255M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
255M
    switch (jump) {
1630
84.9M
        case JUMP_MAX_UNTIL_2:
1631
84.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
84.9M
            goto jump_max_until_2;
1633
15.3M
        case JUMP_MAX_UNTIL_3:
1634
15.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
15.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
24.0M
        case JUMP_BRANCH:
1643
24.0M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
24.0M
            goto jump_branch;
1645
125k
        case JUMP_MAX_UNTIL_1:
1646
125k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
125k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
15.2M
        case JUMP_REPEAT:
1658
15.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
15.2M
            goto jump_repeat;
1660
3.50M
        case JUMP_REPEAT_ONE_1:
1661
3.50M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.50M
            goto jump_repeat_one_1;
1663
48.4M
        case JUMP_REPEAT_ONE_2:
1664
48.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
48.4M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
9.63M
        case JUMP_ASSERT:
1673
9.63M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
9.63M
            goto jump_assert;
1675
54.3M
        case JUMP_ASSERT_NOT:
1676
54.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
54.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
255M
    }
1683
1684
0
    return ret; /* should never get here */
1685
255M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
47.9M
{
601
47.9M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
47.9M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
47.9M
    Py_ssize_t ret = 0;
604
47.9M
    int jump;
605
47.9M
    unsigned int sigcount = state->sigcount;
606
607
47.9M
    SRE(match_context)* ctx;
608
47.9M
    SRE(match_context)* nextctx;
609
47.9M
    INIT_TRACE(state);
610
611
47.9M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
47.9M
    DATA_ALLOC(SRE(match_context), ctx);
614
47.9M
    ctx->last_ctx_pos = -1;
615
47.9M
    ctx->jump = JUMP_NONE;
616
47.9M
    ctx->toplevel = toplevel;
617
47.9M
    ctx_pos = alloc_pos;
618
619
47.9M
#if USE_COMPUTED_GOTOS
620
47.9M
#include "sre_targets.h"
621
47.9M
#endif
622
623
249M
entrance:
624
625
249M
    ;  // Fashion statement.
626
249M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
249M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
9.16M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.82k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.82k
                   end - ptr, (size_t) pattern[3]));
634
3.82k
            RETURN_FAILURE;
635
3.82k
        }
636
9.16M
        pattern += pattern[1] + 1;
637
9.16M
    }
638
639
249M
#if USE_COMPUTED_GOTOS
640
249M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
249M
    {
647
648
249M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
64.4M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
64.4M
                   ptr, pattern[0]));
653
64.4M
            {
654
64.4M
                int i = pattern[0];
655
64.4M
                if (i & 1)
656
19.6M
                    state->lastindex = i/2 + 1;
657
64.4M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
62.1M
                    int j = state->lastmark + 1;
663
64.4M
                    while (j < i)
664
2.30M
                        state->mark[j++] = NULL;
665
62.1M
                    state->lastmark = i;
666
62.1M
                }
667
64.4M
                state->mark[i] = ptr;
668
64.4M
            }
669
64.4M
            pattern++;
670
64.4M
            DISPATCH;
671
672
64.4M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
30.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
30.7M
                   ptr, *pattern));
677
30.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
13.5M
                RETURN_FAILURE;
679
17.1M
            pattern++;
680
17.1M
            ptr++;
681
17.1M
            DISPATCH;
682
683
17.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
25.3M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
25.3M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
25.3M
            if (ctx->toplevel &&
698
7.02M
                ((state->match_all && ptr != state->end) ||
699
7.02M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
25.3M
            state->ptr = ptr;
704
25.3M
            RETURN_SUCCESS;
705
706
1.81M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.81M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.81M
            if (!SRE(at)(state, ptr, *pattern))
711
1.79M
                RETURN_FAILURE;
712
28.6k
            pattern++;
713
28.6k
            DISPATCH;
714
715
28.6k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
66.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
66.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
66.1M
            if (ptr >= end ||
749
66.1M
                !SRE(charset)(state, pattern + 1, *ptr))
750
14.0M
                RETURN_FAILURE;
751
52.1M
            pattern += pattern[0];
752
52.1M
            ptr++;
753
52.1M
            DISPATCH;
754
755
52.1M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.86M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.86M
                   pattern, ptr, pattern[0]));
758
2.86M
            if (ptr >= end ||
759
2.86M
                sre_lower_ascii(*ptr) != *pattern)
760
34.5k
                RETURN_FAILURE;
761
2.82M
            pattern++;
762
2.82M
            ptr++;
763
2.82M
            DISPATCH;
764
765
2.82M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.2M
        TARGET(SRE_OP_JUMP):
845
25.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.2M
                   ptr, pattern[0]));
850
25.2M
            pattern += pattern[0];
851
25.2M
            DISPATCH;
852
853
30.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
30.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
30.3M
            LASTMARK_SAVE();
858
30.3M
            if (state->repeat)
859
27.2M
                MARK_PUSH(ctx->lastmark);
860
65.1M
            for (; pattern[0]; pattern += pattern[0]) {
861
59.4M
                if (pattern[1] == SRE_OP_LITERAL &&
862
30.2M
                    (ptr >= end ||
863
30.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
22.2M
                    continue;
865
37.1M
                if (pattern[1] == SRE_OP_IN &&
866
21.2M
                    (ptr >= end ||
867
21.2M
                     !SRE(charset)(state, pattern + 3,
868
21.2M
                                   (SRE_CODE) *ptr)))
869
11.6M
                    continue;
870
25.4M
                state->ptr = ptr;
871
25.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
25.4M
                if (ret) {
873
24.6M
                    if (state->repeat)
874
22.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.6M
                    RETURN_ON_ERROR(ret);
876
24.6M
                    RETURN_SUCCESS;
877
24.6M
                }
878
882k
                if (state->repeat)
879
7.83k
                    MARK_POP_KEEP(ctx->lastmark);
880
882k
                LASTMARK_RESTORE();
881
882k
            }
882
5.74M
            if (state->repeat)
883
5.06M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.74M
            RETURN_FAILURE;
885
886
87.1M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
87.1M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
87.1M
                   pattern[1], pattern[2]));
898
899
87.1M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
16.7k
                RETURN_FAILURE; /* cannot match */
901
902
87.0M
            state->ptr = ptr;
903
904
87.0M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
87.0M
            RETURN_ON_ERROR(ret);
906
87.0M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
87.0M
            ctx->count = ret;
908
87.0M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
87.0M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
29.6M
                RETURN_FAILURE;
917
918
57.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.26M
                ptr == state->end &&
920
5.72k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
5.72k
            {
922
                /* tail is empty.  we're finished */
923
5.72k
                state->ptr = ptr;
924
5.72k
                RETURN_SUCCESS;
925
5.72k
            }
926
927
57.4M
            LASTMARK_SAVE();
928
57.4M
            if (state->repeat)
929
44.6M
                MARK_PUSH(ctx->lastmark);
930
931
57.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
11.1M
                ctx->u.chr = pattern[pattern[0]+1];
935
11.1M
                for (;;) {
936
30.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
25.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
19.2M
                        ptr--;
939
19.2M
                        ctx->count--;
940
19.2M
                    }
941
11.1M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
4.78M
                        break;
943
6.39M
                    state->ptr = ptr;
944
6.39M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.39M
                            pattern+pattern[0]);
946
6.39M
                    if (ret) {
947
6.39M
                        if (state->repeat)
948
6.39M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.39M
                        RETURN_ON_ERROR(ret);
950
6.39M
                        RETURN_SUCCESS;
951
6.39M
                    }
952
304
                    if (state->repeat)
953
304
                        MARK_POP_KEEP(ctx->lastmark);
954
304
                    LASTMARK_RESTORE();
955
956
304
                    ptr--;
957
304
                    ctx->count--;
958
304
                }
959
4.78M
                if (state->repeat)
960
4.78M
                    MARK_POP_DISCARD(ctx->lastmark);
961
46.2M
            } else {
962
                /* general case */
963
53.1M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
53.0M
                    state->ptr = ptr;
965
53.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
53.0M
                            pattern+pattern[0]);
967
53.0M
                    if (ret) {
968
46.1M
                        if (state->repeat)
969
33.4M
                            MARK_POP_DISCARD(ctx->lastmark);
970
46.1M
                        RETURN_ON_ERROR(ret);
971
46.1M
                        RETURN_SUCCESS;
972
46.1M
                    }
973
6.91M
                    if (state->repeat)
974
100k
                        MARK_POP_KEEP(ctx->lastmark);
975
6.91M
                    LASTMARK_RESTORE();
976
977
6.91M
                    ptr--;
978
6.91M
                    ctx->count--;
979
6.91M
                }
980
77.2k
                if (state->repeat)
981
66.2k
                    MARK_POP_DISCARD(ctx->lastmark);
982
77.2k
            }
983
4.86M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
16.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
16.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
16.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
16.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
16.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
16.8M
            ctx->u.rep->count = -1;
1131
16.8M
            ctx->u.rep->pattern = pattern;
1132
16.8M
            ctx->u.rep->prev = state->repeat;
1133
16.8M
            ctx->u.rep->last_ptr = NULL;
1134
16.8M
            state->repeat = ctx->u.rep;
1135
1136
16.8M
            state->ptr = ptr;
1137
16.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
16.8M
            state->repeat = ctx->u.rep->prev;
1139
16.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
16.8M
            if (ret) {
1142
16.8M
                RETURN_ON_ERROR(ret);
1143
16.8M
                RETURN_SUCCESS;
1144
16.8M
            }
1145
451
            RETURN_FAILURE;
1146
1147
59.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
59.1M
            ctx->u.rep = state->repeat;
1155
59.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
59.1M
            state->ptr = ptr;
1159
1160
59.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
59.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
59.1M
                   ptr, ctx->count));
1164
1165
59.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
2.99k
                ctx->u.rep->count = ctx->count;
1168
2.99k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
2.99k
                        ctx->u.rep->pattern+3);
1170
2.99k
                if (ret) {
1171
2.94k
                    RETURN_ON_ERROR(ret);
1172
2.94k
                    RETURN_SUCCESS;
1173
2.94k
                }
1174
49
                ctx->u.rep->count = ctx->count-1;
1175
49
                state->ptr = ptr;
1176
49
                RETURN_FAILURE;
1177
49
            }
1178
1179
59.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
6.11M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
53.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
53.0M
                ctx->u.rep->count = ctx->count;
1185
53.0M
                LASTMARK_SAVE();
1186
53.0M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
53.0M
                LAST_PTR_PUSH();
1189
53.0M
                ctx->u.rep->last_ptr = state->ptr;
1190
53.0M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
53.0M
                        ctx->u.rep->pattern+3);
1192
53.0M
                LAST_PTR_POP();
1193
53.0M
                if (ret) {
1194
42.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
42.2M
                    RETURN_ON_ERROR(ret);
1196
42.2M
                    RETURN_SUCCESS;
1197
42.2M
                }
1198
10.7M
                MARK_POP(ctx->lastmark);
1199
10.7M
                LASTMARK_RESTORE();
1200
10.7M
                ctx->u.rep->count = ctx->count-1;
1201
10.7M
                state->ptr = ptr;
1202
10.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
16.8M
            state->repeat = ctx->u.rep->prev;
1207
16.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
16.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
16.8M
            RETURN_ON_SUCCESS(ret);
1211
34.9k
            state->ptr = ptr;
1212
34.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.7M
                   ptr, pattern[1]));
1565
12.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.7M
            state->ptr = ptr - pattern[1];
1568
12.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.7M
            RETURN_ON_FAILURE(ret);
1570
11.1M
            pattern += pattern[0];
1571
11.1M
            DISPATCH;
1572
1573
17.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
17.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
17.3M
                   ptr, pattern[1]));
1578
17.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
17.3M
                state->ptr = ptr - pattern[1];
1580
17.3M
                LASTMARK_SAVE();
1581
17.3M
                if (state->repeat)
1582
17.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
34.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
34.7M
                if (ret) {
1586
13.1k
                    if (state->repeat)
1587
13.1k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
13.1k
                    RETURN_ON_ERROR(ret);
1589
13.1k
                    RETURN_FAILURE;
1590
13.1k
                }
1591
17.3M
                if (state->repeat)
1592
17.3M
                    MARK_POP(ctx->lastmark);
1593
17.3M
                LASTMARK_RESTORE();
1594
17.3M
            }
1595
17.3M
            pattern += pattern[0];
1596
17.3M
            DISPATCH;
1597
1598
17.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
249M
exit:
1620
249M
    ctx_pos = ctx->last_ctx_pos;
1621
249M
    jump = ctx->jump;
1622
249M
    DATA_POP_DISCARD(ctx);
1623
249M
    if (ctx_pos == -1) {
1624
47.9M
        state->sigcount = sigcount;
1625
47.9M
        return ret;
1626
47.9M
    }
1627
201M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
201M
    switch (jump) {
1630
53.0M
        case JUMP_MAX_UNTIL_2:
1631
53.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
53.0M
            goto jump_max_until_2;
1633
16.8M
        case JUMP_MAX_UNTIL_3:
1634
16.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
16.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
25.4M
        case JUMP_BRANCH:
1643
25.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
25.4M
            goto jump_branch;
1645
2.99k
        case JUMP_MAX_UNTIL_1:
1646
2.99k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
2.99k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
16.8M
        case JUMP_REPEAT:
1658
16.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
16.8M
            goto jump_repeat;
1660
6.39M
        case JUMP_REPEAT_ONE_1:
1661
6.39M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.39M
            goto jump_repeat_one_1;
1663
53.0M
        case JUMP_REPEAT_ONE_2:
1664
53.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
53.0M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.7M
        case JUMP_ASSERT:
1673
12.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.7M
            goto jump_assert;
1675
17.3M
        case JUMP_ASSERT_NOT:
1676
17.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
17.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
201M
    }
1683
1684
0
    return ret; /* should never get here */
1685
201M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
208M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
110M
{
1694
110M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
110M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
110M
    Py_ssize_t status = 0;
1697
110M
    Py_ssize_t prefix_len = 0;
1698
110M
    Py_ssize_t prefix_skip = 0;
1699
110M
    SRE_CODE* prefix = NULL;
1700
110M
    SRE_CODE* charset = NULL;
1701
110M
    SRE_CODE* overlap = NULL;
1702
110M
    int flags = 0;
1703
110M
    INIT_TRACE(state);
1704
1705
110M
    if (ptr > end)
1706
0
        return 0;
1707
1708
110M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
110M
        flags = pattern[2];
1713
1714
110M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.62M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.62M
                   end - ptr, (size_t) pattern[3]));
1717
5.62M
            return 0;
1718
5.62M
        }
1719
104M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.00M
            end -= pattern[3] - 1;
1723
8.00M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.00M
        }
1726
1727
104M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.01M
            prefix_len = pattern[5];
1731
8.01M
            prefix_skip = pattern[6];
1732
8.01M
            prefix = pattern + 7;
1733
8.01M
            overlap = prefix + prefix_len - 1;
1734
96.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
88.7M
            charset = pattern + 5;
1738
1739
104M
        pattern += 1 + pattern[1];
1740
104M
    }
1741
1742
104M
    TRACE(("prefix = %p %zd %zd\n",
1743
104M
           prefix, prefix_len, prefix_skip));
1744
104M
    TRACE(("charset = %p\n", charset));
1745
1746
104M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.16M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.79M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.79M
#endif
1753
4.79M
        end = (SRE_CHAR *)state->end;
1754
4.79M
        state->must_advance = 0;
1755
7.99M
        while (ptr < end) {
1756
117M
            while (*ptr != c) {
1757
109M
                if (++ptr >= end)
1758
464k
                    return 0;
1759
109M
            }
1760
7.37M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.37M
            state->start = ptr;
1762
7.37M
            state->ptr = ptr + prefix_skip;
1763
7.37M
            if (flags & SRE_INFO_LITERAL)
1764
7.11k
                return 1; /* we got all of it */
1765
7.37M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.37M
            if (status != 0)
1767
6.54M
                return status;
1768
828k
            ++ptr;
1769
828k
            RESET_CAPTURE_GROUP();
1770
828k
        }
1771
147k
        return 0;
1772
4.79M
    }
1773
1774
97.6M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
849k
        Py_ssize_t i = 0;
1778
1779
849k
        end = (SRE_CHAR *)state->end;
1780
849k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.67M
        for (i = 0; i < prefix_len; i++)
1784
1.11M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
559k
#endif
1787
1.76M
        while (ptr < end) {
1788
1.76M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
21.8M
            while (*ptr++ != c) {
1790
20.1M
                if (ptr >= end)
1791
369
                    return 0;
1792
20.1M
            }
1793
1.76M
            if (ptr >= end)
1794
56
                return 0;
1795
1796
1.76M
            i = 1;
1797
1.76M
            state->must_advance = 0;
1798
1.76M
            do {
1799
1.76M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.64M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.64M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.64M
                    state->start = ptr - (prefix_len - 1);
1808
1.64M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.64M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.64M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.64M
                    if (status != 0)
1813
848k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
791k
                    if (++ptr >= end)
1816
66
                        return 0;
1817
791k
                    RESET_CAPTURE_GROUP();
1818
791k
                }
1819
915k
                i = overlap[i];
1820
915k
            } while (i != 0);
1821
1.76M
        }
1822
0
        return 0;
1823
849k
    }
1824
1825
96.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
88.7M
        end = (SRE_CHAR *)state->end;
1828
88.7M
        state->must_advance = 0;
1829
91.3M
        for (;;) {
1830
366M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
275M
                ptr++;
1832
91.3M
            if (ptr >= end)
1833
3.33M
                return 0;
1834
87.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
87.9M
            state->start = ptr;
1836
87.9M
            state->ptr = ptr;
1837
87.9M
            status = SRE(match)(state, pattern, 0);
1838
87.9M
            if (status != 0)
1839
85.4M
                break;
1840
2.53M
            ptr++;
1841
2.53M
            RESET_CAPTURE_GROUP();
1842
2.53M
        }
1843
88.7M
    } else {
1844
        /* general case */
1845
8.00M
        assert(ptr <= end);
1846
8.00M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.00M
        state->start = state->ptr = ptr;
1848
8.00M
        status = SRE(match)(state, pattern, 1);
1849
8.00M
        state->must_advance = 0;
1850
8.00M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.05M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
75
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.05M
        {
1854
4.05M
            state->start = state->ptr = ptr = end;
1855
4.05M
            return 0;
1856
4.05M
        }
1857
207M
        while (status == 0 && ptr < end) {
1858
203M
            ptr++;
1859
203M
            RESET_CAPTURE_GROUP();
1860
203M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
203M
            state->start = state->ptr = ptr;
1862
203M
            status = SRE(match)(state, pattern, 0);
1863
203M
        }
1864
3.95M
    }
1865
1866
89.3M
    return status;
1867
96.7M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
51.3M
{
1694
51.3M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
51.3M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
51.3M
    Py_ssize_t status = 0;
1697
51.3M
    Py_ssize_t prefix_len = 0;
1698
51.3M
    Py_ssize_t prefix_skip = 0;
1699
51.3M
    SRE_CODE* prefix = NULL;
1700
51.3M
    SRE_CODE* charset = NULL;
1701
51.3M
    SRE_CODE* overlap = NULL;
1702
51.3M
    int flags = 0;
1703
51.3M
    INIT_TRACE(state);
1704
1705
51.3M
    if (ptr > end)
1706
0
        return 0;
1707
1708
51.3M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
51.3M
        flags = pattern[2];
1713
1714
51.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.51M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.51M
                   end - ptr, (size_t) pattern[3]));
1717
5.51M
            return 0;
1718
5.51M
        }
1719
45.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.94M
            end -= pattern[3] - 1;
1723
1.94M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.94M
        }
1726
1727
45.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.94M
            prefix_len = pattern[5];
1731
1.94M
            prefix_skip = pattern[6];
1732
1.94M
            prefix = pattern + 7;
1733
1.94M
            overlap = prefix + prefix_len - 1;
1734
43.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
38.1M
            charset = pattern + 5;
1738
1739
45.7M
        pattern += 1 + pattern[1];
1740
45.7M
    }
1741
1742
45.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
45.7M
           prefix, prefix_len, prefix_skip));
1744
45.7M
    TRACE(("charset = %p\n", charset));
1745
1746
45.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.86M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.86M
#if SIZEOF_SRE_CHAR < 4
1750
1.86M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.86M
#endif
1753
1.86M
        end = (SRE_CHAR *)state->end;
1754
1.86M
        state->must_advance = 0;
1755
2.24M
        while (ptr < end) {
1756
29.4M
            while (*ptr != c) {
1757
27.7M
                if (++ptr >= end)
1758
371k
                    return 0;
1759
27.7M
            }
1760
1.72M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.72M
            state->start = ptr;
1762
1.72M
            state->ptr = ptr + prefix_skip;
1763
1.72M
            if (flags & SRE_INFO_LITERAL)
1764
582
                return 1; /* we got all of it */
1765
1.72M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.72M
            if (status != 0)
1767
1.35M
                return status;
1768
372k
            ++ptr;
1769
372k
            RESET_CAPTURE_GROUP();
1770
372k
        }
1771
143k
        return 0;
1772
1.86M
    }
1773
1774
43.9M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
75.7k
        Py_ssize_t i = 0;
1778
1779
75.7k
        end = (SRE_CHAR *)state->end;
1780
75.7k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
75.7k
#if SIZEOF_SRE_CHAR < 4
1783
227k
        for (i = 0; i < prefix_len; i++)
1784
151k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
75.7k
#endif
1787
144k
        while (ptr < end) {
1788
144k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.76M
            while (*ptr++ != c) {
1790
2.62M
                if (ptr >= end)
1791
78
                    return 0;
1792
2.62M
            }
1793
144k
            if (ptr >= end)
1794
16
                return 0;
1795
1796
144k
            i = 1;
1797
144k
            state->must_advance = 0;
1798
144k
            do {
1799
144k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
134k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
134k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
134k
                    state->start = ptr - (prefix_len - 1);
1808
134k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
134k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
134k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
134k
                    if (status != 0)
1813
75.6k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
58.7k
                    if (++ptr >= end)
1816
27
                        return 0;
1817
58.7k
                    RESET_CAPTURE_GROUP();
1818
58.7k
                }
1819
68.9k
                i = overlap[i];
1820
68.9k
            } while (i != 0);
1821
144k
        }
1822
0
        return 0;
1823
75.7k
    }
1824
1825
43.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
38.1M
        end = (SRE_CHAR *)state->end;
1828
38.1M
        state->must_advance = 0;
1829
39.7M
        for (;;) {
1830
104M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
64.5M
                ptr++;
1832
39.7M
            if (ptr >= end)
1833
2.35M
                return 0;
1834
37.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
37.4M
            state->start = ptr;
1836
37.4M
            state->ptr = ptr;
1837
37.4M
            status = SRE(match)(state, pattern, 0);
1838
37.4M
            if (status != 0)
1839
35.8M
                break;
1840
1.57M
            ptr++;
1841
1.57M
            RESET_CAPTURE_GROUP();
1842
1.57M
        }
1843
38.1M
    } else {
1844
        /* general case */
1845
5.65M
        assert(ptr <= end);
1846
5.65M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.65M
        state->start = state->ptr = ptr;
1848
5.65M
        status = SRE(match)(state, pattern, 1);
1849
5.65M
        state->must_advance = 0;
1850
5.65M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.63M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
21
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.63M
        {
1854
2.63M
            state->start = state->ptr = ptr = end;
1855
2.63M
            return 0;
1856
2.63M
        }
1857
51.8M
        while (status == 0 && ptr < end) {
1858
48.8M
            ptr++;
1859
48.8M
            RESET_CAPTURE_GROUP();
1860
48.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
48.8M
            state->start = state->ptr = ptr;
1862
48.8M
            status = SRE(match)(state, pattern, 0);
1863
48.8M
        }
1864
3.02M
    }
1865
1866
38.8M
    return status;
1867
43.8M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
51.8M
{
1694
51.8M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
51.8M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
51.8M
    Py_ssize_t status = 0;
1697
51.8M
    Py_ssize_t prefix_len = 0;
1698
51.8M
    Py_ssize_t prefix_skip = 0;
1699
51.8M
    SRE_CODE* prefix = NULL;
1700
51.8M
    SRE_CODE* charset = NULL;
1701
51.8M
    SRE_CODE* overlap = NULL;
1702
51.8M
    int flags = 0;
1703
51.8M
    INIT_TRACE(state);
1704
1705
51.8M
    if (ptr > end)
1706
0
        return 0;
1707
1708
51.8M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
51.8M
        flags = pattern[2];
1713
1714
51.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
103k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
103k
                   end - ptr, (size_t) pattern[3]));
1717
103k
            return 0;
1718
103k
        }
1719
51.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.41M
            end -= pattern[3] - 1;
1723
3.41M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.41M
        }
1726
1727
51.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.41M
            prefix_len = pattern[5];
1731
3.41M
            prefix_skip = pattern[6];
1732
3.41M
            prefix = pattern + 7;
1733
3.41M
            overlap = prefix + prefix_len - 1;
1734
48.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
46.0M
            charset = pattern + 5;
1738
1739
51.7M
        pattern += 1 + pattern[1];
1740
51.7M
    }
1741
1742
51.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
51.7M
           prefix, prefix_len, prefix_skip));
1744
51.7M
    TRACE(("charset = %p\n", charset));
1745
1746
51.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.93M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.93M
#if SIZEOF_SRE_CHAR < 4
1750
2.93M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.93M
#endif
1753
2.93M
        end = (SRE_CHAR *)state->end;
1754
2.93M
        state->must_advance = 0;
1755
3.34M
        while (ptr < end) {
1756
58.9M
            while (*ptr != c) {
1757
55.7M
                if (++ptr >= end)
1758
87.0k
                    return 0;
1759
55.7M
            }
1760
3.24M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.24M
            state->start = ptr;
1762
3.24M
            state->ptr = ptr + prefix_skip;
1763
3.24M
            if (flags & SRE_INFO_LITERAL)
1764
3.73k
                return 1; /* we got all of it */
1765
3.24M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.24M
            if (status != 0)
1767
2.83M
                return status;
1768
409k
            ++ptr;
1769
409k
            RESET_CAPTURE_GROUP();
1770
409k
        }
1771
3.46k
        return 0;
1772
2.93M
    }
1773
1774
48.7M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
483k
        Py_ssize_t i = 0;
1778
1779
483k
        end = (SRE_CHAR *)state->end;
1780
483k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
483k
#if SIZEOF_SRE_CHAR < 4
1783
1.44M
        for (i = 0; i < prefix_len; i++)
1784
966k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
483k
#endif
1787
946k
        while (ptr < end) {
1788
946k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
7.19M
            while (*ptr++ != c) {
1790
6.24M
                if (ptr >= end)
1791
137
                    return 0;
1792
6.24M
            }
1793
946k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
946k
            i = 1;
1797
946k
            state->must_advance = 0;
1798
947k
            do {
1799
947k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
914k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
914k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
914k
                    state->start = ptr - (prefix_len - 1);
1808
914k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
914k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
914k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
914k
                    if (status != 0)
1813
483k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
431k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
430k
                    RESET_CAPTURE_GROUP();
1818
430k
                }
1819
464k
                i = overlap[i];
1820
464k
            } while (i != 0);
1821
946k
        }
1822
0
        return 0;
1823
483k
    }
1824
1825
48.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
46.0M
        end = (SRE_CHAR *)state->end;
1828
46.0M
        state->must_advance = 0;
1829
46.4M
        for (;;) {
1830
198M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
151M
                ptr++;
1832
46.4M
            if (ptr >= end)
1833
928k
                return 0;
1834
45.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
45.5M
            state->start = ptr;
1836
45.5M
            state->ptr = ptr;
1837
45.5M
            status = SRE(match)(state, pattern, 0);
1838
45.5M
            if (status != 0)
1839
45.1M
                break;
1840
363k
            ptr++;
1841
363k
            RESET_CAPTURE_GROUP();
1842
363k
        }
1843
46.0M
    } else {
1844
        /* general case */
1845
2.21M
        assert(ptr <= end);
1846
2.21M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.21M
        state->start = state->ptr = ptr;
1848
2.21M
        status = SRE(match)(state, pattern, 1);
1849
2.21M
        state->must_advance = 0;
1850
2.21M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
28
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.40M
        {
1854
1.40M
            state->start = state->ptr = ptr = end;
1855
1.40M
            return 0;
1856
1.40M
        }
1857
125M
        while (status == 0 && ptr < end) {
1858
124M
            ptr++;
1859
124M
            RESET_CAPTURE_GROUP();
1860
124M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
124M
            state->start = state->ptr = ptr;
1862
124M
            status = SRE(match)(state, pattern, 0);
1863
124M
        }
1864
810k
    }
1865
1866
45.9M
    return status;
1867
48.3M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.28M
{
1694
7.28M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.28M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.28M
    Py_ssize_t status = 0;
1697
7.28M
    Py_ssize_t prefix_len = 0;
1698
7.28M
    Py_ssize_t prefix_skip = 0;
1699
7.28M
    SRE_CODE* prefix = NULL;
1700
7.28M
    SRE_CODE* charset = NULL;
1701
7.28M
    SRE_CODE* overlap = NULL;
1702
7.28M
    int flags = 0;
1703
7.28M
    INIT_TRACE(state);
1704
1705
7.28M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.28M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.28M
        flags = pattern[2];
1713
1714
7.28M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.05k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.05k
                   end - ptr, (size_t) pattern[3]));
1717
5.05k
            return 0;
1718
5.05k
        }
1719
7.28M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.65M
            end -= pattern[3] - 1;
1723
2.65M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.65M
        }
1726
1727
7.28M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.65M
            prefix_len = pattern[5];
1731
2.65M
            prefix_skip = pattern[6];
1732
2.65M
            prefix = pattern + 7;
1733
2.65M
            overlap = prefix + prefix_len - 1;
1734
4.62M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.49M
            charset = pattern + 5;
1738
1739
7.28M
        pattern += 1 + pattern[1];
1740
7.28M
    }
1741
1742
7.28M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.28M
           prefix, prefix_len, prefix_skip));
1744
7.28M
    TRACE(("charset = %p\n", charset));
1745
1746
7.28M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.36M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.36M
        end = (SRE_CHAR *)state->end;
1754
2.36M
        state->must_advance = 0;
1755
2.40M
        while (ptr < end) {
1756
28.7M
            while (*ptr != c) {
1757
26.3M
                if (++ptr >= end)
1758
5.45k
                    return 0;
1759
26.3M
            }
1760
2.40M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.40M
            state->start = ptr;
1762
2.40M
            state->ptr = ptr + prefix_skip;
1763
2.40M
            if (flags & SRE_INFO_LITERAL)
1764
2.80k
                return 1; /* we got all of it */
1765
2.40M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.40M
            if (status != 0)
1767
2.35M
                return status;
1768
46.2k
            ++ptr;
1769
46.2k
            RESET_CAPTURE_GROUP();
1770
46.2k
        }
1771
712
        return 0;
1772
2.36M
    }
1773
1774
4.91M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
290k
        Py_ssize_t i = 0;
1778
1779
290k
        end = (SRE_CHAR *)state->end;
1780
290k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
671k
        while (ptr < end) {
1788
671k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
11.9M
            while (*ptr++ != c) {
1790
11.2M
                if (ptr >= end)
1791
154
                    return 0;
1792
11.2M
            }
1793
671k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
671k
            i = 1;
1797
671k
            state->must_advance = 0;
1798
671k
            do {
1799
671k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
592k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
592k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
592k
                    state->start = ptr - (prefix_len - 1);
1808
592k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
592k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
592k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
592k
                    if (status != 0)
1813
289k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
302k
                    if (++ptr >= end)
1816
20
                        return 0;
1817
302k
                    RESET_CAPTURE_GROUP();
1818
302k
                }
1819
381k
                i = overlap[i];
1820
381k
            } while (i != 0);
1821
671k
        }
1822
0
        return 0;
1823
290k
    }
1824
1825
4.62M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.49M
        end = (SRE_CHAR *)state->end;
1828
4.49M
        state->must_advance = 0;
1829
5.09M
        for (;;) {
1830
63.9M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
58.8M
                ptr++;
1832
5.09M
            if (ptr >= end)
1833
51.7k
                return 0;
1834
5.03M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
5.03M
            state->start = ptr;
1836
5.03M
            state->ptr = ptr;
1837
5.03M
            status = SRE(match)(state, pattern, 0);
1838
5.03M
            if (status != 0)
1839
4.44M
                break;
1840
595k
            ptr++;
1841
595k
            RESET_CAPTURE_GROUP();
1842
595k
        }
1843
4.49M
    } else {
1844
        /* general case */
1845
133k
        assert(ptr <= end);
1846
133k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
133k
        state->start = state->ptr = ptr;
1848
133k
        status = SRE(match)(state, pattern, 1);
1849
133k
        state->must_advance = 0;
1850
133k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
26
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.8k
        {
1854
13.8k
            state->start = state->ptr = ptr = end;
1855
13.8k
            return 0;
1856
13.8k
        }
1857
30.7M
        while (status == 0 && ptr < end) {
1858
30.6M
            ptr++;
1859
30.6M
            RESET_CAPTURE_GROUP();
1860
30.6M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
30.6M
            state->start = state->ptr = ptr;
1862
30.6M
            status = SRE(match)(state, pattern, 0);
1863
30.6M
        }
1864
119k
    }
1865
1866
4.56M
    return status;
1867
4.62M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/