Coverage Report

Created: 2026-06-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
15.9M
{
18
    /* check if pointer is at given position */
19
20
15.9M
    Py_ssize_t thisp, thatp;
21
22
15.9M
    switch (at) {
23
24
6.22M
    case SRE_AT_BEGINNING:
25
6.22M
    case SRE_AT_BEGINNING_STRING:
26
6.22M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
3.25M
    case SRE_AT_END:
33
3.25M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
23.1k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
3.25M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
6.43M
    case SRE_AT_END_STRING:
42
6.43M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
15.9M
    }
87
88
0
    return 0;
89
15.9M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.3M
{
18
    /* check if pointer is at given position */
19
20
12.3M
    Py_ssize_t thisp, thatp;
21
22
12.3M
    switch (at) {
23
24
6.19M
    case SRE_AT_BEGINNING:
25
6.19M
    case SRE_AT_BEGINNING_STRING:
26
6.19M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
3.16M
    case SRE_AT_END:
33
3.16M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
23.0k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
3.16M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.94M
    case SRE_AT_END_STRING:
42
2.94M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.3M
    }
87
88
0
    return 0;
89
12.3M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
1.77M
{
18
    /* check if pointer is at given position */
19
20
1.77M
    Py_ssize_t thisp, thatp;
21
22
1.77M
    switch (at) {
23
24
33.4k
    case SRE_AT_BEGINNING:
25
33.4k
    case SRE_AT_BEGINNING_STRING:
26
33.4k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
51.6k
    case SRE_AT_END:
33
51.6k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
49
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
51.6k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.68M
    case SRE_AT_END_STRING:
42
1.68M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.77M
    }
87
88
0
    return 0;
89
1.77M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
1.83M
{
18
    /* check if pointer is at given position */
19
20
1.83M
    Py_ssize_t thisp, thatp;
21
22
1.83M
    switch (at) {
23
24
3.57k
    case SRE_AT_BEGINNING:
25
3.57k
    case SRE_AT_BEGINNING_STRING:
26
3.57k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
34.7k
    case SRE_AT_END:
33
34.7k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
68
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
34.7k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.79M
    case SRE_AT_END_STRING:
42
1.79M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.83M
    }
87
88
0
    return 0;
89
1.83M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.40G
{
94
    /* check if character is a member of the given set */
95
96
1.40G
    int ok = 1;
97
98
3.17G
    for (;;) {
99
3.17G
        switch (*set++) {
100
101
903M
        case SRE_OP_FAILURE:
102
903M
            return !ok;
103
104
962M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
962M
            if (ch == set[0])
107
6.59M
                return ok;
108
956M
            set++;
109
956M
            break;
110
111
11.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.1M
            if (sre_category(set[0], (int) ch))
114
8.07M
                return ok;
115
3.03M
            set++;
116
3.03M
            break;
117
118
543M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
543M
            if (ch < 256 &&
121
518M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
198M
                return ok;
123
345M
            set += 256/SRE_CODE_BITS;
124
345M
            break;
125
126
443M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
443M
            if (set[0] <= ch && ch <= set[1])
129
290M
                return ok;
130
153M
            set += 2;
131
153M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
307M
        case SRE_OP_NEGATE:
148
307M
            ok = !ok;
149
307M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.17G
        }
175
3.17G
    }
176
1.40G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
429M
{
94
    /* check if character is a member of the given set */
95
96
429M
    int ok = 1;
97
98
841M
    for (;;) {
99
841M
        switch (*set++) {
100
101
212M
        case SRE_OP_FAILURE:
102
212M
            return !ok;
103
104
182M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
182M
            if (ch == set[0])
107
3.21M
                return ok;
108
179M
            set++;
109
179M
            break;
110
111
9.93M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
9.93M
            if (sre_category(set[0], (int) ch))
114
6.91M
                return ok;
115
3.02M
            set++;
116
3.02M
            break;
117
118
135M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
135M
            if (ch < 256 &&
121
135M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
56.7M
                return ok;
123
79.1M
            set += 256/SRE_CODE_BITS;
124
79.1M
            break;
125
126
229M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
229M
            if (set[0] <= ch && ch <= set[1])
129
149M
                return ok;
130
79.5M
            set += 2;
131
79.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
71.2M
        case SRE_OP_NEGATE:
148
71.2M
            ok = !ok;
149
71.2M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
841M
        }
175
841M
    }
176
429M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
630M
{
94
    /* check if character is a member of the given set */
95
96
630M
    int ok = 1;
97
98
1.51G
    for (;;) {
99
1.51G
        switch (*set++) {
100
101
449M
        case SRE_OP_FAILURE:
102
449M
            return !ok;
103
104
569M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
569M
            if (ch == set[0])
107
2.50M
                return ok;
108
567M
            set++;
109
567M
            break;
110
111
435k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
435k
            if (sre_category(set[0], (int) ch))
114
426k
                return ok;
115
8.48k
            set++;
116
8.48k
            break;
117
118
195M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
195M
            if (ch < 256 &&
121
182M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
59.0M
                return ok;
123
136M
            set += 256/SRE_CODE_BITS;
124
136M
            break;
125
126
180M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
180M
            if (set[0] <= ch && ch <= set[1])
129
119M
                return ok;
130
61.6M
            set += 2;
131
61.6M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
123M
        case SRE_OP_NEGATE:
148
123M
            ok = !ok;
149
123M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.51G
        }
175
1.51G
    }
176
630M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
346M
{
94
    /* check if character is a member of the given set */
95
96
346M
    int ok = 1;
97
98
811M
    for (;;) {
99
811M
        switch (*set++) {
100
101
240M
        case SRE_OP_FAILURE:
102
240M
            return !ok;
103
104
210M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
210M
            if (ch == set[0])
107
882k
                return ok;
108
209M
            set++;
109
209M
            break;
110
111
734k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
734k
            if (sre_category(set[0], (int) ch))
114
734k
                return ok;
115
373
            set++;
116
373
            break;
117
118
212M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
212M
            if (ch < 256 &&
121
199M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
82.7M
                return ok;
123
129M
            set += 256/SRE_CODE_BITS;
124
129M
            break;
125
126
33.7M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
33.7M
            if (set[0] <= ch && ch <= set[1])
129
21.4M
                return ok;
130
12.2M
            set += 2;
131
12.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
112M
        case SRE_OP_NEGATE:
148
112M
            ok = !ok;
149
112M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
811M
        }
175
811M
    }
176
346M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
382M
{
195
382M
    SRE_CODE chr;
196
382M
    SRE_CHAR c;
197
382M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
382M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
382M
    Py_ssize_t i;
200
382M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
382M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
49.7M
        end = ptr + maxcount;
205
206
382M
    switch (pattern[0]) {
207
208
332M
    case SRE_OP_IN:
209
        /* repeated set */
210
332M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
736M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
404M
            ptr++;
213
332M
        break;
214
215
832
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
832
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
1.64k
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
816
            ptr++;
220
832
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
49.5M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
49.5M
        chr = pattern[1];
232
49.5M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
49.5M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
38.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
38.0M
        else
238
38.0M
#endif
239
52.3M
        while (ptr < end && *ptr == c)
240
2.84M
            ptr++;
241
49.5M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
724k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
724k
        chr = pattern[1];
270
724k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
724k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
389k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
389k
        else
276
389k
#endif
277
54.6M
        while (ptr < end && *ptr != c)
278
53.9M
            ptr++;
279
724k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
382M
    }
319
320
382M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
382M
           ptr - (SRE_CHAR*) state->ptr));
322
382M
    return ptr - (SRE_CHAR*) state->ptr;
323
382M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
106M
{
195
106M
    SRE_CODE chr;
196
106M
    SRE_CHAR c;
197
106M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
106M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
106M
    Py_ssize_t i;
200
106M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
106M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
21.5M
        end = ptr + maxcount;
205
206
106M
    switch (pattern[0]) {
207
208
81.5M
    case SRE_OP_IN:
209
        /* repeated set */
210
81.5M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
238M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
156M
            ptr++;
213
81.5M
        break;
214
215
832
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
832
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
1.64k
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
816
            ptr++;
220
832
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
25.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
25.0M
        chr = pattern[1];
232
25.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
25.0M
        c = (SRE_CHAR) chr;
234
25.0M
#if SIZEOF_SRE_CHAR < 4
235
25.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
25.0M
        else
238
25.0M
#endif
239
25.4M
        while (ptr < end && *ptr == c)
240
388k
            ptr++;
241
25.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
135k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
135k
        chr = pattern[1];
270
135k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
135k
        c = (SRE_CHAR) chr;
272
135k
#if SIZEOF_SRE_CHAR < 4
273
135k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
135k
        else
276
135k
#endif
277
11.0M
        while (ptr < end && *ptr != c)
278
10.8M
            ptr++;
279
135k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
106M
    }
319
320
106M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
106M
           ptr - (SRE_CHAR*) state->ptr));
322
106M
    return ptr - (SRE_CHAR*) state->ptr;
323
106M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
182M
{
195
182M
    SRE_CODE chr;
196
182M
    SRE_CHAR c;
197
182M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
182M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
182M
    Py_ssize_t i;
200
182M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
182M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
14.3M
        end = ptr + maxcount;
205
206
182M
    switch (pattern[0]) {
207
208
169M
    case SRE_OP_IN:
209
        /* repeated set */
210
169M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
301M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
131M
            ptr++;
213
169M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
12.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
12.9M
        chr = pattern[1];
232
12.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
12.9M
        c = (SRE_CHAR) chr;
234
12.9M
#if SIZEOF_SRE_CHAR < 4
235
12.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
12.9M
        else
238
12.9M
#endif
239
14.5M
        while (ptr < end && *ptr == c)
240
1.60M
            ptr++;
241
12.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
253k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
253k
        chr = pattern[1];
270
253k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
253k
        c = (SRE_CHAR) chr;
272
253k
#if SIZEOF_SRE_CHAR < 4
273
253k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
253k
        else
276
253k
#endif
277
12.4M
        while (ptr < end && *ptr != c)
278
12.2M
            ptr++;
279
253k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
182M
    }
319
320
182M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
182M
           ptr - (SRE_CHAR*) state->ptr));
322
182M
    return ptr - (SRE_CHAR*) state->ptr;
323
182M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
92.9M
{
195
92.9M
    SRE_CODE chr;
196
92.9M
    SRE_CHAR c;
197
92.9M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
92.9M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
92.9M
    Py_ssize_t i;
200
92.9M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
92.9M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
13.7M
        end = ptr + maxcount;
205
206
92.9M
    switch (pattern[0]) {
207
208
81.1M
    case SRE_OP_IN:
209
        /* repeated set */
210
81.1M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
196M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
115M
            ptr++;
213
81.1M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.4M
        chr = pattern[1];
232
11.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.4M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.3M
        while (ptr < end && *ptr == c)
240
855k
            ptr++;
241
11.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
335k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
335k
        chr = pattern[1];
270
335k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
335k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
31.1M
        while (ptr < end && *ptr != c)
278
30.8M
            ptr++;
279
335k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
92.9M
    }
319
320
92.9M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
92.9M
           ptr - (SRE_CHAR*) state->ptr));
322
92.9M
    return ptr - (SRE_CHAR*) state->ptr;
323
92.9M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
409M
    do { \
355
409M
        ctx->lastmark = state->lastmark; \
356
409M
        ctx->lastindex = state->lastindex; \
357
409M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
82.1M
    do { \
360
82.1M
        state->lastmark = ctx->lastmark; \
361
82.1M
        state->lastindex = ctx->lastindex; \
362
82.1M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
125M
    do { \
366
125M
        TRACE(("push last_ptr: %zd", \
367
125M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
125M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
125M
    } while (0)
370
#define LAST_PTR_POP()  \
371
125M
    do { \
372
125M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
125M
        TRACE(("pop last_ptr: %zd", \
374
125M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
125M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
333M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
561M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
808M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
37.4M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
26.0M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
895M
#define DATA_STACK_ALLOC(state, type, ptr) \
389
895M
do { \
390
895M
    alloc_pos = state->data_stack_base; \
391
895M
    TRACE(("allocating %s in %zd (%zd)\n", \
392
895M
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
895M
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
157M
        int j = data_stack_grow(state, sizeof(type)); \
395
157M
        if (j < 0) return j; \
396
157M
        if (ctx_pos != -1) \
397
157M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
157M
    } \
399
895M
    ptr = (type*)(state->data_stack+alloc_pos); \
400
895M
    state->data_stack_base += sizeof(type); \
401
895M
} while (0)
402
403
891M
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
891M
do { \
405
891M
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
891M
    ptr = (type*)(state->data_stack+pos); \
407
891M
} while (0)
408
409
336M
#define DATA_STACK_PUSH(state, data, size) \
410
336M
do { \
411
336M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
336M
           data, state->data_stack_base, size)); \
413
336M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
83.6k
        int j = data_stack_grow(state, size); \
415
83.6k
        if (j < 0) return j; \
416
83.6k
        if (ctx_pos != -1) \
417
83.6k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
83.6k
    } \
419
336M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
336M
    state->data_stack_base += size; \
421
336M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
161M
#define DATA_STACK_POP(state, data, size, discard) \
427
161M
do { \
428
161M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
161M
           data, state->data_stack_base-size, size)); \
430
161M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
161M
    if (discard) \
432
161M
        state->data_stack_base -= size; \
433
161M
} while (0)
434
435
1.07G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.07G
do { \
437
1.07G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.07G
           state->data_stack_base-size, size)); \
439
1.07G
    state->data_stack_base -= size; \
440
1.07G
} while(0)
441
442
#define DATA_PUSH(x) \
443
125M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
125M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
895M
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
895M
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
890M
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
328M
    do if (lastmark >= 0) { \
473
211M
        MARK_TRACE("push", (lastmark)); \
474
211M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
211M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
328M
    } while (0)
477
#define MARK_POP(lastmark) \
478
51.4M
    do if (lastmark >= 0) { \
479
34.8M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
34.8M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
34.8M
        MARK_TRACE("pop", (lastmark)); \
482
51.4M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.68M
    do if (lastmark >= 0) { \
485
1.06M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.06M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.06M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.68M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
277M
    do if (lastmark >= 0) { \
491
176M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
176M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
176M
        MARK_TRACE("pop discard", (lastmark)); \
494
277M
    } while (0)
495
496
386M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
125M
#define JUMP_MAX_UNTIL_2     2
499
37.4M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
37.1M
#define JUMP_REPEAT          7
504
14.8M
#define JUMP_REPEAT_ONE_1    8
505
134M
#define JUMP_REPEAT_ONE_2    9
506
832
#define JUMP_MIN_REPEAT_ONE  10
507
103M
#define JUMP_BRANCH          11
508
26.0M
#define JUMP_ASSERT          12
509
29.2M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
508M
    ctx->pattern = pattern; \
516
508M
    ctx->ptr = ptr; \
517
508M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
508M
    nextctx->pattern = nextpattern; \
519
508M
    nextctx->toplevel = toplevel_; \
520
508M
    nextctx->jump = jumpvalue; \
521
508M
    nextctx->last_ctx_pos = ctx_pos; \
522
508M
    pattern = nextpattern; \
523
508M
    ctx_pos = alloc_pos; \
524
508M
    ctx = nextctx; \
525
508M
    goto entrance; \
526
508M
    jumplabel: \
527
508M
    pattern = ctx->pattern; \
528
508M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
453M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
55.3M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
1.63G
    do {                                                           \
553
1.63G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
1.63G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
1.63G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
1.71G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
1.63G
        do {                               \
588
1.63G
            MAYBE_CHECK_SIGNALS;           \
589
1.63G
            goto *sre_targets[*pattern++]; \
590
1.63G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
386M
{
601
386M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
386M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
386M
    Py_ssize_t ret = 0;
604
386M
    int jump;
605
386M
    unsigned int sigcount = state->sigcount;
606
607
386M
    SRE(match_context)* ctx;
608
386M
    SRE(match_context)* nextctx;
609
386M
    INIT_TRACE(state);
610
611
386M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
386M
    DATA_ALLOC(SRE(match_context), ctx);
614
386M
    ctx->last_ctx_pos = -1;
615
386M
    ctx->jump = JUMP_NONE;
616
386M
    ctx->toplevel = toplevel;
617
386M
    ctx_pos = alloc_pos;
618
619
386M
#if USE_COMPUTED_GOTOS
620
386M
#include "sre_targets.h"
621
386M
#endif
622
623
895M
entrance:
624
625
895M
    ;  // Fashion statement.
626
895M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
895M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
53.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
1.95M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
1.95M
                   end - ptr, (size_t) pattern[3]));
634
1.95M
            RETURN_FAILURE;
635
1.95M
        }
636
51.2M
        pattern += pattern[1] + 1;
637
51.2M
    }
638
639
893M
#if USE_COMPUTED_GOTOS
640
893M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
893M
    {
647
648
893M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
314M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
314M
                   ptr, pattern[0]));
653
314M
            {
654
314M
                int i = pattern[0];
655
314M
                if (i & 1)
656
42.9M
                    state->lastindex = i/2 + 1;
657
314M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
308M
                    int j = state->lastmark + 1;
663
314M
                    while (j < i)
664
6.21M
                        state->mark[j++] = NULL;
665
308M
                    state->lastmark = i;
666
308M
                }
667
314M
                state->mark[i] = ptr;
668
314M
            }
669
314M
            pattern++;
670
314M
            DISPATCH;
671
672
314M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
120M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
120M
                   ptr, *pattern));
677
120M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
38.0M
                RETURN_FAILURE;
679
82.0M
            pattern++;
680
82.0M
            ptr++;
681
82.0M
            DISPATCH;
682
683
82.0M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match any character that is not the given literal */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
161M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
161M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
161M
            if (ctx->toplevel &&
698
37.7M
                ((state->match_all && ptr != state->end) ||
699
37.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
161M
            state->ptr = ptr;
704
161M
            RETURN_SUCCESS;
705
706
15.9M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
15.9M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
15.9M
            if (!SRE(at)(state, ptr, *pattern))
711
6.81M
                RETURN_FAILURE;
712
9.09M
            pattern++;
713
9.09M
            DISPATCH;
714
715
9.09M
        TARGET(SRE_OP_CATEGORY):
716
            /* match a character in the given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
222M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
222M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
222M
            if (ptr >= end ||
749
222M
                !SRE(charset)(state, pattern + 1, *ptr))
750
22.3M
                RETURN_FAILURE;
751
200M
            pattern += pattern[0];
752
200M
            ptr++;
753
200M
            DISPATCH;
754
755
200M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.87M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.87M
                   pattern, ptr, pattern[0]));
758
7.87M
            if (ptr >= end ||
759
7.87M
                sre_lower_ascii(*ptr) != *pattern)
760
68.0k
                RETURN_FAILURE;
761
7.80M
            pattern++;
762
7.80M
            ptr++;
763
7.80M
            DISPATCH;
764
765
7.80M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
80.6M
        TARGET(SRE_OP_JUMP):
845
80.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
80.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
80.6M
                   ptr, pattern[0]));
850
80.6M
            pattern += pattern[0];
851
80.6M
            DISPATCH;
852
853
98.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
98.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
98.4M
            LASTMARK_SAVE();
858
98.4M
            if (state->repeat)
859
61.6M
                MARK_PUSH(ctx->lastmark);
860
220M
            for (; pattern[0]; pattern += pattern[0]) {
861
199M
                if (pattern[1] == SRE_OP_LITERAL &&
862
123M
                    (ptr >= end ||
863
123M
                     (SRE_CODE) *ptr != pattern[2]))
864
67.4M
                    continue;
865
132M
                if (pattern[1] == SRE_OP_IN &&
866
52.6M
                    (ptr >= end ||
867
52.5M
                     !SRE(charset)(state, pattern + 3,
868
52.5M
                                   (SRE_CODE) *ptr)))
869
28.4M
                    continue;
870
103M
                state->ptr = ptr;
871
103M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
103M
                if (ret) {
873
77.5M
                    if (state->repeat)
874
52.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
77.5M
                    RETURN_ON_ERROR(ret);
876
77.5M
                    RETURN_SUCCESS;
877
77.5M
                }
878
26.3M
                if (state->repeat)
879
16.3k
                    MARK_POP_KEEP(ctx->lastmark);
880
26.3M
                LASTMARK_RESTORE();
881
26.3M
            }
882
20.8M
            if (state->repeat)
883
8.93M
                MARK_POP_DISCARD(ctx->lastmark);
884
20.8M
            RETURN_FAILURE;
885
886
383M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
383M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
383M
                   pattern[1], pattern[2]));
898
899
383M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
851k
                RETURN_FAILURE; /* cannot match */
901
902
382M
            state->ptr = ptr;
903
904
382M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
382M
            RETURN_ON_ERROR(ret);
906
382M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
382M
            ctx->count = ret;
908
382M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
382M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
225M
                RETURN_FAILURE;
917
918
156M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.02M
                ptr == state->end &&
920
91.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
91.7k
            {
922
                /* tail is empty.  we're finished */
923
91.7k
                state->ptr = ptr;
924
91.7k
                RETURN_SUCCESS;
925
91.7k
            }
926
927
156M
            LASTMARK_SAVE();
928
156M
            if (state->repeat)
929
112M
                MARK_PUSH(ctx->lastmark);
930
931
156M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
24.2M
                ctx->u.chr = pattern[pattern[0]+1];
935
24.2M
                for (;;) {
936
60.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
51.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
36.3M
                        ptr--;
939
36.3M
                        ctx->count--;
940
36.3M
                    }
941
24.2M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
9.40M
                        break;
943
14.8M
                    state->ptr = ptr;
944
14.8M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
14.8M
                            pattern+pattern[0]);
946
14.8M
                    if (ret) {
947
14.8M
                        if (state->repeat)
948
12.8M
                            MARK_POP_DISCARD(ctx->lastmark);
949
14.8M
                        RETURN_ON_ERROR(ret);
950
14.8M
                        RETURN_SUCCESS;
951
14.8M
                    }
952
851
                    if (state->repeat)
953
835
                        MARK_POP_KEEP(ctx->lastmark);
954
851
                    LASTMARK_RESTORE();
955
956
851
                    ptr--;
957
851
                    ctx->count--;
958
851
                }
959
9.40M
                if (state->repeat)
960
8.21M
                    MARK_POP_DISCARD(ctx->lastmark);
961
131M
            } else {
962
                /* general case */
963
136M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
134M
                    state->ptr = ptr;
965
134M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
134M
                            pattern+pattern[0]);
967
134M
                    if (ret) {
968
130M
                        if (state->repeat)
969
90.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
130M
                        RETURN_ON_ERROR(ret);
971
130M
                        RETURN_SUCCESS;
972
130M
                    }
973
4.41M
                    if (state->repeat)
974
1.66M
                        MARK_POP_KEEP(ctx->lastmark);
975
4.41M
                    LASTMARK_RESTORE();
976
977
4.41M
                    ptr--;
978
4.41M
                    ctx->count--;
979
4.41M
                }
980
1.94M
                if (state->repeat)
981
1.41M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.94M
            }
983
11.3M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
37.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
37.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
37.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
37.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
37.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
37.1M
            ctx->u.rep->count = -1;
1131
37.1M
            ctx->u.rep->pattern = pattern;
1132
37.1M
            ctx->u.rep->prev = state->repeat;
1133
37.1M
            ctx->u.rep->last_ptr = NULL;
1134
37.1M
            state->repeat = ctx->u.rep;
1135
1136
37.1M
            state->ptr = ptr;
1137
37.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
37.1M
            state->repeat = ctx->u.rep->prev;
1139
37.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
37.1M
            if (ret) {
1142
37.0M
                RETURN_ON_ERROR(ret);
1143
37.0M
                RETURN_SUCCESS;
1144
37.0M
            }
1145
93.0k
            RETURN_FAILURE;
1146
1147
140M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
140M
            ctx->u.rep = state->repeat;
1155
140M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
140M
            state->ptr = ptr;
1159
1160
140M
            ctx->count = ctx->u.rep->count+1;
1161
1162
140M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
140M
                   ptr, ctx->count));
1164
1165
140M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
140M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.2M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
125M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
125M
                ctx->u.rep->count = ctx->count;
1185
125M
                LASTMARK_SAVE();
1186
125M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
125M
                LAST_PTR_PUSH();
1189
125M
                ctx->u.rep->last_ptr = state->ptr;
1190
125M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
125M
                        ctx->u.rep->pattern+3);
1192
125M
                LAST_PTR_POP();
1193
125M
                if (ret) {
1194
103M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
103M
                    RETURN_ON_ERROR(ret);
1196
103M
                    RETURN_SUCCESS;
1197
103M
                }
1198
22.2M
                MARK_POP(ctx->lastmark);
1199
22.2M
                LASTMARK_RESTORE();
1200
22.2M
                ctx->u.rep->count = ctx->count-1;
1201
22.2M
                state->ptr = ptr;
1202
22.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
37.4M
            state->repeat = ctx->u.rep->prev;
1207
37.4M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
37.4M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
37.4M
            RETURN_ON_SUCCESS(ret);
1211
396k
            state->ptr = ptr;
1212
396k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
26.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
26.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
26.0M
                   ptr, pattern[1]));
1565
26.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
26.0M
            state->ptr = ptr - pattern[1];
1568
26.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
26.0M
            RETURN_ON_FAILURE(ret);
1570
21.2M
            pattern += pattern[0];
1571
21.2M
            DISPATCH;
1572
1573
29.2M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
29.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
29.2M
                   ptr, pattern[1]));
1578
29.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
29.2M
                state->ptr = ptr - pattern[1];
1580
29.2M
                LASTMARK_SAVE();
1581
29.2M
                if (state->repeat)
1582
29.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
58.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
58.4M
                if (ret) {
1586
10.9k
                    if (state->repeat)
1587
10.9k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
10.9k
                    RETURN_ON_ERROR(ret);
1589
10.9k
                    RETURN_FAILURE;
1590
10.9k
                }
1591
29.2M
                if (state->repeat)
1592
29.2M
                    MARK_POP(ctx->lastmark);
1593
29.2M
                LASTMARK_RESTORE();
1594
29.2M
            }
1595
29.2M
            pattern += pattern[0];
1596
29.2M
            DISPATCH;
1597
1598
29.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
895M
exit:
1620
895M
    ctx_pos = ctx->last_ctx_pos;
1621
895M
    jump = ctx->jump;
1622
895M
    DATA_POP_DISCARD(ctx);
1623
895M
    if (ctx_pos == -1) {
1624
386M
        state->sigcount = sigcount;
1625
386M
        return ret;
1626
386M
    }
1627
508M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
508M
    switch (jump) {
1630
125M
        case JUMP_MAX_UNTIL_2:
1631
125M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
125M
            goto jump_max_until_2;
1633
37.4M
        case JUMP_MAX_UNTIL_3:
1634
37.4M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
37.4M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
103M
        case JUMP_BRANCH:
1643
103M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
103M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
37.1M
        case JUMP_REPEAT:
1658
37.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
37.1M
            goto jump_repeat;
1660
14.8M
        case JUMP_REPEAT_ONE_1:
1661
14.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
14.8M
            goto jump_repeat_one_1;
1663
134M
        case JUMP_REPEAT_ONE_2:
1664
134M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
134M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
26.0M
        case JUMP_ASSERT:
1673
26.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
26.0M
            goto jump_assert;
1675
29.2M
        case JUMP_ASSERT_NOT:
1676
29.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
29.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
508M
    }
1683
1684
0
    return ret; /* should never get here */
1685
508M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
128M
{
601
128M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
128M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
128M
    Py_ssize_t ret = 0;
604
128M
    int jump;
605
128M
    unsigned int sigcount = state->sigcount;
606
607
128M
    SRE(match_context)* ctx;
608
128M
    SRE(match_context)* nextctx;
609
128M
    INIT_TRACE(state);
610
611
128M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
128M
    DATA_ALLOC(SRE(match_context), ctx);
614
128M
    ctx->last_ctx_pos = -1;
615
128M
    ctx->jump = JUMP_NONE;
616
128M
    ctx->toplevel = toplevel;
617
128M
    ctx_pos = alloc_pos;
618
619
128M
#if USE_COMPUTED_GOTOS
620
128M
#include "sre_targets.h"
621
128M
#endif
622
623
307M
entrance:
624
625
307M
    ;  // Fashion statement.
626
307M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
307M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
32.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
1.95M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
1.95M
                   end - ptr, (size_t) pattern[3]));
634
1.95M
            RETURN_FAILURE;
635
1.95M
        }
636
30.2M
        pattern += pattern[1] + 1;
637
30.2M
    }
638
639
305M
#if USE_COMPUTED_GOTOS
640
305M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
305M
    {
647
648
305M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
93.3M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
93.3M
                   ptr, pattern[0]));
653
93.3M
            {
654
93.3M
                int i = pattern[0];
655
93.3M
                if (i & 1)
656
19.9M
                    state->lastindex = i/2 + 1;
657
93.3M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
90.4M
                    int j = state->lastmark + 1;
663
93.7M
                    while (j < i)
664
3.25M
                        state->mark[j++] = NULL;
665
90.4M
                    state->lastmark = i;
666
90.4M
                }
667
93.3M
                state->mark[i] = ptr;
668
93.3M
            }
669
93.3M
            pattern++;
670
93.3M
            DISPATCH;
671
672
93.3M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
72.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
72.1M
                   ptr, *pattern));
677
72.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
23.6M
                RETURN_FAILURE;
679
48.4M
            pattern++;
680
48.4M
            ptr++;
681
48.4M
            DISPATCH;
682
683
48.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
67.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
67.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
67.2M
            if (ctx->toplevel &&
698
22.3M
                ((state->match_all && ptr != state->end) ||
699
22.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
67.2M
            state->ptr = ptr;
704
67.2M
            RETURN_SUCCESS;
705
706
12.3M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.3M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.3M
            if (!SRE(at)(state, ptr, *pattern))
711
3.25M
                RETURN_FAILURE;
712
9.05M
            pattern++;
713
9.05M
            DISPATCH;
714
715
9.05M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
74.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
74.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
74.6M
            if (ptr >= end ||
749
74.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.08M
                RETURN_FAILURE;
751
69.5M
            pattern += pattern[0];
752
69.5M
            ptr++;
753
69.5M
            DISPATCH;
754
755
69.5M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
659k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
659k
                   pattern, ptr, pattern[0]));
758
659k
            if (ptr >= end ||
759
659k
                sre_lower_ascii(*ptr) != *pattern)
760
3.73k
                RETURN_FAILURE;
761
656k
            pattern++;
762
656k
            ptr++;
763
656k
            DISPATCH;
764
765
656k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
35.4M
        TARGET(SRE_OP_JUMP):
845
35.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
35.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
35.4M
                   ptr, pattern[0]));
850
35.4M
            pattern += pattern[0];
851
35.4M
            DISPATCH;
852
853
43.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
43.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
43.9M
            LASTMARK_SAVE();
858
43.9M
            if (state->repeat)
859
14.6M
                MARK_PUSH(ctx->lastmark);
860
105M
            for (; pattern[0]; pattern += pattern[0]) {
861
94.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
69.7M
                    (ptr >= end ||
863
69.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
30.6M
                    continue;
865
64.0M
                if (pattern[1] == SRE_OP_IN &&
866
14.2M
                    (ptr >= end ||
867
14.1M
                     !SRE(charset)(state, pattern + 3,
868
14.1M
                                   (SRE_CODE) *ptr)))
869
7.23M
                    continue;
870
56.8M
                state->ptr = ptr;
871
56.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
56.8M
                if (ret) {
873
33.5M
                    if (state->repeat)
874
14.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
33.5M
                    RETURN_ON_ERROR(ret);
876
33.5M
                    RETURN_SUCCESS;
877
33.5M
                }
878
23.2M
                if (state->repeat)
879
5.80k
                    MARK_POP_KEEP(ctx->lastmark);
880
23.2M
                LASTMARK_RESTORE();
881
23.2M
            }
882
10.3M
            if (state->repeat)
883
486k
                MARK_POP_DISCARD(ctx->lastmark);
884
10.3M
            RETURN_FAILURE;
885
886
107M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
107M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
107M
                   pattern[1], pattern[2]));
898
899
107M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
701k
                RETURN_FAILURE; /* cannot match */
901
902
106M
            state->ptr = ptr;
903
904
106M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
106M
            RETURN_ON_ERROR(ret);
906
106M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
106M
            ctx->count = ret;
908
106M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
106M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
54.1M
                RETURN_FAILURE;
917
918
52.6M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
725k
                ptr == state->end &&
920
66.6k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
66.6k
            {
922
                /* tail is empty.  we're finished */
923
66.6k
                state->ptr = ptr;
924
66.6k
                RETURN_SUCCESS;
925
66.6k
            }
926
927
52.5M
            LASTMARK_SAVE();
928
52.5M
            if (state->repeat)
929
38.1M
                MARK_PUSH(ctx->lastmark);
930
931
52.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.55M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.55M
                for (;;) {
936
16.2M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
14.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.69M
                        ptr--;
939
9.69M
                        ctx->count--;
940
9.69M
                    }
941
6.55M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.58M
                        break;
943
4.96M
                    state->ptr = ptr;
944
4.96M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.96M
                            pattern+pattern[0]);
946
4.96M
                    if (ret) {
947
4.96M
                        if (state->repeat)
948
3.01M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.96M
                        RETURN_ON_ERROR(ret);
950
4.96M
                        RETURN_SUCCESS;
951
4.96M
                    }
952
237
                    if (state->repeat)
953
221
                        MARK_POP_KEEP(ctx->lastmark);
954
237
                    LASTMARK_RESTORE();
955
956
237
                    ptr--;
957
237
                    ctx->count--;
958
237
                }
959
1.58M
                if (state->repeat)
960
405k
                    MARK_POP_DISCARD(ctx->lastmark);
961
46.0M
            } else {
962
                /* general case */
963
48.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
47.2M
                    state->ptr = ptr;
965
47.2M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
47.2M
                            pattern+pattern[0]);
967
47.2M
                    if (ret) {
968
44.3M
                        if (state->repeat)
969
33.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
44.3M
                        RETURN_ON_ERROR(ret);
971
44.3M
                        RETURN_SUCCESS;
972
44.3M
                    }
973
2.93M
                    if (state->repeat)
974
1.37M
                        MARK_POP_KEEP(ctx->lastmark);
975
2.93M
                    LASTMARK_RESTORE();
976
977
2.93M
                    ptr--;
978
2.93M
                    ctx->count--;
979
2.93M
                }
980
1.70M
                if (state->repeat)
981
1.21M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.70M
            }
983
3.28M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
8.97M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
8.97M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
8.97M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
8.97M
            ctx->u.rep = repeat_pool_malloc(state);
1127
8.97M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
8.97M
            ctx->u.rep->count = -1;
1131
8.97M
            ctx->u.rep->pattern = pattern;
1132
8.97M
            ctx->u.rep->prev = state->repeat;
1133
8.97M
            ctx->u.rep->last_ptr = NULL;
1134
8.97M
            state->repeat = ctx->u.rep;
1135
1136
8.97M
            state->ptr = ptr;
1137
8.97M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
8.97M
            state->repeat = ctx->u.rep->prev;
1139
8.97M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
8.97M
            if (ret) {
1142
8.88M
                RETURN_ON_ERROR(ret);
1143
8.88M
                RETURN_SUCCESS;
1144
8.88M
            }
1145
91.6k
            RETURN_FAILURE;
1146
1147
45.9M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
45.9M
            ctx->u.rep = state->repeat;
1155
45.9M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
45.9M
            state->ptr = ptr;
1159
1160
45.9M
            ctx->count = ctx->u.rep->count+1;
1161
1162
45.9M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
45.9M
                   ptr, ctx->count));
1164
1165
45.9M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
45.9M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.86M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
40.1M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
40.1M
                ctx->u.rep->count = ctx->count;
1185
40.1M
                LASTMARK_SAVE();
1186
40.1M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
40.1M
                LAST_PTR_PUSH();
1189
40.1M
                ctx->u.rep->last_ptr = state->ptr;
1190
40.1M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
40.1M
                        ctx->u.rep->pattern+3);
1192
40.1M
                LAST_PTR_POP();
1193
40.1M
                if (ret) {
1194
36.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
36.7M
                    RETURN_ON_ERROR(ret);
1196
36.7M
                    RETURN_SUCCESS;
1197
36.7M
                }
1198
3.33M
                MARK_POP(ctx->lastmark);
1199
3.33M
                LASTMARK_RESTORE();
1200
3.33M
                ctx->u.rep->count = ctx->count-1;
1201
3.33M
                state->ptr = ptr;
1202
3.33M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
9.19M
            state->repeat = ctx->u.rep->prev;
1207
9.19M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
9.19M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
9.19M
            RETURN_ON_SUCCESS(ret);
1211
309k
            state->ptr = ptr;
1212
309k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.66M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.66M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.66M
                   ptr, pattern[1]));
1565
3.66M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.66M
            state->ptr = ptr - pattern[1];
1568
3.66M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.66M
            RETURN_ON_FAILURE(ret);
1570
3.36M
            pattern += pattern[0];
1571
3.36M
            DISPATCH;
1572
1573
7.88M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.88M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.88M
                   ptr, pattern[1]));
1578
7.88M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.88M
                state->ptr = ptr - pattern[1];
1580
7.88M
                LASTMARK_SAVE();
1581
7.88M
                if (state->repeat)
1582
7.88M
                    MARK_PUSH(ctx->lastmark);
1583
1584
15.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
15.7M
                if (ret) {
1586
1.10k
                    if (state->repeat)
1587
1.10k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.10k
                    RETURN_ON_ERROR(ret);
1589
1.10k
                    RETURN_FAILURE;
1590
1.10k
                }
1591
7.88M
                if (state->repeat)
1592
7.88M
                    MARK_POP(ctx->lastmark);
1593
7.88M
                LASTMARK_RESTORE();
1594
7.88M
            }
1595
7.88M
            pattern += pattern[0];
1596
7.88M
            DISPATCH;
1597
1598
7.88M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
307M
exit:
1620
307M
    ctx_pos = ctx->last_ctx_pos;
1621
307M
    jump = ctx->jump;
1622
307M
    DATA_POP_DISCARD(ctx);
1623
307M
    if (ctx_pos == -1) {
1624
128M
        state->sigcount = sigcount;
1625
128M
        return ret;
1626
128M
    }
1627
178M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
178M
    switch (jump) {
1630
40.1M
        case JUMP_MAX_UNTIL_2:
1631
40.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
40.1M
            goto jump_max_until_2;
1633
9.19M
        case JUMP_MAX_UNTIL_3:
1634
9.19M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
9.19M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
56.8M
        case JUMP_BRANCH:
1643
56.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
56.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
8.97M
        case JUMP_REPEAT:
1658
8.97M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
8.97M
            goto jump_repeat;
1660
4.96M
        case JUMP_REPEAT_ONE_1:
1661
4.96M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.96M
            goto jump_repeat_one_1;
1663
47.2M
        case JUMP_REPEAT_ONE_2:
1664
47.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
47.2M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.66M
        case JUMP_ASSERT:
1673
3.66M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.66M
            goto jump_assert;
1675
7.88M
        case JUMP_ASSERT_NOT:
1676
7.88M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.88M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
178M
    }
1683
1684
0
    return ret; /* should never get here */
1685
178M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
203M
{
601
203M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
203M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
203M
    Py_ssize_t ret = 0;
604
203M
    int jump;
605
203M
    unsigned int sigcount = state->sigcount;
606
607
203M
    SRE(match_context)* ctx;
608
203M
    SRE(match_context)* nextctx;
609
203M
    INIT_TRACE(state);
610
611
203M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
203M
    DATA_ALLOC(SRE(match_context), ctx);
614
203M
    ctx->last_ctx_pos = -1;
615
203M
    ctx->jump = JUMP_NONE;
616
203M
    ctx->toplevel = toplevel;
617
203M
    ctx_pos = alloc_pos;
618
619
203M
#if USE_COMPUTED_GOTOS
620
203M
#include "sre_targets.h"
621
203M
#endif
622
623
347M
entrance:
624
625
347M
    ;  // Fashion statement.
626
347M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
347M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
11.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
1.59k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
1.59k
                   end - ptr, (size_t) pattern[3]));
634
1.59k
            RETURN_FAILURE;
635
1.59k
        }
636
11.8M
        pattern += pattern[1] + 1;
637
11.8M
    }
638
639
347M
#if USE_COMPUTED_GOTOS
640
347M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
347M
    {
647
648
347M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
156M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
156M
                   ptr, pattern[0]));
653
156M
            {
654
156M
                int i = pattern[0];
655
156M
                if (i & 1)
656
9.50M
                    state->lastindex = i/2 + 1;
657
156M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
155M
                    int j = state->lastmark + 1;
663
156M
                    while (j < i)
664
682k
                        state->mark[j++] = NULL;
665
155M
                    state->lastmark = i;
666
155M
                }
667
156M
                state->mark[i] = ptr;
668
156M
            }
669
156M
            pattern++;
670
156M
            DISPATCH;
671
672
156M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
23.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
23.3M
                   ptr, *pattern));
677
23.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
6.92M
                RETURN_FAILURE;
679
16.4M
            pattern++;
680
16.4M
            ptr++;
681
16.4M
            DISPATCH;
682
683
16.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
69.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
69.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
69.1M
            if (ctx->toplevel &&
698
8.39M
                ((state->match_all && ptr != state->end) ||
699
8.39M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
69.1M
            state->ptr = ptr;
704
69.1M
            RETURN_SUCCESS;
705
706
1.77M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.77M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.77M
            if (!SRE(at)(state, ptr, *pattern))
711
1.73M
                RETURN_FAILURE;
712
34.6k
            pattern++;
713
34.6k
            DISPATCH;
714
715
34.6k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
91.7M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
91.7M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
91.7M
            if (ptr >= end ||
749
91.7M
                !SRE(charset)(state, pattern + 1, *ptr))
750
8.28M
                RETURN_FAILURE;
751
83.4M
            pattern += pattern[0];
752
83.4M
            ptr++;
753
83.4M
            DISPATCH;
754
755
83.4M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.34M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.34M
                   pattern, ptr, pattern[0]));
758
4.34M
            if (ptr >= end ||
759
4.34M
                sre_lower_ascii(*ptr) != *pattern)
760
29.8k
                RETURN_FAILURE;
761
4.31M
            pattern++;
762
4.31M
            ptr++;
763
4.31M
            DISPATCH;
764
765
4.31M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.9M
        TARGET(SRE_OP_JUMP):
845
19.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.9M
                   ptr, pattern[0]));
850
19.9M
            pattern += pattern[0];
851
19.9M
            DISPATCH;
852
853
24.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
24.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
24.0M
            LASTMARK_SAVE();
858
24.0M
            if (state->repeat)
859
19.7M
                MARK_PUSH(ctx->lastmark);
860
50.3M
            for (; pattern[0]; pattern += pattern[0]) {
861
45.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
23.1M
                    (ptr >= end ||
863
23.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
14.5M
                    continue;
865
31.0M
                if (pattern[1] == SRE_OP_IN &&
866
17.1M
                    (ptr >= end ||
867
17.1M
                     !SRE(charset)(state, pattern + 3,
868
17.1M
                                   (SRE_CODE) *ptr)))
869
9.55M
                    continue;
870
21.5M
                state->ptr = ptr;
871
21.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
21.5M
                if (ret) {
873
19.3M
                    if (state->repeat)
874
16.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
19.3M
                    RETURN_ON_ERROR(ret);
876
19.3M
                    RETURN_SUCCESS;
877
19.3M
                }
878
2.15M
                if (state->repeat)
879
2.71k
                    MARK_POP_KEEP(ctx->lastmark);
880
2.15M
                LASTMARK_RESTORE();
881
2.15M
            }
882
4.70M
            if (state->repeat)
883
3.38M
                MARK_POP_DISCARD(ctx->lastmark);
884
4.70M
            RETURN_FAILURE;
885
886
182M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
182M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
182M
                   pattern[1], pattern[2]));
898
899
182M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
138k
                RETURN_FAILURE; /* cannot match */
901
902
182M
            state->ptr = ptr;
903
904
182M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
182M
            RETURN_ON_ERROR(ret);
906
182M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
182M
            ctx->count = ret;
908
182M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
182M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
136M
                RETURN_FAILURE;
917
918
46.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.99M
                ptr == state->end &&
920
19.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
19.5k
            {
922
                /* tail is empty.  we're finished */
923
19.5k
                state->ptr = ptr;
924
19.5k
                RETURN_SUCCESS;
925
19.5k
            }
926
927
46.2M
            LASTMARK_SAVE();
928
46.2M
            if (state->repeat)
929
29.8M
                MARK_PUSH(ctx->lastmark);
930
931
46.2M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.54M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.54M
                for (;;) {
936
14.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
10.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
7.46M
                        ptr--;
939
7.46M
                        ctx->count--;
940
7.46M
                    }
941
6.54M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.03M
                        break;
943
3.50M
                    state->ptr = ptr;
944
3.50M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.50M
                            pattern+pattern[0]);
946
3.50M
                    if (ret) {
947
3.50M
                        if (state->repeat)
948
3.46M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.50M
                        RETURN_ON_ERROR(ret);
950
3.50M
                        RETURN_SUCCESS;
951
3.50M
                    }
952
310
                    if (state->repeat)
953
310
                        MARK_POP_KEEP(ctx->lastmark);
954
310
                    LASTMARK_RESTORE();
955
956
310
                    ptr--;
957
310
                    ctx->count--;
958
310
                }
959
3.03M
                if (state->repeat)
960
3.02M
                    MARK_POP_DISCARD(ctx->lastmark);
961
39.6M
            } else {
962
                /* general case */
963
40.3M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
40.1M
                    state->ptr = ptr;
965
40.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
40.1M
                            pattern+pattern[0]);
967
40.1M
                    if (ret) {
968
39.5M
                        if (state->repeat)
969
23.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
39.5M
                        RETURN_ON_ERROR(ret);
971
39.5M
                        RETURN_SUCCESS;
972
39.5M
                    }
973
639k
                    if (state->repeat)
974
186k
                        MARK_POP_KEEP(ctx->lastmark);
975
639k
                    LASTMARK_RESTORE();
976
977
639k
                    ptr--;
978
639k
                    ctx->count--;
979
639k
                }
980
164k
                if (state->repeat)
981
136k
                    MARK_POP_DISCARD(ctx->lastmark);
982
164k
            }
983
3.20M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
11.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
11.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
11.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
11.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
11.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
11.3M
            ctx->u.rep->count = -1;
1131
11.3M
            ctx->u.rep->pattern = pattern;
1132
11.3M
            ctx->u.rep->prev = state->repeat;
1133
11.3M
            ctx->u.rep->last_ptr = NULL;
1134
11.3M
            state->repeat = ctx->u.rep;
1135
1136
11.3M
            state->ptr = ptr;
1137
11.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
11.3M
            state->repeat = ctx->u.rep->prev;
1139
11.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
11.3M
            if (ret) {
1142
11.3M
                RETURN_ON_ERROR(ret);
1143
11.3M
                RETURN_SUCCESS;
1144
11.3M
            }
1145
985
            RETURN_FAILURE;
1146
1147
40.4M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
40.4M
            ctx->u.rep = state->repeat;
1155
40.4M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
40.4M
            state->ptr = ptr;
1159
1160
40.4M
            ctx->count = ctx->u.rep->count+1;
1161
1162
40.4M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
40.4M
                   ptr, ctx->count));
1164
1165
40.4M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
40.4M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.29M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
37.1M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
37.1M
                ctx->u.rep->count = ctx->count;
1185
37.1M
                LASTMARK_SAVE();
1186
37.1M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
37.1M
                LAST_PTR_PUSH();
1189
37.1M
                ctx->u.rep->last_ptr = state->ptr;
1190
37.1M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
37.1M
                        ctx->u.rep->pattern+3);
1192
37.1M
                LAST_PTR_POP();
1193
37.1M
                if (ret) {
1194
28.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
28.9M
                    RETURN_ON_ERROR(ret);
1196
28.9M
                    RETURN_SUCCESS;
1197
28.9M
                }
1198
8.11M
                MARK_POP(ctx->lastmark);
1199
8.11M
                LASTMARK_RESTORE();
1200
8.11M
                ctx->u.rep->count = ctx->count-1;
1201
8.11M
                state->ptr = ptr;
1202
8.11M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
11.4M
            state->repeat = ctx->u.rep->prev;
1207
11.4M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
11.4M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
11.4M
            RETURN_ON_SUCCESS(ret);
1211
51.5k
            state->ptr = ptr;
1212
51.5k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
9.63M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
9.63M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
9.63M
                   ptr, pattern[1]));
1565
9.63M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
9.63M
            state->ptr = ptr - pattern[1];
1568
9.63M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
9.63M
            RETURN_ON_FAILURE(ret);
1570
6.69M
            pattern += pattern[0];
1571
6.69M
            DISPATCH;
1572
1573
9.03M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
9.03M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
9.03M
                   ptr, pattern[1]));
1578
9.03M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
9.03M
                state->ptr = ptr - pattern[1];
1580
9.03M
                LASTMARK_SAVE();
1581
9.03M
                if (state->repeat)
1582
9.03M
                    MARK_PUSH(ctx->lastmark);
1583
1584
18.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
18.0M
                if (ret) {
1586
2.34k
                    if (state->repeat)
1587
2.34k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.34k
                    RETURN_ON_ERROR(ret);
1589
2.34k
                    RETURN_FAILURE;
1590
2.34k
                }
1591
9.03M
                if (state->repeat)
1592
9.03M
                    MARK_POP(ctx->lastmark);
1593
9.03M
                LASTMARK_RESTORE();
1594
9.03M
            }
1595
9.03M
            pattern += pattern[0];
1596
9.03M
            DISPATCH;
1597
1598
9.03M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
347M
exit:
1620
347M
    ctx_pos = ctx->last_ctx_pos;
1621
347M
    jump = ctx->jump;
1622
347M
    DATA_POP_DISCARD(ctx);
1623
347M
    if (ctx_pos == -1) {
1624
203M
        state->sigcount = sigcount;
1625
203M
        return ret;
1626
203M
    }
1627
143M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
143M
    switch (jump) {
1630
37.1M
        case JUMP_MAX_UNTIL_2:
1631
37.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
37.1M
            goto jump_max_until_2;
1633
11.4M
        case JUMP_MAX_UNTIL_3:
1634
11.4M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
11.4M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
21.5M
        case JUMP_BRANCH:
1643
21.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
21.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
11.3M
        case JUMP_REPEAT:
1658
11.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
11.3M
            goto jump_repeat;
1660
3.50M
        case JUMP_REPEAT_ONE_1:
1661
3.50M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.50M
            goto jump_repeat_one_1;
1663
40.1M
        case JUMP_REPEAT_ONE_2:
1664
40.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
40.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
9.63M
        case JUMP_ASSERT:
1673
9.63M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
9.63M
            goto jump_assert;
1675
9.03M
        case JUMP_ASSERT_NOT:
1676
9.03M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
9.03M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
143M
    }
1683
1684
0
    return ret; /* should never get here */
1685
143M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
53.9M
{
601
53.9M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
53.9M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
53.9M
    Py_ssize_t ret = 0;
604
53.9M
    int jump;
605
53.9M
    unsigned int sigcount = state->sigcount;
606
607
53.9M
    SRE(match_context)* ctx;
608
53.9M
    SRE(match_context)* nextctx;
609
53.9M
    INIT_TRACE(state);
610
611
53.9M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
53.9M
    DATA_ALLOC(SRE(match_context), ctx);
614
53.9M
    ctx->last_ctx_pos = -1;
615
53.9M
    ctx->jump = JUMP_NONE;
616
53.9M
    ctx->toplevel = toplevel;
617
53.9M
    ctx_pos = alloc_pos;
618
619
53.9M
#if USE_COMPUTED_GOTOS
620
53.9M
#include "sre_targets.h"
621
53.9M
#endif
622
623
239M
entrance:
624
625
239M
    ;  // Fashion statement.
626
239M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
239M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
9.17M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
973
            TRACE(("reject (got %tu chars, need %zu)\n",
633
973
                   end - ptr, (size_t) pattern[3]));
634
973
            RETURN_FAILURE;
635
973
        }
636
9.17M
        pattern += pattern[1] + 1;
637
9.17M
    }
638
639
239M
#if USE_COMPUTED_GOTOS
640
239M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
239M
    {
647
648
239M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
64.1M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
64.1M
                   ptr, pattern[0]));
653
64.1M
            {
654
64.1M
                int i = pattern[0];
655
64.1M
                if (i & 1)
656
13.5M
                    state->lastindex = i/2 + 1;
657
64.1M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
61.8M
                    int j = state->lastmark + 1;
663
64.1M
                    while (j < i)
664
2.28M
                        state->mark[j++] = NULL;
665
61.8M
                    state->lastmark = i;
666
61.8M
                }
667
64.1M
                state->mark[i] = ptr;
668
64.1M
            }
669
64.1M
            pattern++;
670
64.1M
            DISPATCH;
671
672
64.1M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
24.6M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
24.6M
                   ptr, *pattern));
677
24.6M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
7.43M
                RETURN_FAILURE;
679
17.2M
            pattern++;
680
17.2M
            ptr++;
681
17.2M
            DISPATCH;
682
683
17.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
25.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
25.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
25.5M
            if (ctx->toplevel &&
698
7.05M
                ((state->match_all && ptr != state->end) ||
699
7.05M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
25.5M
            state->ptr = ptr;
704
25.5M
            RETURN_SUCCESS;
705
706
1.83M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.83M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.83M
            if (!SRE(at)(state, ptr, *pattern))
711
1.82M
                RETURN_FAILURE;
712
4.09k
            pattern++;
713
4.09k
            DISPATCH;
714
715
4.09k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
56.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
56.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
56.1M
            if (ptr >= end ||
749
56.1M
                !SRE(charset)(state, pattern + 1, *ptr))
750
8.97M
                RETURN_FAILURE;
751
47.1M
            pattern += pattern[0];
752
47.1M
            ptr++;
753
47.1M
            DISPATCH;
754
755
47.1M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.86M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.86M
                   pattern, ptr, pattern[0]));
758
2.86M
            if (ptr >= end ||
759
2.86M
                sre_lower_ascii(*ptr) != *pattern)
760
34.5k
                RETURN_FAILURE;
761
2.82M
            pattern++;
762
2.82M
            ptr++;
763
2.82M
            DISPATCH;
764
765
2.82M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.2M
        TARGET(SRE_OP_JUMP):
845
25.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.2M
                   ptr, pattern[0]));
850
25.2M
            pattern += pattern[0];
851
25.2M
            DISPATCH;
852
853
30.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
30.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
30.3M
            LASTMARK_SAVE();
858
30.3M
            if (state->repeat)
859
27.2M
                MARK_PUSH(ctx->lastmark);
860
65.2M
            for (; pattern[0]; pattern += pattern[0]) {
861
59.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
30.4M
                    (ptr >= end ||
863
30.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
22.3M
                    continue;
865
37.1M
                if (pattern[1] == SRE_OP_IN &&
866
21.2M
                    (ptr >= end ||
867
21.2M
                     !SRE(charset)(state, pattern + 3,
868
21.2M
                                   (SRE_CODE) *ptr)))
869
11.6M
                    continue;
870
25.5M
                state->ptr = ptr;
871
25.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
25.5M
                if (ret) {
873
24.6M
                    if (state->repeat)
874
22.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.6M
                    RETURN_ON_ERROR(ret);
876
24.6M
                    RETURN_SUCCESS;
877
24.6M
                }
878
893k
                if (state->repeat)
879
7.83k
                    MARK_POP_KEEP(ctx->lastmark);
880
893k
                LASTMARK_RESTORE();
881
893k
            }
882
5.76M
            if (state->repeat)
883
5.06M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.76M
            RETURN_FAILURE;
885
886
92.9M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
92.9M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
92.9M
                   pattern[1], pattern[2]));
898
899
92.9M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
11.1k
                RETURN_FAILURE; /* cannot match */
901
902
92.9M
            state->ptr = ptr;
903
904
92.9M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
92.9M
            RETURN_ON_ERROR(ret);
906
92.9M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
92.9M
            ctx->count = ret;
908
92.9M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
92.9M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
35.4M
                RETURN_FAILURE;
917
918
57.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.30M
                ptr == state->end &&
920
5.65k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
5.65k
            {
922
                /* tail is empty.  we're finished */
923
5.65k
                state->ptr = ptr;
924
5.65k
                RETURN_SUCCESS;
925
5.65k
            }
926
927
57.4M
            LASTMARK_SAVE();
928
57.4M
            if (state->repeat)
929
44.6M
                MARK_PUSH(ctx->lastmark);
930
931
57.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
11.1M
                ctx->u.chr = pattern[pattern[0]+1];
935
11.1M
                for (;;) {
936
30.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
25.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
19.2M
                        ptr--;
939
19.2M
                        ctx->count--;
940
19.2M
                    }
941
11.1M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
4.78M
                        break;
943
6.39M
                    state->ptr = ptr;
944
6.39M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.39M
                            pattern+pattern[0]);
946
6.39M
                    if (ret) {
947
6.39M
                        if (state->repeat)
948
6.39M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.39M
                        RETURN_ON_ERROR(ret);
950
6.39M
                        RETURN_SUCCESS;
951
6.39M
                    }
952
304
                    if (state->repeat)
953
304
                        MARK_POP_KEEP(ctx->lastmark);
954
304
                    LASTMARK_RESTORE();
955
956
304
                    ptr--;
957
304
                    ctx->count--;
958
304
                }
959
4.78M
                if (state->repeat)
960
4.78M
                    MARK_POP_DISCARD(ctx->lastmark);
961
46.2M
            } else {
962
                /* general case */
963
47.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
47.0M
                    state->ptr = ptr;
965
47.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
47.0M
                            pattern+pattern[0]);
967
47.0M
                    if (ret) {
968
46.1M
                        if (state->repeat)
969
33.4M
                            MARK_POP_DISCARD(ctx->lastmark);
970
46.1M
                        RETURN_ON_ERROR(ret);
971
46.1M
                        RETURN_SUCCESS;
972
46.1M
                    }
973
845k
                    if (state->repeat)
974
100k
                        MARK_POP_KEEP(ctx->lastmark);
975
845k
                    LASTMARK_RESTORE();
976
977
845k
                    ptr--;
978
845k
                    ctx->count--;
979
845k
                }
980
76.8k
                if (state->repeat)
981
66.2k
                    MARK_POP_DISCARD(ctx->lastmark);
982
76.8k
            }
983
4.86M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
16.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
16.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
16.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
16.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
16.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
16.8M
            ctx->u.rep->count = -1;
1131
16.8M
            ctx->u.rep->pattern = pattern;
1132
16.8M
            ctx->u.rep->prev = state->repeat;
1133
16.8M
            ctx->u.rep->last_ptr = NULL;
1134
16.8M
            state->repeat = ctx->u.rep;
1135
1136
16.8M
            state->ptr = ptr;
1137
16.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
16.8M
            state->repeat = ctx->u.rep->prev;
1139
16.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
16.8M
            if (ret) {
1142
16.8M
                RETURN_ON_ERROR(ret);
1143
16.8M
                RETURN_SUCCESS;
1144
16.8M
            }
1145
375
            RETURN_FAILURE;
1146
1147
54.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
54.0M
            ctx->u.rep = state->repeat;
1155
54.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
54.0M
            state->ptr = ptr;
1159
1160
54.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
54.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
54.0M
                   ptr, ctx->count));
1164
1165
54.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
54.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
6.10M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
47.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
47.9M
                ctx->u.rep->count = ctx->count;
1185
47.9M
                LASTMARK_SAVE();
1186
47.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
47.9M
                LAST_PTR_PUSH();
1189
47.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
47.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
47.9M
                        ctx->u.rep->pattern+3);
1192
47.9M
                LAST_PTR_POP();
1193
47.9M
                if (ret) {
1194
37.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
37.2M
                    RETURN_ON_ERROR(ret);
1196
37.2M
                    RETURN_SUCCESS;
1197
37.2M
                }
1198
10.7M
                MARK_POP(ctx->lastmark);
1199
10.7M
                LASTMARK_RESTORE();
1200
10.7M
                ctx->u.rep->count = ctx->count-1;
1201
10.7M
                state->ptr = ptr;
1202
10.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
16.8M
            state->repeat = ctx->u.rep->prev;
1207
16.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
16.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
16.8M
            RETURN_ON_SUCCESS(ret);
1211
34.8k
            state->ptr = ptr;
1212
34.8k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.7M
                   ptr, pattern[1]));
1565
12.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.7M
            state->ptr = ptr - pattern[1];
1568
12.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.7M
            RETURN_ON_FAILURE(ret);
1570
11.1M
            pattern += pattern[0];
1571
11.1M
            DISPATCH;
1572
1573
12.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
12.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
12.3M
                   ptr, pattern[1]));
1578
12.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
12.3M
                state->ptr = ptr - pattern[1];
1580
12.3M
                LASTMARK_SAVE();
1581
12.3M
                if (state->repeat)
1582
12.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
24.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
24.6M
                if (ret) {
1586
7.49k
                    if (state->repeat)
1587
7.49k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
7.49k
                    RETURN_ON_ERROR(ret);
1589
7.49k
                    RETURN_FAILURE;
1590
7.49k
                }
1591
12.3M
                if (state->repeat)
1592
12.3M
                    MARK_POP(ctx->lastmark);
1593
12.3M
                LASTMARK_RESTORE();
1594
12.3M
            }
1595
12.3M
            pattern += pattern[0];
1596
12.3M
            DISPATCH;
1597
1598
12.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
239M
exit:
1620
239M
    ctx_pos = ctx->last_ctx_pos;
1621
239M
    jump = ctx->jump;
1622
239M
    DATA_POP_DISCARD(ctx);
1623
239M
    if (ctx_pos == -1) {
1624
53.9M
        state->sigcount = sigcount;
1625
53.9M
        return ret;
1626
53.9M
    }
1627
185M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
185M
    switch (jump) {
1630
47.9M
        case JUMP_MAX_UNTIL_2:
1631
47.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
47.9M
            goto jump_max_until_2;
1633
16.8M
        case JUMP_MAX_UNTIL_3:
1634
16.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
16.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
25.5M
        case JUMP_BRANCH:
1643
25.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
25.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
16.8M
        case JUMP_REPEAT:
1658
16.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
16.8M
            goto jump_repeat;
1660
6.39M
        case JUMP_REPEAT_ONE_1:
1661
6.39M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.39M
            goto jump_repeat_one_1;
1663
47.0M
        case JUMP_REPEAT_ONE_2:
1664
47.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
47.0M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.7M
        case JUMP_ASSERT:
1673
12.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.7M
            goto jump_assert;
1675
12.3M
        case JUMP_ASSERT_NOT:
1676
12.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
12.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
185M
    }
1683
1684
0
    return ret; /* should never get here */
1685
185M
}
1686
1687
/* Capturing groups need to be reset between two successive SRE(match)
   calls made from a loop.  This sets both state->lastmark and
   state->lastindex to -1.  The macro expands to a single statement and
   refers to a variable named `state`, which must be in scope where it
   is used. */
1688
#define RESET_CAPTURE_GROUP() \
1689
230M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan forward from state->start for the first position at which the
   pattern matches.

   If the pattern begins with an SRE_OP_INFO block, its contents select
   one of four scanning strategies: a one-character literal prefix, a
   longer literal prefix scanned with the overlap table, a leading
   character set, or the general case that tries every position in turn.
   Without an INFO block the general case (or the charset-free path) is
   used.

   Before each match attempt state->start and state->ptr are set to the
   candidate position, and RESET_CAPTURE_GROUP() is invoked after each
   failed attempt.  state->must_advance is set to 0 along the scanning
   paths.

   Returns 0 when no position matches (including when state->start is
   past state->end, or when fewer characters remain than the minimum
   recorded in the INFO block).  Returns 1 directly when the
   SRE_INFO_LITERAL flag is set and the literal prefix has been found.
   Otherwise returns whatever non-zero status SRE(match) produced. */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
111M
{
1694
111M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
111M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
111M
    Py_ssize_t status = 0;
1697
111M
    Py_ssize_t prefix_len = 0;
1698
111M
    Py_ssize_t prefix_skip = 0;
1699
111M
    SRE_CODE* prefix = NULL;
1700
111M
    SRE_CODE* charset = NULL;
1701
111M
    SRE_CODE* overlap = NULL;
1702
111M
    int flags = 0;
1703
111M
    INIT_TRACE(state);
1704
1705
111M
    if (ptr > end)
1706
0
        return 0;
1707
1708
111M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
111M
        flags = pattern[2];
1713
1714
111M
        /* a non-zero <min> larger than the remaining input: give up now */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
3.73M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
3.73M
                   end - ptr, (size_t) pattern[3]));
1717
3.73M
            return 0;
1718
3.73M
        }
1719
107M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.00M
            /* note: the prefix and charset paths below reload end from
               state->end, so this shortened end is only used by the
               general case */
            end -= pattern[3] - 1;
1723
8.00M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.00M
        }
1726
1727
107M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.01M
            prefix_len = pattern[5];
1731
8.01M
            prefix_skip = pattern[6];
1732
8.01M
            prefix = pattern + 7;
1733
8.01M
            /* based one entry before the end of the prefix data, so that
               overlap[i] for i >= 1 reads entry i-1 of the overlap data */
            overlap = prefix + prefix_len - 1;
1734
99.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
95.7M
            charset = pattern + 5;
1738
1739
107M
        /* step over the INFO block, using its <skip> field */
        pattern += 1 + pattern[1];
1740
107M
    }
1741
1742
107M
    TRACE(("prefix = %p %zd %zd\n",
1743
107M
           prefix, prefix_len, prefix_skip));
1744
107M
    TRACE(("charset = %p\n", charset));
1745
1746
107M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.16M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.80M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.80M
#endif
1753
4.80M
        /* scan up to the real end of input, not the shortened end */
        end = (SRE_CHAR *)state->end;
1754
4.80M
        state->must_advance = 0;
1755
7.99M
        while (ptr < end) {
1756
118M
            /* advance to the next occurrence of the literal */
            while (*ptr != c) {
1757
110M
                if (++ptr >= end)
1758
466k
                    return 0;
1759
110M
            }
1760
7.38M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.38M
            state->start = ptr;
1762
7.38M
            state->ptr = ptr + prefix_skip;
1763
7.38M
            if (flags & SRE_INFO_LITERAL)
1764
7.11k
                return 1; /* we got all of it */
1765
7.37M
            /* resume matching after the first prefix_skip characters;
               presumably each skipped prefix character occupies two
               code words in the pattern -- TODO confirm */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.37M
            if (status != 0)
1767
6.54M
                return status;
1768
833k
            ++ptr;
1769
833k
            RESET_CAPTURE_GROUP();
1770
833k
        }
1771
147k
        return 0;
1772
4.80M
    }
1773
1774
100M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
849k
        Py_ssize_t i = 0;
1778
1779
849k
        end = (SRE_CHAR *)state->end;
1780
849k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.67M
        for (i = 0; i < prefix_len; i++)
1784
1.11M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
559k
#endif
1787
1.76M
        while (ptr < end) {
1788
1.76M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
21.8M
            /* find the first prefix character; the post-increment leaves
               ptr one past it when the loop exits */
            while (*ptr++ != c) {
1790
20.1M
                if (ptr >= end)
1791
369
                    return 0;
1792
20.1M
            }
1793
1.76M
            if (ptr >= end)
1794
56
                return 0;
1795
1796
1.76M
            /* i counts the prefix characters matched so far */
            i = 1;
1797
1.76M
            state->must_advance = 0;
1798
1.76M
            do {
1799
1.76M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.64M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        /* in a do-while, continue jumps to the
                           `while (i != 0)` test below */
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.64M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.64M
                    /* ptr is on the last prefix character: start is the
                       first prefix character, and state->ptr is
                       start + prefix_skip */
                    state->start = ptr - (prefix_len - 1);
1808
1.64M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.64M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.64M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.64M
                    if (status != 0)
1813
848k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
791k
                    if (++ptr >= end)
1816
66
                        return 0;
1817
791k
                    RESET_CAPTURE_GROUP();
1818
791k
                }
1819
915k
                /* fall back to the matched length given by the overlap
                   table; 0 ends this inner loop and rescans for the
                   first prefix character */
                i = overlap[i];
1820
915k
            } while (i != 0);
1821
1.76M
        }
1822
0
        return 0;
1823
849k
    }
1824
1825
99.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
95.7M
        end = (SRE_CHAR *)state->end;
1828
95.7M
        state->must_advance = 0;
1829
98.8M
        for (;;) {
1830
402M
            /* skip characters that are not in the leading set */
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
304M
                ptr++;
1832
98.8M
            if (ptr >= end)
1833
3.68M
                return 0;
1834
95.1M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
95.1M
            state->start = ptr;
1836
95.1M
            state->ptr = ptr;
1837
95.1M
            status = SRE(match)(state, pattern, 0);
1838
95.1M
            if (status != 0)
1839
92.0M
                break;
1840
3.05M
            ptr++;
1841
3.05M
            RESET_CAPTURE_GROUP();
1842
3.05M
        }
1843
95.7M
    } else {
1844
        /* general case */
1845
4.11M
        assert(ptr <= end);
1846
4.11M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.11M
        state->start = state->ptr = ptr;
1848
4.11M
        /* only this first attempt passes 1 as the third argument; the
           retries in the loop below pass 0 */
        status = SRE(match)(state, pattern, 1);
1849
4.11M
        state->must_advance = 0;
1850
4.11M
        /* first attempt failed and the pattern opens with a
           beginning-of-string anchor: stop without trying later
           positions (presumably because such an anchor cannot match
           anywhere else) */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
75
            (pattern[1] == SRE_AT_BEGINNING ||
1852
75
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
229M
        /* retry at each following position, up to and including end */
        while (status == 0 && ptr < end) {
1858
225M
            ptr++;
1859
225M
            RESET_CAPTURE_GROUP();
1860
225M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
225M
            state->start = state->ptr = ptr;
1862
225M
            status = SRE(match)(state, pattern, 0);
1863
225M
        }
1864
4.11M
    }
1865
1866
96.2M
    return status;
1867
99.8M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
48.7M
{
1694
48.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.7M
    Py_ssize_t status = 0;
1697
48.7M
    Py_ssize_t prefix_len = 0;
1698
48.7M
    Py_ssize_t prefix_skip = 0;
1699
48.7M
    SRE_CODE* prefix = NULL;
1700
48.7M
    SRE_CODE* charset = NULL;
1701
48.7M
    SRE_CODE* overlap = NULL;
1702
48.7M
    int flags = 0;
1703
48.7M
    INIT_TRACE(state);
1704
1705
48.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.7M
        flags = pattern[2];
1713
1714
48.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
3.62M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
3.62M
                   end - ptr, (size_t) pattern[3]));
1717
3.62M
            return 0;
1718
3.62M
        }
1719
45.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.94M
            end -= pattern[3] - 1;
1723
1.94M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.94M
        }
1726
1727
45.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.94M
            prefix_len = pattern[5];
1731
1.94M
            prefix_skip = pattern[6];
1732
1.94M
            prefix = pattern + 7;
1733
1.94M
            overlap = prefix + prefix_len - 1;
1734
43.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
40.0M
            charset = pattern + 5;
1738
1739
45.1M
        pattern += 1 + pattern[1];
1740
45.1M
    }
1741
1742
45.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
45.1M
           prefix, prefix_len, prefix_skip));
1744
45.1M
    TRACE(("charset = %p\n", charset));
1745
1746
45.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.87M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.87M
#if SIZEOF_SRE_CHAR < 4
1750
1.87M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.87M
#endif
1753
1.87M
        end = (SRE_CHAR *)state->end;
1754
1.87M
        state->must_advance = 0;
1755
2.24M
        while (ptr < end) {
1756
30.4M
            while (*ptr != c) {
1757
28.6M
                if (++ptr >= end)
1758
373k
                    return 0;
1759
28.6M
            }
1760
1.73M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.73M
            state->start = ptr;
1762
1.73M
            state->ptr = ptr + prefix_skip;
1763
1.73M
            if (flags & SRE_INFO_LITERAL)
1764
582
                return 1; /* we got all of it */
1765
1.73M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.73M
            if (status != 0)
1767
1.35M
                return status;
1768
377k
            ++ptr;
1769
377k
            RESET_CAPTURE_GROUP();
1770
377k
        }
1771
143k
        return 0;
1772
1.87M
    }
1773
1774
43.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
75.7k
        Py_ssize_t i = 0;
1778
1779
75.7k
        end = (SRE_CHAR *)state->end;
1780
75.7k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
75.7k
#if SIZEOF_SRE_CHAR < 4
1783
227k
        for (i = 0; i < prefix_len; i++)
1784
151k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
75.7k
#endif
1787
144k
        while (ptr < end) {
1788
144k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.76M
            while (*ptr++ != c) {
1790
2.62M
                if (ptr >= end)
1791
78
                    return 0;
1792
2.62M
            }
1793
144k
            if (ptr >= end)
1794
16
                return 0;
1795
1796
144k
            i = 1;
1797
144k
            state->must_advance = 0;
1798
144k
            do {
1799
144k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
134k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
134k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
134k
                    state->start = ptr - (prefix_len - 1);
1808
134k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
134k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
134k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
134k
                    if (status != 0)
1813
75.6k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
58.7k
                    if (++ptr >= end)
1816
27
                        return 0;
1817
58.7k
                    RESET_CAPTURE_GROUP();
1818
58.7k
                }
1819
68.9k
                i = overlap[i];
1820
68.9k
            } while (i != 0);
1821
144k
        }
1822
0
        return 0;
1823
75.7k
    }
1824
1825
43.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
40.0M
        end = (SRE_CHAR *)state->end;
1828
40.0M
        state->must_advance = 0;
1829
41.9M
        for (;;) {
1830
108M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
66.2M
                ptr++;
1832
41.9M
            if (ptr >= end)
1833
2.58M
                return 0;
1834
39.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
39.3M
            state->start = ptr;
1836
39.3M
            state->ptr = ptr;
1837
39.3M
            status = SRE(match)(state, pattern, 0);
1838
39.3M
            if (status != 0)
1839
37.4M
                break;
1840
1.88M
            ptr++;
1841
1.88M
            RESET_CAPTURE_GROUP();
1842
1.88M
        }
1843
40.0M
    } else {
1844
        /* general case */
1845
3.11M
        assert(ptr <= end);
1846
3.11M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.11M
        state->start = state->ptr = ptr;
1848
3.11M
        status = SRE(match)(state, pattern, 1);
1849
3.11M
        state->must_advance = 0;
1850
3.11M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
21
            (pattern[1] == SRE_AT_BEGINNING ||
1852
21
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
55.4M
        while (status == 0 && ptr < end) {
1858
52.3M
            ptr++;
1859
52.3M
            RESET_CAPTURE_GROUP();
1860
52.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
52.3M
            state->start = state->ptr = ptr;
1862
52.3M
            status = SRE(match)(state, pattern, 0);
1863
52.3M
        }
1864
3.11M
    }
1865
1866
40.6M
    return status;
1867
43.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
55.5M
{
1694
55.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
55.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
55.5M
    Py_ssize_t status = 0;
1697
55.5M
    Py_ssize_t prefix_len = 0;
1698
55.5M
    Py_ssize_t prefix_skip = 0;
1699
55.5M
    SRE_CODE* prefix = NULL;
1700
55.5M
    SRE_CODE* charset = NULL;
1701
55.5M
    SRE_CODE* overlap = NULL;
1702
55.5M
    int flags = 0;
1703
55.5M
    INIT_TRACE(state);
1704
1705
55.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
55.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
55.5M
        flags = pattern[2];
1713
1714
55.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
109k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
109k
                   end - ptr, (size_t) pattern[3]));
1717
109k
            return 0;
1718
109k
        }
1719
55.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.41M
            end -= pattern[3] - 1;
1723
3.41M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.41M
        }
1726
1727
55.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.41M
            prefix_len = pattern[5];
1731
3.41M
            prefix_skip = pattern[6];
1732
3.41M
            prefix = pattern + 7;
1733
3.41M
            overlap = prefix + prefix_len - 1;
1734
51.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
51.1M
            charset = pattern + 5;
1738
1739
55.3M
        pattern += 1 + pattern[1];
1740
55.3M
    }
1741
1742
55.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
55.3M
           prefix, prefix_len, prefix_skip));
1744
55.3M
    TRACE(("charset = %p\n", charset));
1745
1746
55.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.93M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.93M
#if SIZEOF_SRE_CHAR < 4
1750
2.93M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.93M
#endif
1753
2.93M
        end = (SRE_CHAR *)state->end;
1754
2.93M
        state->must_advance = 0;
1755
3.34M
        while (ptr < end) {
1756
58.9M
            while (*ptr != c) {
1757
55.7M
                if (++ptr >= end)
1758
87.0k
                    return 0;
1759
55.7M
            }
1760
3.24M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.24M
            state->start = ptr;
1762
3.24M
            state->ptr = ptr + prefix_skip;
1763
3.24M
            if (flags & SRE_INFO_LITERAL)
1764
3.73k
                return 1; /* we got all of it */
1765
3.24M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.24M
            if (status != 0)
1767
2.83M
                return status;
1768
409k
            ++ptr;
1769
409k
            RESET_CAPTURE_GROUP();
1770
409k
        }
1771
3.46k
        return 0;
1772
2.93M
    }
1773
1774
52.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
483k
        Py_ssize_t i = 0;
1778
1779
483k
        end = (SRE_CHAR *)state->end;
1780
483k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
483k
#if SIZEOF_SRE_CHAR < 4
1783
1.44M
        for (i = 0; i < prefix_len; i++)
1784
966k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
483k
#endif
1787
946k
        while (ptr < end) {
1788
946k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
7.19M
            while (*ptr++ != c) {
1790
6.24M
                if (ptr >= end)
1791
137
                    return 0;
1792
6.24M
            }
1793
946k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
946k
            i = 1;
1797
946k
            state->must_advance = 0;
1798
947k
            do {
1799
947k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
914k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
914k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
914k
                    state->start = ptr - (prefix_len - 1);
1808
914k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
914k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
914k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
914k
                    if (status != 0)
1813
483k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
431k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
430k
                    RESET_CAPTURE_GROUP();
1818
430k
                }
1819
464k
                i = overlap[i];
1820
464k
            } while (i != 0);
1821
946k
        }
1822
0
        return 0;
1823
483k
    }
1824
1825
51.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
51.1M
        end = (SRE_CHAR *)state->end;
1828
51.1M
        state->must_advance = 0;
1829
51.6M
        for (;;) {
1830
221M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
170M
                ptr++;
1832
51.6M
            if (ptr >= end)
1833
1.05M
                return 0;
1834
50.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
50.5M
            state->start = ptr;
1836
50.5M
            state->ptr = ptr;
1837
50.5M
            status = SRE(match)(state, pattern, 0);
1838
50.5M
            if (status != 0)
1839
50.0M
                break;
1840
540k
            ptr++;
1841
540k
            RESET_CAPTURE_GROUP();
1842
540k
        }
1843
51.1M
    } else {
1844
        /* general case */
1845
874k
        assert(ptr <= end);
1846
874k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
874k
        state->start = state->ptr = ptr;
1848
874k
        status = SRE(match)(state, pattern, 1);
1849
874k
        state->must_advance = 0;
1850
874k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
28
            (pattern[1] == SRE_AT_BEGINNING ||
1852
28
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
137M
        while (status == 0 && ptr < end) {
1858
136M
            ptr++;
1859
136M
            RESET_CAPTURE_GROUP();
1860
136M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
136M
            state->start = state->ptr = ptr;
1862
136M
            status = SRE(match)(state, pattern, 0);
1863
136M
        }
1864
874k
    }
1865
1866
50.9M
    return status;
1867
51.9M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.38M
{
    /* Search routine body (the instantiation this listing labels
       sre_ucs4_search).  Starting at state->start, it looks for the first
       position at or before the end of the input where the pattern matches.

       `state` and `pattern` are not declared in the visible body; they are
       presumably the parameters of the enclosing definition.

       Result: 0 when no position matches; 1 when an all-literal prefix is
       found (SRE_INFO_LITERAL); otherwise whatever nonzero status
       SRE(match) produced, returned unchanged.  Before every match attempt
       state->start is set to the candidate start position and state->ptr
       to the position matching resumes from. */
1694
7.38M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.38M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.38M
    Py_ssize_t status = 0;
1697
7.38M
    Py_ssize_t prefix_len = 0;
1698
7.38M
    Py_ssize_t prefix_skip = 0;
1699
7.38M
    SRE_CODE* prefix = NULL;
1700
7.38M
    SRE_CODE* charset = NULL;
1701
7.38M
    SRE_CODE* overlap = NULL;
1702
7.38M
    int flags = 0;
1703
7.38M
    INIT_TRACE(state);
1704
1705
7.38M
    /* a start position past the end can never match */
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.38M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.38M
        flags = pattern[2];
1713
1714
7.38M
        /* fewer characters remain than the pattern's minimum width
           (pattern[3]): no match is possible anywhere */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.18k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.18k
                   end - ptr, (size_t) pattern[3]));
1717
5.18k
            return 0;
1718
5.18k
        }
1719
7.38M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.65M
            /* NOTE: only the general-case branch at the bottom still uses
               this shortened `end`; the prefix and charset branches reset
               `end` to state->end before scanning */
            end -= pattern[3] - 1;
1723
2.65M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.65M
        }
1726
1727
7.38M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.65M
            prefix_len = pattern[5];
1731
2.65M
            prefix_skip = pattern[6];
1732
2.65M
            prefix = pattern + 7;
1733
2.65M
            /* biased by -1 so that overlap[i] for i in 1..prefix_len
               addresses the code words stored right after the prefix data */
            overlap = prefix + prefix_len - 1;
1734
4.73M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.60M
            charset = pattern + 5;
1738
1739
7.38M
        /* step over the whole info block; pattern[1] is its skip count */
        pattern += 1 + pattern[1];
1740
7.38M
    }
1741
1742
7.38M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.38M
           prefix, prefix_len, prefix_skip));
1744
7.38M
    TRACE(("charset = %p\n", charset));
1745
1746
7.38M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.36M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.36M
        end = (SRE_CHAR *)state->end;
1754
2.36M
        state->must_advance = 0;
1755
2.40M
        while (ptr < end) {
1756
28.7M
            /* skip ahead to the next occurrence of the literal */
            while (*ptr != c) {
1757
26.3M
                if (++ptr >= end)
1758
5.45k
                    return 0;
1759
26.3M
            }
1760
2.40M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.40M
            state->start = ptr;
1762
2.40M
            state->ptr = ptr + prefix_skip;
1763
2.40M
            if (flags & SRE_INFO_LITERAL)
1764
2.80k
                return 1; /* we got all of it */
1765
2.40M
            /* resume matching past the part of the pattern already covered
               by the prefix; presumably each skipped prefix character takes
               two code words, hence 2*prefix_skip -- TODO confirm against
               the pattern compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.40M
            if (status != 0)
1767
2.35M
                return status;
1768
46.2k
            /* the rest of the pattern failed here: retry one character on */
            ++ptr;
1769
46.2k
            RESET_CAPTURE_GROUP();
1770
46.2k
        }
1771
712
        return 0;
1772
2.36M
    }
1773
1774
5.02M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
290k
        Py_ssize_t i = 0;
1778
1779
290k
        end = (SRE_CHAR *)state->end;
1780
290k
        /* the remaining input is shorter than the prefix itself */
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
671k
        while (ptr < end) {
1788
671k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
11.9M
            /* find the first prefix character; the post-increment leaves
               ptr one past it, i.e. at the candidate for prefix[1] */
            while (*ptr++ != c) {
1790
11.2M
                if (ptr >= end)
1791
154
                    return 0;
1792
11.2M
            }
1793
671k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
671k
            /* i counts how many prefix characters have matched so far */
            i = 1;
1797
671k
            state->must_advance = 0;
1798
671k
            do {
1799
671k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
592k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        /* in a do/while this jumps to the `i != 0` test,
                           which holds here, so the next prefix character
                           is compared */
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
592k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
592k
                    /* ptr is on the last prefix character: start is the
                       first prefix character, and state->ptr ends up at
                       start + prefix_skip */
                    state->start = ptr - (prefix_len - 1);
1808
592k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
592k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
592k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
592k
                    if (status != 0)
1813
289k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
302k
                    if (++ptr >= end)
1816
20
                        return 0;
1817
302k
                    RESET_CAPTURE_GROUP();
1818
302k
                }
1819
381k
                /* after a mismatch or a rejected candidate, the table gives
                   the prefix index to resume comparing from; 0 ends this
                   loop and restarts the scan for prefix[0] */
                i = overlap[i];
1820
381k
            } while (i != 0);
1821
671k
        }
1822
0
        return 0;
1823
290k
    }
1824
1825
4.73M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.60M
        end = (SRE_CHAR *)state->end;
1828
4.60M
        state->must_advance = 0;
1829
5.24M
        for (;;) {
1830
72.8M
            /* skip characters that SRE(charset) rejects for this set */
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
67.6M
                ptr++;
1832
5.24M
            if (ptr >= end)
1833
55.7k
                return 0;
1834
5.18M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
5.18M
            state->start = ptr;
1836
5.18M
            state->ptr = ptr;
1837
5.18M
            status = SRE(match)(state, pattern, 0);
1838
5.18M
            /* any nonzero status leaves the loop and is returned at the
               bottom of the function */
            if (status != 0)
1839
4.55M
                break;
1840
634k
            ptr++;
1841
634k
            RESET_CAPTURE_GROUP();
1842
634k
        }
1843
4.60M
    } else {
1844
        /* general case */
1845
124k
        assert(ptr <= end);
1846
124k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
124k
        state->start = state->ptr = ptr;
1848
124k
        /* only this first attempt passes 1 as the last argument, and
           must_advance is cleared only after it; presumably both exist so
           the first attempt can honour state->must_advance -- TODO confirm
           in SRE(match) */
        status = SRE(match)(state, pattern, 1);
1849
124k
        state->must_advance = 0;
1850
124k
        /* a pattern that opens with a beginning-of-string anchor and
           failed at the first position is not retried at later positions:
           park the state at `end` and report no match */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
26
            (pattern[1] == SRE_AT_BEGINNING ||
1852
26
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
36.6M
        /* try each following position in turn until one matches, an
           error status comes back, or `end` is reached (`end` may have
           been shortened by the info block above) */
        while (status == 0 && ptr < end) {
1858
36.4M
            ptr++;
1859
36.4M
            RESET_CAPTURE_GROUP();
1860
36.4M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
36.4M
            state->start = state->ptr = ptr;
1862
36.4M
            status = SRE(match)(state, pattern, 0);
1863
36.4M
        }
1864
124k
    }
1865
1866
4.67M
    return status;
1867
4.73M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/