Coverage Report

Created: 2026-01-10 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
93.2M
{
18
    /* check if pointer is at given position */
19
20
93.2M
    Py_ssize_t thisp, thatp;
21
22
93.2M
    switch (at) {
23
24
11.3M
    case SRE_AT_BEGINNING:
25
11.3M
    case SRE_AT_BEGINNING_STRING:
26
11.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
78.5M
    case SRE_AT_END:
33
78.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.14M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
78.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.33M
    case SRE_AT_END_STRING:
42
3.33M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
93.2M
    }
87
88
0
    return 0;
89
93.2M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
34.3M
{
18
    /* check if pointer is at given position */
19
20
34.3M
    Py_ssize_t thisp, thatp;
21
22
34.3M
    switch (at) {
23
24
10.3M
    case SRE_AT_BEGINNING:
25
10.3M
    case SRE_AT_BEGINNING_STRING:
26
10.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
22.7M
    case SRE_AT_END:
33
22.7M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
423k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
22.7M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.30M
    case SRE_AT_END_STRING:
42
1.30M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
34.3M
    }
87
88
0
    return 0;
89
34.3M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
38.4M
{
18
    /* check if pointer is at given position */
19
20
38.4M
    Py_ssize_t thisp, thatp;
21
22
38.4M
    switch (at) {
23
24
1.02M
    case SRE_AT_BEGINNING:
25
1.02M
    case SRE_AT_BEGINNING_STRING:
26
1.02M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
36.5M
    case SRE_AT_END:
33
36.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
710k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
36.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
920k
    case SRE_AT_END_STRING:
42
920k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
38.4M
    }
87
88
0
    return 0;
89
38.4M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
20.4M
{
18
    /* check if pointer is at given position */
19
20
20.4M
    Py_ssize_t thisp, thatp;
21
22
20.4M
    switch (at) {
23
24
18.5k
    case SRE_AT_BEGINNING:
25
18.5k
    case SRE_AT_BEGINNING_STRING:
26
18.5k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
19.3M
    case SRE_AT_END:
33
19.3M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
7.24k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
19.3M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.11M
    case SRE_AT_END_STRING:
42
1.11M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
20.4M
    }
87
88
0
    return 0;
89
20.4M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.54G
{
94
    /* check if character is a member of the given set */
95
96
1.54G
    int ok = 1;
97
98
3.55G
    for (;;) {
99
3.55G
        switch (*set++) {
100
101
1.04G
        case SRE_OP_FAILURE:
102
1.04G
            return !ok;
103
104
1.30G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.30G
            if (ch == set[0])
107
8.42M
                return ok;
108
1.29G
            set++;
109
1.29G
            break;
110
111
98.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
98.0M
            if (sre_category(set[0], (int) ch))
114
50.2M
                return ok;
115
47.8M
            set++;
116
47.8M
            break;
117
118
479M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
479M
            if (ch < 256 &&
121
457M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
195M
                return ok;
123
284M
            set += 256/SRE_CODE_BITS;
124
284M
            break;
125
126
377M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
377M
            if (set[0] <= ch && ch <= set[1])
129
238M
                return ok;
130
138M
            set += 2;
131
138M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
248M
        case SRE_OP_NEGATE:
148
248M
            ok = !ok;
149
248M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.55G
        }
175
3.55G
    }
176
1.54G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
459M
{
94
    /* check if character is a member of the given set */
95
96
459M
    int ok = 1;
97
98
974M
    for (;;) {
99
974M
        switch (*set++) {
100
101
271M
        case SRE_OP_FAILURE:
102
271M
            return !ok;
103
104
337M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
337M
            if (ch == set[0])
107
5.45M
                return ok;
108
332M
            set++;
109
332M
            break;
110
111
39.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
39.4M
            if (sre_category(set[0], (int) ch))
114
21.1M
                return ok;
115
18.2M
            set++;
116
18.2M
            break;
117
118
98.7M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
98.7M
            if (ch < 256 &&
121
98.7M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
50.1M
                return ok;
123
48.6M
            set += 256/SRE_CODE_BITS;
124
48.6M
            break;
125
126
186M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
186M
            if (set[0] <= ch && ch <= set[1])
129
110M
                return ok;
130
75.0M
            set += 2;
131
75.0M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
41.8M
        case SRE_OP_NEGATE:
148
41.8M
            ok = !ok;
149
41.8M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
974M
        }
175
974M
    }
176
459M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
669M
{
94
    /* check if character is a member of the given set */
95
96
669M
    int ok = 1;
97
98
1.63G
    for (;;) {
99
1.63G
        switch (*set++) {
100
101
496M
        case SRE_OP_FAILURE:
102
496M
            return !ok;
103
104
674M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
674M
            if (ch == set[0])
107
1.63M
                return ok;
108
672M
            set++;
109
672M
            break;
110
111
38.6M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
38.6M
            if (sre_category(set[0], (int) ch))
114
16.6M
                return ok;
115
22.0M
            set++;
116
22.0M
            break;
117
118
160M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
160M
            if (ch < 256 &&
121
150M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
49.6M
                return ok;
123
110M
            set += 256/SRE_CODE_BITS;
124
110M
            break;
125
126
160M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
160M
            if (set[0] <= ch && ch <= set[1])
129
105M
                return ok;
130
54.2M
            set += 2;
131
54.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
102M
        case SRE_OP_NEGATE:
148
102M
            ok = !ok;
149
102M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.63G
        }
175
1.63G
    }
176
669M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
412M
{
94
    /* check if character is a member of the given set */
95
96
412M
    int ok = 1;
97
98
946M
    for (;;) {
99
946M
        switch (*set++) {
100
101
281M
        case SRE_OP_FAILURE:
102
281M
            return !ok;
103
104
289M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
289M
            if (ch == set[0])
107
1.33M
                return ok;
108
288M
            set++;
109
288M
            break;
110
111
19.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
19.9M
            if (sre_category(set[0], (int) ch))
114
12.4M
                return ok;
115
7.54M
            set++;
116
7.54M
            break;
117
118
220M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
220M
            if (ch < 256 &&
121
208M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
95.3M
                return ok;
123
125M
            set += 256/SRE_CODE_BITS;
124
125M
            break;
125
126
31.1M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
31.1M
            if (set[0] <= ch && ch <= set[1])
129
21.6M
                return ok;
130
9.59M
            set += 2;
131
9.59M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
104M
        case SRE_OP_NEGATE:
148
104M
            ok = !ok;
149
104M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
946M
        }
175
946M
    }
176
412M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
638M
{
195
638M
    SRE_CODE chr;
196
638M
    SRE_CHAR c;
197
638M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
638M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
638M
    Py_ssize_t i;
200
638M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
638M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
78.1M
        end = ptr + maxcount;
205
206
638M
    switch (pattern[0]) {
207
208
503M
    case SRE_OP_IN:
209
        /* repeated set */
210
503M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
868M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
365M
            ptr++;
213
503M
        break;
214
215
41.3M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
41.3M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
108M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
67.3M
            ptr++;
220
41.3M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
91.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
91.9M
        chr = pattern[1];
232
91.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
91.9M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
80.7M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
80.7M
        else
238
80.7M
#endif
239
96.5M
        while (ptr < end && *ptr == c)
240
4.60M
            ptr++;
241
91.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.68M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.68M
        chr = pattern[1];
270
1.68M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.68M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.07M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.07M
        else
276
1.07M
#endif
277
42.1M
        while (ptr < end && *ptr != c)
278
40.5M
            ptr++;
279
1.68M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
638M
    }
319
320
638M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
638M
           ptr - (SRE_CHAR*) state->ptr));
322
638M
    return ptr - (SRE_CHAR*) state->ptr;
323
638M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
244M
{
195
244M
    SRE_CODE chr;
196
244M
    SRE_CHAR c;
197
244M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
244M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
244M
    Py_ssize_t i;
200
244M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
244M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
22.8M
        end = ptr + maxcount;
205
206
244M
    switch (pattern[0]) {
207
208
164M
    case SRE_OP_IN:
209
        /* repeated set */
210
164M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
287M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
123M
            ptr++;
213
164M
        break;
214
215
12.8M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
12.8M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
29.7M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.8M
            ptr++;
220
12.8M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
66.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
66.6M
        chr = pattern[1];
232
66.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
66.6M
        c = (SRE_CHAR) chr;
234
66.6M
#if SIZEOF_SRE_CHAR < 4
235
66.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
66.6M
        else
238
66.6M
#endif
239
68.9M
        while (ptr < end && *ptr == c)
240
2.24M
            ptr++;
241
66.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
633k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
633k
        chr = pattern[1];
270
633k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
633k
        c = (SRE_CHAR) chr;
272
633k
#if SIZEOF_SRE_CHAR < 4
273
633k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
633k
        else
276
633k
#endif
277
9.37M
        while (ptr < end && *ptr != c)
278
8.74M
            ptr++;
279
633k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
244M
    }
319
320
244M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
244M
           ptr - (SRE_CHAR*) state->ptr));
322
244M
    return ptr - (SRE_CHAR*) state->ptr;
323
244M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
256M
{
195
256M
    SRE_CODE chr;
196
256M
    SRE_CHAR c;
197
256M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
256M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
256M
    Py_ssize_t i;
200
256M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
256M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
33.8M
        end = ptr + maxcount;
205
206
256M
    switch (pattern[0]) {
207
208
220M
    case SRE_OP_IN:
209
        /* repeated set */
210
220M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
345M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
124M
            ptr++;
213
220M
        break;
214
215
21.1M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
21.1M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
54.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
33.5M
            ptr++;
220
21.1M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
14.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
14.0M
        chr = pattern[1];
232
14.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
14.0M
        c = (SRE_CHAR) chr;
234
14.0M
#if SIZEOF_SRE_CHAR < 4
235
14.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
14.0M
        else
238
14.0M
#endif
239
15.5M
        while (ptr < end && *ptr == c)
240
1.50M
            ptr++;
241
14.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
442k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
442k
        chr = pattern[1];
270
442k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
442k
        c = (SRE_CHAR) chr;
272
442k
#if SIZEOF_SRE_CHAR < 4
273
442k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
442k
        else
276
442k
#endif
277
10.5M
        while (ptr < end && *ptr != c)
278
10.1M
            ptr++;
279
442k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
256M
    }
319
320
256M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
256M
           ptr - (SRE_CHAR*) state->ptr));
322
256M
    return ptr - (SRE_CHAR*) state->ptr;
323
256M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
137M
{
195
137M
    SRE_CODE chr;
196
137M
    SRE_CHAR c;
197
137M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
137M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
137M
    Py_ssize_t i;
200
137M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
137M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
21.4M
        end = ptr + maxcount;
205
206
137M
    switch (pattern[0]) {
207
208
118M
    case SRE_OP_IN:
209
        /* repeated set */
210
118M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
235M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
116M
            ptr++;
213
118M
        break;
214
215
7.26M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
7.26M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
24.1M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.9M
            ptr++;
220
7.26M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.2M
        chr = pattern[1];
232
11.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.2M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.1M
        while (ptr < end && *ptr == c)
240
858k
            ptr++;
241
11.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
604k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
604k
        chr = pattern[1];
270
604k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
604k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
22.2M
        while (ptr < end && *ptr != c)
278
21.6M
            ptr++;
279
604k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
137M
    }
319
320
137M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
137M
           ptr - (SRE_CHAR*) state->ptr));
322
137M
    return ptr - (SRE_CHAR*) state->ptr;
323
137M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Copy state->lastmark and state->lastindex into the current match
 * context (ctx) so that LASTMARK_RESTORE() can later write them back.
 * Expects `state` and `ctx` to be in scope at the expansion site. */
#define LASTMARK_SAVE()     \
354
525M
    do { \
355
525M
        ctx->lastmark = state->lastmark; \
356
525M
        ctx->lastindex = state->lastindex; \
357
525M
    } while (0)
358
/* Inverse of LASTMARK_SAVE(): write the lastmark/lastindex values held in
 * the current match context (ctx) back into `state`. */
#define LASTMARK_RESTORE()  \
359
333M
    do { \
360
333M
        state->lastmark = ctx->lastmark; \
361
333M
        state->lastindex = ctx->lastindex; \
362
333M
    } while (0)
363
364
/* Trace ctx->u.rep->last_ptr (as a character index) and then push its
 * value onto the data stack with DATA_PUSH(). Because DATA_PUSH() expands
 * to DATA_STACK_PUSH(), this can return from the enclosing function if the
 * stack cannot be grown. */
#define LAST_PTR_PUSH()     \
365
190M
    do { \
366
190M
        TRACE(("push last_ptr: %zd", \
367
190M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
190M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
190M
    } while (0)
370
/* Counterpart of LAST_PTR_PUSH(): pop the saved value from the data stack
 * back into ctx->u.rep->last_ptr, then trace the restored value. */
#define LAST_PTR_POP()  \
371
190M
    do { \
372
190M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
190M
        TRACE(("pop last_ptr: %zd", \
374
190M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
190M
    } while (0)
376
377
0
/* Leave the enclosing function immediately, returning `i` unchanged. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
880M
/* Set `ret` to 0 and jump to the `exit` label of the enclosing function
 * (the label itself is outside this excerpt). */
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
553M
/* Set `ret` to 1 and jump to the `exit` label of the enclosing function. */
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Result-propagation helpers. NOTE: the argument `i` is expanded more than
 * once and without parentheses, so pass a plain variable, not an
 * expression with side effects. */

/* If `i` is negative, return it from the enclosing function. */
#define RETURN_ON_ERROR(i) \
382
1.12G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
/* Return `i` if it is negative; otherwise, if it is positive, finish the
 * current match step with RETURN_SUCCESS. A value of 0 falls through. */
#define RETURN_ON_SUCCESS(i) \
384
114M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
/* Return `i` if it is negative; otherwise, if it is exactly 0, finish the
 * current match step with RETURN_FAILURE. A positive value falls through. */
#define RETURN_ON_FAILURE(i) \
386
26.7M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.43G
/* Reserve sizeof(type) bytes on top of the state's data stack and make
 * `ptr` point at them.
 *
 * - Records the current top offset in `alloc_pos` (must be in scope).
 * - If the remaining room is too small, calls data_stack_grow(); a negative
 *   result is returned from the enclosing function.
 * - After growing, `ctx` is re-derived from `ctx_pos` (unless ctx_pos is
 *   -1); presumably because growing may relocate state->data_stack --
 *   data_stack_grow() is defined outside this excerpt, TODO confirm.
 * - Finally advances state->data_stack_base by sizeof(type).
 *
 * The reserved bytes are not initialised by this macro. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.43G
do { \
390
1.43G
    alloc_pos = state->data_stack_base; \
391
1.43G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.43G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.43G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
162M
        int j = data_stack_grow(state, sizeof(type)); \
395
162M
        if (j < 0) return j; \
396
162M
        if (ctx_pos != -1) \
397
162M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
162M
    } \
399
1.43G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.43G
    state->data_stack_base += sizeof(type); \
401
1.43G
} while (0)
402
403
1.53G
/* Set `ptr` to the address of the `type` object located `pos` bytes from
 * the start of state->data_stack. Does not change data_stack_base. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.53G
do { \
405
1.53G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.53G
    ptr = (type*)(state->data_stack+pos); \
407
1.53G
} while (0)
408
409
469M
/* Append `size` bytes copied from `data` to the top of the data stack.
 *
 * If there is not enough room, data_stack_grow() is called first; a
 * negative result is returned from the enclosing function, and on success
 * `ctx` is re-derived from `ctx_pos` (unless ctx_pos is -1). The bytes are
 * then memcpy'd to the current top and data_stack_base is advanced by
 * `size`. Expects `ctx` and `ctx_pos` to be in scope. */
#define DATA_STACK_PUSH(state, data, size) \
410
469M
do { \
411
469M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
469M
           data, state->data_stack_base, size)); \
413
469M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
83.1k
        int j = data_stack_grow(state, size); \
415
83.1k
        if (j < 0) return j; \
416
83.1k
        if (ctx_pos != -1) \
417
83.1k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
83.1k
    } \
419
469M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
469M
    state->data_stack_base += size; \
421
469M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`, see bpo-39943 for details. */
426
300M
/* Copy the topmost `size` bytes of the data stack into `data`. When
 * `discard` is non-zero the stack top is also lowered by `size`; when it
 * is zero the bytes stay on the stack. No bounds check is performed. */
#define DATA_STACK_POP(state, data, size, discard) \
427
300M
do { \
428
300M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
300M
           data, state->data_stack_base-size, size)); \
430
300M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
300M
    if (discard) \
432
300M
        state->data_stack_base -= size; \
433
300M
} while (0)
434
435
1.60G
/* Drop the topmost `size` bytes of the data stack without copying them
 * anywhere: only state->data_stack_base is lowered. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.60G
do { \
437
1.60G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.60G
           state->data_stack_base-size, size)); \
439
1.60G
    state->data_stack_base -= size; \
440
1.60G
} while(0)
441
442
/* Convenience wrappers around the DATA_STACK_* macros that implicitly use
 * the local variable `state`. For the first three, `x` is a pointer and
 * the size is taken from the pointed-to object, sizeof(*(x)). */

/* Push a copy of *x onto the data stack. */
#define DATA_PUSH(x) \
443
190M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop the top sizeof(*x) bytes into *x and remove them from the stack. */
#define DATA_POP(x) \
445
190M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Remove the top sizeof(*x) bytes from the stack without copying them. */
#define DATA_POP_DISCARD(x) \
447
1.43G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Reserve room for one object of type `t` and point `p` at it. */
#define DATA_ALLOC(t,p) \
449
1.43G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point `p` at the object of type `t` stored at byte offset `pos`. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.53G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into a character index: the byte distance from
 * state->beginning divided by state->charsize. A null pointer maps to -1.
 * Only used in the TRACE() calls within this excerpt. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): debugging aid that prints `label`, the
 * number of marks (lastmark+1) and then state->mark[0..lastmark] as
 * character indices, with an extra space before every even-numbered entry
 * after the first so the marks appear in pairs. It only produces output
 * when VERBOSE is enabled and DO_TRACE is true; otherwise it expands to
 * nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save the first lastmark+1 entries of state->mark on the data stack.
 * Does nothing when `lastmark` is negative (no marks to save). May return
 * from the enclosing function via DATA_STACK_PUSH() if growing fails. */
#define MARK_PUSH(lastmark) \
472
375M
    do if (lastmark >= 0) { \
473
278M
        MARK_TRACE("push", (lastmark)); \
474
278M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
278M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
375M
    } while (0)
477
/* Restore the first lastmark+1 entries of state->mark from the top of the
 * data stack and remove them from the stack (discard flag = 1). Does
 * nothing when `lastmark` is negative. */
#define MARK_POP(lastmark) \
478
121M
    do if (lastmark >= 0) { \
479
107M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
107M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
107M
        MARK_TRACE("pop", (lastmark)); \
482
121M
    } while (0)
483
/* Like MARK_POP(), but leaves the saved marks on the data stack (discard
 * flag = 0) so they can be restored again later. */
#define MARK_POP_KEEP(lastmark) \
484
2.25M
    do if (lastmark >= 0) { \
485
2.01M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.01M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.01M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.25M
    } while (0)
489
/* Remove a saved block of lastmark+1 marks from the data stack without
 * copying it back into state->mark. Does nothing when `lastmark` is
 * negative. */
#define MARK_POP_DISCARD(lastmark) \
490
253M
    do if (lastmark >= 0) { \
491
171M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
171M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
171M
        MARK_TRACE("pop discard", (lastmark)); \
494
253M
    } while (0)
495
496
539M
/* Identifiers stored in a match context's `jump` field. DO_JUMPX() writes
 * its `jumpvalue` argument there, and SRE(match) gives its initial context
 * JUMP_NONE. Presumably the exit path uses the value to pick the label at
 * which the suspended caller resumes -- that code is outside this excerpt,
 * TODO confirm. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
190M
#define JUMP_MAX_UNTIL_2     2
499
114M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
113M
#define JUMP_REPEAT          7
504
10.9M
#define JUMP_REPEAT_ONE_1    8
505
212M
#define JUMP_REPEAT_ONE_2    9
506
42.0M
#define JUMP_MIN_REPEAT_ONE  10
507
161M
#define JUMP_BRANCH          11
508
26.7M
#define JUMP_ASSERT          12
509
22.7M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Emulate a recursive call of the matcher without using the C stack.
 *
 * Steps, in order:
 *   1. save the caller's `pattern` and `ptr` in the current context;
 *   2. allocate a new context on the data stack (DATA_ALLOC may return
 *      from the enclosing function on allocation failure, and may refresh
 *      `ctx`);
 *   3. fill in the new context: pattern to run, toplevel flag, the
 *      JUMP_* id, and the caller's context offset (last_ctx_pos);
 *   4. make the new context current and jump to `entrance`;
 *   5. define `jumplabel`, after which the caller's `pattern` and `ptr`
 *      are reloaded from `ctx`.
 *
 * The expansion is a bare statement sequence that ends by defining a
 * label (it is not wrapped in do/while), so it must be used as a full
 * statement inside a braced block, and each `jumplabel` may be used only
 * once per function. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
894M
    ctx->pattern = pattern; \
516
894M
    ctx->ptr = ptr; \
517
894M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
894M
    nextctx->pattern = nextpattern; \
519
894M
    nextctx->toplevel = toplevel_; \
520
894M
    nextctx->jump = jumpvalue; \
521
894M
    nextctx->last_ctx_pos = ctx_pos; \
522
894M
    pattern = nextpattern; \
523
894M
    ctx_pos = alloc_pos; \
524
894M
    ctx = nextctx; \
525
894M
    goto entrance; \
526
894M
    jumplabel: \
527
894M
    pattern = ctx->pattern; \
528
894M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX() variant in which the new context inherits the current
 * context's `toplevel` flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
845M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX() variant that always clears `toplevel` (passes 0) in the new
 * context. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
49.5M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-"call" frame of the matcher. Frames live on the state's data stack
 * (allocated with DATA_ALLOC) and are linked through last_ctx_pos. */
typedef struct {
537
    /* NOTE(review): use of `count` is not visible in this excerpt. */
    Py_ssize_t count;
538
    union {
539
        /* NOTE(review): use of `chr` is not visible in this excerpt. */
        SRE_CODE chr;
540
        /* Repeat record; its last_ptr member is saved and restored by
           LAST_PTR_PUSH() / LAST_PTR_POP(). */
        SRE_REPEAT* rep;
541
    } u;
542
    /* Copies of state->lastmark / state->lastindex taken by
       LASTMARK_SAVE() and written back by LASTMARK_RESTORE(). */
    int lastmark;
543
    int lastindex;
544
    /* Pattern position and string position saved by DO_JUMPX() before a
       jump and reloaded after the jump label. */
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    /* Set from SRE(match)'s `toplevel` argument or by DO_JUMPX(). */
    int toplevel;
547
    /* One of the JUMP_* values. */
    int jump;
548
    /* Data-stack offset of the context that issued the jump; -1 for the
       initial context created by SRE(match). */
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
/* Increment `sigcount` and, whenever its low 12 bits become zero (once per
 * 4096 increments), call PyErr_CheckSignals(). If that call reports a
 * non-zero result, return SRE_ERROR_INTERRUPTED from the enclosing
 * function. Expects `sigcount` to be in scope. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.46G
    do {                                                           \
553
2.46G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.46G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS: the signal check used by the dispatch code.
 *
 * In Py_DEBUG builds it additionally supports failure injection: while
 * state->fail_after_count is non-negative it is decremented on each use,
 * and on the use where it was 0 the exception state->fail_after_exc is set
 * and SRE_ERROR_INTERRUPTED is returned from the enclosing function.
 * In other builds it is simply _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.46G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalise USE_COMPUTED_GOTOS to a defined 0 or 1:
 *  - compiler support present (HAVE_COMPUTED_GOTOS): default to 1 unless
 *    the builder already defined USE_COMPUTED_GOTOS;
 *  - no support but explicitly requested with a non-zero value: hard
 *    compile error;
 *  - otherwise: force it to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch primitives for SRE(match).
 *
 * With computed gotos, TARGET(OP) names the label TARGET_<OP>, and
 * DISPATCH runs the signal check, reads the next opcode (advancing
 * `pattern`), and jumps through the sre_targets[] table.
 *
 * Without them, TARGET(OP) is an ordinary `case OP` label and DISPATCH
 * jumps to the `dispatch` label, which in SRE(match) precedes the signal
 * check and the `switch (*pattern++)`. */
#if USE_COMPUTED_GOTOS
585
2.53G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.46G
        do {                               \
588
2.46G
            MAYBE_CHECK_SIGNALS;           \
589
2.46G
            goto *sre_targets[*pattern++]; \
590
2.46G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
539M
{
601
539M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
539M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
539M
    Py_ssize_t ret = 0;
604
539M
    int jump;
605
539M
    unsigned int sigcount = state->sigcount;
606
607
539M
    SRE(match_context)* ctx;
608
539M
    SRE(match_context)* nextctx;
609
539M
    INIT_TRACE(state);
610
611
539M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
539M
    DATA_ALLOC(SRE(match_context), ctx);
614
539M
    ctx->last_ctx_pos = -1;
615
539M
    ctx->jump = JUMP_NONE;
616
539M
    ctx->toplevel = toplevel;
617
539M
    ctx_pos = alloc_pos;
618
619
539M
#if USE_COMPUTED_GOTOS
620
539M
#include "sre_targets.h"
621
539M
#endif
622
623
1.43G
entrance:
624
625
1.43G
    ;  // Fashion statement.
626
1.43G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.43G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
63.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.01M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.01M
                   end - ptr, (size_t) pattern[3]));
634
5.01M
            RETURN_FAILURE;
635
5.01M
        }
636
58.2M
        pattern += pattern[1] + 1;
637
58.2M
    }
638
639
1.42G
#if USE_COMPUTED_GOTOS
640
1.42G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.42G
    {
647
648
1.42G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
625M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
625M
                   ptr, pattern[0]));
653
625M
            {
654
625M
                int i = pattern[0];
655
625M
                if (i & 1)
656
106M
                    state->lastindex = i/2 + 1;
657
625M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
619M
                    int j = state->lastmark + 1;
663
635M
                    while (j < i)
664
16.4M
                        state->mark[j++] = NULL;
665
619M
                    state->lastmark = i;
666
619M
                }
667
625M
                state->mark[i] = ptr;
668
625M
            }
669
625M
            pattern++;
670
625M
            DISPATCH;
671
672
625M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
139M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
139M
                   ptr, *pattern));
677
139M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
61.2M
                RETURN_FAILURE;
679
78.4M
            pattern++;
680
78.4M
            ptr++;
681
78.4M
            DISPATCH;
682
683
78.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
159M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
159M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
159M
            if (ctx->toplevel &&
698
45.2M
                ((state->match_all && ptr != state->end) ||
699
45.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
159M
            state->ptr = ptr;
704
159M
            RETURN_SUCCESS;
705
706
93.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
93.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
93.2M
            if (!SRE(at)(state, ptr, *pattern))
711
74.0M
                RETURN_FAILURE;
712
19.2M
            pattern++;
713
19.2M
            DISPATCH;
714
715
19.2M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
270M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
270M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
270M
            if (ptr >= end ||
749
269M
                !SRE(charset)(state, pattern + 1, *ptr))
750
85.7M
                RETURN_FAILURE;
751
184M
            pattern += pattern[0];
752
184M
            ptr++;
753
184M
            DISPATCH;
754
755
184M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.78M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.78M
                   pattern, ptr, pattern[0]));
758
5.78M
            if (ptr >= end ||
759
5.78M
                sre_lower_ascii(*ptr) != *pattern)
760
73.2k
                RETURN_FAILURE;
761
5.71M
            pattern++;
762
5.71M
            ptr++;
763
5.71M
            DISPATCH;
764
765
5.71M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
75.3M
        TARGET(SRE_OP_JUMP):
845
75.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
75.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
75.3M
                   ptr, pattern[0]));
850
75.3M
            pattern += pattern[0];
851
75.3M
            DISPATCH;
852
853
116M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
116M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
116M
            LASTMARK_SAVE();
858
116M
            if (state->repeat)
859
58.7M
                MARK_PUSH(ctx->lastmark);
860
292M
            for (; pattern[0]; pattern += pattern[0]) {
861
248M
                if (pattern[1] == SRE_OP_LITERAL &&
862
115M
                    (ptr >= end ||
863
115M
                     (SRE_CODE) *ptr != pattern[2]))
864
58.8M
                    continue;
865
189M
                if (pattern[1] == SRE_OP_IN &&
866
52.6M
                    (ptr >= end ||
867
52.5M
                     !SRE(charset)(state, pattern + 3,
868
52.5M
                                   (SRE_CODE) *ptr)))
869
28.7M
                    continue;
870
161M
                state->ptr = ptr;
871
161M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
161M
                if (ret) {
873
73.0M
                    if (state->repeat)
874
47.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
73.0M
                    RETURN_ON_ERROR(ret);
876
73.0M
                    RETURN_SUCCESS;
877
73.0M
                }
878
88.1M
                if (state->repeat)
879
15.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
88.1M
                LASTMARK_RESTORE();
881
88.1M
            }
882
43.6M
            if (state->repeat)
883
10.8M
                MARK_POP_DISCARD(ctx->lastmark);
884
43.6M
            RETURN_FAILURE;
885
886
602M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
602M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
602M
                   pattern[1], pattern[2]));
898
899
602M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.30M
                RETURN_FAILURE; /* cannot match */
901
902
601M
            state->ptr = ptr;
903
904
601M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
601M
            RETURN_ON_ERROR(ret);
906
601M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
601M
            ctx->count = ret;
908
601M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
601M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
411M
                RETURN_FAILURE;
917
918
190M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.14M
                ptr == state->end &&
920
77.0k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
77.0k
            {
922
                /* tail is empty.  we're finished */
923
77.0k
                state->ptr = ptr;
924
77.0k
                RETURN_SUCCESS;
925
77.0k
            }
926
927
190M
            LASTMARK_SAVE();
928
190M
            if (state->repeat)
929
103M
                MARK_PUSH(ctx->lastmark);
930
931
190M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
25.8M
                ctx->u.chr = pattern[pattern[0]+1];
935
25.8M
                for (;;) {
936
65.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
50.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
40.0M
                        ptr--;
939
40.0M
                        ctx->count--;
940
40.0M
                    }
941
25.8M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
14.9M
                        break;
943
10.9M
                    state->ptr = ptr;
944
10.9M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
10.9M
                            pattern+pattern[0]);
946
10.9M
                    if (ret) {
947
10.9M
                        if (state->repeat)
948
9.88M
                            MARK_POP_DISCARD(ctx->lastmark);
949
10.9M
                        RETURN_ON_ERROR(ret);
950
10.9M
                        RETURN_SUCCESS;
951
10.9M
                    }
952
622
                    if (state->repeat)
953
622
                        MARK_POP_KEEP(ctx->lastmark);
954
622
                    LASTMARK_RESTORE();
955
956
622
                    ptr--;
957
622
                    ctx->count--;
958
622
                }
959
14.9M
                if (state->repeat)
960
13.5M
                    MARK_POP_DISCARD(ctx->lastmark);
961
164M
            } else {
962
                /* general case */
963
251M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
212M
                    state->ptr = ptr;
965
212M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
212M
                            pattern+pattern[0]);
967
212M
                    if (ret) {
968
125M
                        if (state->repeat)
969
78.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
125M
                        RETURN_ON_ERROR(ret);
971
125M
                        RETURN_SUCCESS;
972
125M
                    }
973
86.8M
                    if (state->repeat)
974
2.24M
                        MARK_POP_KEEP(ctx->lastmark);
975
86.8M
                    LASTMARK_RESTORE();
976
977
86.8M
                    ptr--;
978
86.8M
                    ctx->count--;
979
86.8M
                }
980
39.0M
                if (state->repeat)
981
1.36M
                    MARK_POP_DISCARD(ctx->lastmark);
982
39.0M
            }
983
53.9M
            RETURN_FAILURE;
984
985
4.80M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
4.80M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
4.80M
                   pattern[1], pattern[2]));
997
998
4.80M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
4.80M
            state->ptr = ptr;
1002
1003
4.80M
            if (pattern[1] == 0)
1004
4.80M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
4.80M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
4.80M
            } else {
1028
                /* general case */
1029
4.80M
                LASTMARK_SAVE();
1030
4.80M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
42.0M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
42.0M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
42.0M
                    state->ptr = ptr;
1036
42.0M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
42.0M
                            pattern+pattern[0]);
1038
42.0M
                    if (ret) {
1039
4.80M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
4.80M
                        RETURN_ON_ERROR(ret);
1042
4.80M
                        RETURN_SUCCESS;
1043
4.80M
                    }
1044
37.2M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
37.2M
                    LASTMARK_RESTORE();
1047
1048
37.2M
                    state->ptr = ptr;
1049
37.2M
                    ret = SRE(count)(state, pattern+3, 1);
1050
37.2M
                    RETURN_ON_ERROR(ret);
1051
37.2M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
37.2M
                    if (ret == 0)
1053
0
                        break;
1054
37.2M
                    assert(ret == 1);
1055
37.2M
                    ptr++;
1056
37.2M
                    ctx->count++;
1057
37.2M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
113M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
113M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
113M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
113M
            ctx->u.rep = repeat_pool_malloc(state);
1127
113M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
113M
            ctx->u.rep->count = -1;
1131
113M
            ctx->u.rep->pattern = pattern;
1132
113M
            ctx->u.rep->prev = state->repeat;
1133
113M
            ctx->u.rep->last_ptr = NULL;
1134
113M
            state->repeat = ctx->u.rep;
1135
1136
113M
            state->ptr = ptr;
1137
113M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
113M
            state->repeat = ctx->u.rep->prev;
1139
113M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
113M
            if (ret) {
1142
43.7M
                RETURN_ON_ERROR(ret);
1143
43.7M
                RETURN_SUCCESS;
1144
43.7M
            }
1145
69.7M
            RETURN_FAILURE;
1146
1147
206M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
206M
            ctx->u.rep = state->repeat;
1155
206M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
206M
            state->ptr = ptr;
1159
1160
206M
            ctx->count = ctx->u.rep->count+1;
1161
1162
206M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
206M
                   ptr, ctx->count));
1164
1165
206M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
206M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.5M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
190M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
190M
                ctx->u.rep->count = ctx->count;
1185
190M
                LASTMARK_SAVE();
1186
190M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
190M
                LAST_PTR_PUSH();
1189
190M
                ctx->u.rep->last_ptr = state->ptr;
1190
190M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
190M
                        ctx->u.rep->pattern+3);
1192
190M
                LAST_PTR_POP();
1193
190M
                if (ret) {
1194
92.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
92.0M
                    RETURN_ON_ERROR(ret);
1196
92.0M
                    RETURN_SUCCESS;
1197
92.0M
                }
1198
98.8M
                MARK_POP(ctx->lastmark);
1199
98.8M
                LASTMARK_RESTORE();
1200
98.8M
                ctx->u.rep->count = ctx->count-1;
1201
98.8M
                state->ptr = ptr;
1202
98.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
114M
            state->repeat = ctx->u.rep->prev;
1207
114M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
114M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
114M
            RETURN_ON_SUCCESS(ret);
1211
70.6M
            state->ptr = ptr;
1212
70.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
26.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
26.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
26.7M
                   ptr, pattern[1]));
1565
26.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
26.7M
            state->ptr = ptr - pattern[1];
1568
26.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
26.7M
            RETURN_ON_FAILURE(ret);
1570
22.4M
            pattern += pattern[0];
1571
22.4M
            DISPATCH;
1572
1573
22.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
22.7M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
22.7M
                   ptr, pattern[1]));
1578
22.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
22.7M
                state->ptr = ptr - pattern[1];
1580
22.7M
                LASTMARK_SAVE();
1581
22.7M
                if (state->repeat)
1582
22.7M
                    MARK_PUSH(ctx->lastmark);
1583
1584
45.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
45.5M
                if (ret) {
1586
10.6k
                    if (state->repeat)
1587
10.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
10.6k
                    RETURN_ON_ERROR(ret);
1589
10.6k
                    RETURN_FAILURE;
1590
10.6k
                }
1591
22.7M
                if (state->repeat)
1592
22.7M
                    MARK_POP(ctx->lastmark);
1593
22.7M
                LASTMARK_RESTORE();
1594
22.7M
            }
1595
22.7M
            pattern += pattern[0];
1596
22.7M
            DISPATCH;
1597
1598
22.7M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.43G
exit:
1620
1.43G
    ctx_pos = ctx->last_ctx_pos;
1621
1.43G
    jump = ctx->jump;
1622
1.43G
    DATA_POP_DISCARD(ctx);
1623
1.43G
    if (ctx_pos == -1) {
1624
539M
        state->sigcount = sigcount;
1625
539M
        return ret;
1626
539M
    }
1627
894M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
894M
    switch (jump) {
1630
190M
        case JUMP_MAX_UNTIL_2:
1631
190M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
190M
            goto jump_max_until_2;
1633
114M
        case JUMP_MAX_UNTIL_3:
1634
114M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
114M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
161M
        case JUMP_BRANCH:
1643
161M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
161M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
113M
        case JUMP_REPEAT:
1658
113M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
113M
            goto jump_repeat;
1660
10.9M
        case JUMP_REPEAT_ONE_1:
1661
10.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
10.9M
            goto jump_repeat_one_1;
1663
212M
        case JUMP_REPEAT_ONE_2:
1664
212M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
212M
            goto jump_repeat_one_2;
1666
42.0M
        case JUMP_MIN_REPEAT_ONE:
1667
42.0M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
42.0M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
26.7M
        case JUMP_ASSERT:
1673
26.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
26.7M
            goto jump_assert;
1675
22.7M
        case JUMP_ASSERT_NOT:
1676
22.7M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
22.7M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
894M
    }
1683
1684
0
    return ret; /* should never get here */
1685
894M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
219M
{
601
219M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
219M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
219M
    Py_ssize_t ret = 0;
604
219M
    int jump;
605
219M
    unsigned int sigcount = state->sigcount;
606
607
219M
    SRE(match_context)* ctx;
608
219M
    SRE(match_context)* nextctx;
609
219M
    INIT_TRACE(state);
610
611
219M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
219M
    DATA_ALLOC(SRE(match_context), ctx);
614
219M
    ctx->last_ctx_pos = -1;
615
219M
    ctx->jump = JUMP_NONE;
616
219M
    ctx->toplevel = toplevel;
617
219M
    ctx_pos = alloc_pos;
618
619
219M
#if USE_COMPUTED_GOTOS
620
219M
#include "sre_targets.h"
621
219M
#endif
622
623
526M
entrance:
624
625
526M
    ;  // Fashion statement.
626
526M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
526M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
40.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.85M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.85M
                   end - ptr, (size_t) pattern[3]));
634
4.85M
            RETURN_FAILURE;
635
4.85M
        }
636
35.1M
        pattern += pattern[1] + 1;
637
35.1M
    }
638
639
522M
#if USE_COMPUTED_GOTOS
640
522M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
522M
    {
647
648
522M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
239M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
239M
                   ptr, pattern[0]));
653
239M
            {
654
239M
                int i = pattern[0];
655
239M
                if (i & 1)
656
44.2M
                    state->lastindex = i/2 + 1;
657
239M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
235M
                    int j = state->lastmark + 1;
663
247M
                    while (j < i)
664
11.9M
                        state->mark[j++] = NULL;
665
235M
                    state->lastmark = i;
666
235M
                }
667
239M
                state->mark[i] = ptr;
668
239M
            }
669
239M
            pattern++;
670
239M
            DISPATCH;
671
672
239M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
86.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
86.1M
                   ptr, *pattern));
677
86.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
36.5M
                RETURN_FAILURE;
679
49.5M
            pattern++;
680
49.5M
            ptr++;
681
49.5M
            DISPATCH;
682
683
49.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
64.6M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
64.6M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
64.6M
            if (ctx->toplevel &&
698
27.4M
                ((state->match_all && ptr != state->end) ||
699
27.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
64.6M
            state->ptr = ptr;
704
64.6M
            RETURN_SUCCESS;
705
706
34.3M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
34.3M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
34.3M
            if (!SRE(at)(state, ptr, *pattern))
711
17.0M
                RETURN_FAILURE;
712
17.3M
            pattern++;
713
17.3M
            DISPATCH;
714
715
17.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
74.5M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
74.5M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
74.5M
            if (ptr >= end ||
749
74.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
17.8M
                RETURN_FAILURE;
751
56.7M
            pattern += pattern[0];
752
56.7M
            ptr++;
753
56.7M
            DISPATCH;
754
755
56.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
613k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
613k
                   pattern, ptr, pattern[0]));
758
613k
            if (ptr >= end ||
759
613k
                sre_lower_ascii(*ptr) != *pattern)
760
23.8k
                RETURN_FAILURE;
761
590k
            pattern++;
762
590k
            ptr++;
763
590k
            DISPATCH;
764
765
590k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
32.9M
        TARGET(SRE_OP_JUMP):
845
32.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
32.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
32.9M
                   ptr, pattern[0]));
850
32.9M
            pattern += pattern[0];
851
32.9M
            DISPATCH;
852
853
61.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
61.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
61.2M
            LASTMARK_SAVE();
858
61.2M
            if (state->repeat)
859
12.8M
                MARK_PUSH(ctx->lastmark);
860
173M
            for (; pattern[0]; pattern += pattern[0]) {
861
143M
                if (pattern[1] == SRE_OP_LITERAL &&
862
66.0M
                    (ptr >= end ||
863
65.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.9M
                    continue;
865
118M
                if (pattern[1] == SRE_OP_IN &&
866
12.7M
                    (ptr >= end ||
867
12.6M
                     !SRE(charset)(state, pattern + 3,
868
12.6M
                                   (SRE_CODE) *ptr)))
869
6.54M
                    continue;
870
112M
                state->ptr = ptr;
871
112M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
112M
                if (ret) {
873
31.6M
                    if (state->repeat)
874
12.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
31.6M
                    RETURN_ON_ERROR(ret);
876
31.6M
                    RETURN_SUCCESS;
877
31.6M
                }
878
80.8M
                if (state->repeat)
879
5.65k
                    MARK_POP_KEEP(ctx->lastmark);
880
80.8M
                LASTMARK_RESTORE();
881
80.8M
            }
882
29.6M
            if (state->repeat)
883
710k
                MARK_POP_DISCARD(ctx->lastmark);
884
29.6M
            RETURN_FAILURE;
885
886
236M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
236M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
236M
                   pattern[1], pattern[2]));
898
899
236M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.08M
                RETURN_FAILURE; /* cannot match */
901
902
235M
            state->ptr = ptr;
903
904
235M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
235M
            RETURN_ON_ERROR(ret);
906
235M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
235M
            ctx->count = ret;
908
235M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
235M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
173M
                RETURN_FAILURE;
917
918
61.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
651k
                ptr == state->end &&
920
55.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
55.7k
            {
922
                /* tail is empty.  we're finished */
923
55.7k
                state->ptr = ptr;
924
55.7k
                RETURN_SUCCESS;
925
55.7k
            }
926
927
61.2M
            LASTMARK_SAVE();
928
61.2M
            if (state->repeat)
929
34.4M
                MARK_PUSH(ctx->lastmark);
930
931
61.2M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.89M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.89M
                for (;;) {
936
16.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.1M
                        ptr--;
939
10.1M
                        ctx->count--;
940
10.1M
                    }
941
5.89M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.38M
                        break;
943
3.51M
                    state->ptr = ptr;
944
3.51M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.51M
                            pattern+pattern[0]);
946
3.51M
                    if (ret) {
947
3.51M
                        if (state->repeat)
948
2.47M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.51M
                        RETURN_ON_ERROR(ret);
950
3.51M
                        RETURN_SUCCESS;
951
3.51M
                    }
952
143
                    if (state->repeat)
953
143
                        MARK_POP_KEEP(ctx->lastmark);
954
143
                    LASTMARK_RESTORE();
955
956
143
                    ptr--;
957
143
                    ctx->count--;
958
143
                }
959
2.38M
                if (state->repeat)
960
1.03M
                    MARK_POP_DISCARD(ctx->lastmark);
961
55.3M
            } else {
962
                /* general case */
963
77.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
66.7M
                    state->ptr = ptr;
965
66.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
66.7M
                            pattern+pattern[0]);
967
66.7M
                    if (ret) {
968
45.0M
                        if (state->repeat)
969
30.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
45.0M
                        RETURN_ON_ERROR(ret);
971
45.0M
                        RETURN_SUCCESS;
972
45.0M
                    }
973
21.6M
                    if (state->repeat)
974
1.31M
                        MARK_POP_KEEP(ctx->lastmark);
975
21.6M
                    LASTMARK_RESTORE();
976
977
21.6M
                    ptr--;
978
21.6M
                    ctx->count--;
979
21.6M
                }
980
10.2M
                if (state->repeat)
981
803k
                    MARK_POP_DISCARD(ctx->lastmark);
982
10.2M
            }
983
12.6M
            RETURN_FAILURE;
984
985
3.93M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.93M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.93M
                   pattern[1], pattern[2]));
997
998
3.93M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.93M
            state->ptr = ptr;
1002
1003
3.93M
            if (pattern[1] == 0)
1004
3.93M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.93M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.93M
            } else {
1028
                /* general case */
1029
3.93M
                LASTMARK_SAVE();
1030
3.93M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
13.0M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
13.0M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
13.0M
                    state->ptr = ptr;
1036
13.0M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
13.0M
                            pattern+pattern[0]);
1038
13.0M
                    if (ret) {
1039
3.93M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.93M
                        RETURN_ON_ERROR(ret);
1042
3.93M
                        RETURN_SUCCESS;
1043
3.93M
                    }
1044
9.16M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
9.16M
                    LASTMARK_RESTORE();
1047
1048
9.16M
                    state->ptr = ptr;
1049
9.16M
                    ret = SRE(count)(state, pattern+3, 1);
1050
9.16M
                    RETURN_ON_ERROR(ret);
1051
9.16M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
9.16M
                    if (ret == 0)
1053
0
                        break;
1054
9.16M
                    assert(ret == 1);
1055
9.16M
                    ptr++;
1056
9.16M
                    ctx->count++;
1057
9.16M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
26.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
26.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
26.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
26.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
26.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
26.3M
            ctx->u.rep->count = -1;
1131
26.3M
            ctx->u.rep->pattern = pattern;
1132
26.3M
            ctx->u.rep->prev = state->repeat;
1133
26.3M
            ctx->u.rep->last_ptr = NULL;
1134
26.3M
            state->repeat = ctx->u.rep;
1135
1136
26.3M
            state->ptr = ptr;
1137
26.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
26.3M
            state->repeat = ctx->u.rep->prev;
1139
26.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
26.3M
            if (ret) {
1142
11.2M
                RETURN_ON_ERROR(ret);
1143
11.2M
                RETURN_SUCCESS;
1144
11.2M
            }
1145
15.1M
            RETURN_FAILURE;
1146
1147
57.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
57.7M
            ctx->u.rep = state->repeat;
1155
57.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
57.7M
            state->ptr = ptr;
1159
1160
57.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
57.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
57.7M
                   ptr, ctx->count));
1164
1165
57.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
57.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
8.54M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
49.2M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
49.2M
                ctx->u.rep->count = ctx->count;
1185
49.2M
                LASTMARK_SAVE();
1186
49.2M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
49.2M
                LAST_PTR_PUSH();
1189
49.2M
                ctx->u.rep->last_ptr = state->ptr;
1190
49.2M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
49.2M
                        ctx->u.rep->pattern+3);
1192
49.2M
                LAST_PTR_POP();
1193
49.2M
                if (ret) {
1194
30.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
30.8M
                    RETURN_ON_ERROR(ret);
1196
30.8M
                    RETURN_SUCCESS;
1197
30.8M
                }
1198
18.3M
                MARK_POP(ctx->lastmark);
1199
18.3M
                LASTMARK_RESTORE();
1200
18.3M
                ctx->u.rep->count = ctx->count-1;
1201
18.3M
                state->ptr = ptr;
1202
18.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
26.9M
            state->repeat = ctx->u.rep->prev;
1207
26.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
26.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
26.9M
            RETURN_ON_SUCCESS(ret);
1211
15.7M
            state->ptr = ptr;
1212
15.7M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.16M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.16M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.16M
                   ptr, pattern[1]));
1565
3.16M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.16M
            state->ptr = ptr - pattern[1];
1568
3.16M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.16M
            RETURN_ON_FAILURE(ret);
1570
2.94M
            pattern += pattern[0];
1571
2.94M
            DISPATCH;
1572
1573
5.60M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.60M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.60M
                   ptr, pattern[1]));
1578
5.60M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.60M
                state->ptr = ptr - pattern[1];
1580
5.60M
                LASTMARK_SAVE();
1581
5.60M
                if (state->repeat)
1582
5.60M
                    MARK_PUSH(ctx->lastmark);
1583
1584
11.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
11.2M
                if (ret) {
1586
1.46k
                    if (state->repeat)
1587
1.46k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.46k
                    RETURN_ON_ERROR(ret);
1589
1.46k
                    RETURN_FAILURE;
1590
1.46k
                }
1591
5.60M
                if (state->repeat)
1592
5.60M
                    MARK_POP(ctx->lastmark);
1593
5.60M
                LASTMARK_RESTORE();
1594
5.60M
            }
1595
5.60M
            pattern += pattern[0];
1596
5.60M
            DISPATCH;
1597
1598
5.60M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
526M
exit:
1620
526M
    ctx_pos = ctx->last_ctx_pos;
1621
526M
    jump = ctx->jump;
1622
526M
    DATA_POP_DISCARD(ctx);
1623
526M
    if (ctx_pos == -1) {
1624
219M
        state->sigcount = sigcount;
1625
219M
        return ret;
1626
219M
    }
1627
307M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
307M
    switch (jump) {
1630
49.2M
        case JUMP_MAX_UNTIL_2:
1631
49.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
49.2M
            goto jump_max_until_2;
1633
26.9M
        case JUMP_MAX_UNTIL_3:
1634
26.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
26.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
112M
        case JUMP_BRANCH:
1643
112M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
112M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
26.3M
        case JUMP_REPEAT:
1658
26.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
26.3M
            goto jump_repeat;
1660
3.51M
        case JUMP_REPEAT_ONE_1:
1661
3.51M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.51M
            goto jump_repeat_one_1;
1663
66.7M
        case JUMP_REPEAT_ONE_2:
1664
66.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
66.7M
            goto jump_repeat_one_2;
1666
13.0M
        case JUMP_MIN_REPEAT_ONE:
1667
13.0M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
13.0M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.16M
        case JUMP_ASSERT:
1673
3.16M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.16M
            goto jump_assert;
1675
5.60M
        case JUMP_ASSERT_NOT:
1676
5.60M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.60M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
307M
    }
1683
1684
0
    return ret; /* should never get here */
1685
307M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
238M
{
601
238M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
238M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
238M
    Py_ssize_t ret = 0;
604
238M
    int jump;
605
238M
    unsigned int sigcount = state->sigcount;
606
607
238M
    SRE(match_context)* ctx;
608
238M
    SRE(match_context)* nextctx;
609
238M
    INIT_TRACE(state);
610
611
238M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
238M
    DATA_ALLOC(SRE(match_context), ctx);
614
238M
    ctx->last_ctx_pos = -1;
615
238M
    ctx->jump = JUMP_NONE;
616
238M
    ctx->toplevel = toplevel;
617
238M
    ctx_pos = alloc_pos;
618
619
238M
#if USE_COMPUTED_GOTOS
620
238M
#include "sre_targets.h"
621
238M
#endif
622
623
531M
entrance:
624
625
531M
    ;  // Fashion statement.
626
531M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
531M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
12.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
155k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
155k
                   end - ptr, (size_t) pattern[3]));
634
155k
            RETURN_FAILURE;
635
155k
        }
636
12.1M
        pattern += pattern[1] + 1;
637
12.1M
    }
638
639
531M
#if USE_COMPUTED_GOTOS
640
531M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
531M
    {
647
648
531M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
261M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
261M
                   ptr, pattern[0]));
653
261M
            {
654
261M
                int i = pattern[0];
655
261M
                if (i & 1)
656
36.7M
                    state->lastindex = i/2 + 1;
657
261M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
259M
                    int j = state->lastmark + 1;
663
262M
                    while (j < i)
664
2.79M
                        state->mark[j++] = NULL;
665
259M
                    state->lastmark = i;
666
259M
                }
667
261M
                state->mark[i] = ptr;
668
261M
            }
669
261M
            pattern++;
670
261M
            DISPATCH;
671
672
261M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
27.9M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
27.9M
                   ptr, *pattern));
677
27.9M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
13.9M
                RETURN_FAILURE;
679
13.9M
            pattern++;
680
13.9M
            ptr++;
681
13.9M
            DISPATCH;
682
683
13.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
62.6M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
62.6M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
62.6M
            if (ctx->toplevel &&
698
8.40M
                ((state->match_all && ptr != state->end) ||
699
8.40M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
62.6M
            state->ptr = ptr;
704
62.6M
            RETURN_SUCCESS;
705
706
38.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
38.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
38.4M
            if (!SRE(at)(state, ptr, *pattern))
711
36.5M
                RETURN_FAILURE;
712
1.88M
            pattern++;
713
1.88M
            DISPATCH;
714
715
1.88M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
114M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
114M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
114M
            if (ptr >= end ||
749
114M
                !SRE(charset)(state, pattern + 1, *ptr))
750
42.5M
                RETURN_FAILURE;
751
72.2M
            pattern += pattern[0];
752
72.2M
            ptr++;
753
72.2M
            DISPATCH;
754
755
72.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.16M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.16M
                   pattern, ptr, pattern[0]));
758
3.16M
            if (ptr >= end ||
759
3.16M
                sre_lower_ascii(*ptr) != *pattern)
760
19.0k
                RETURN_FAILURE;
761
3.15M
            pattern++;
762
3.15M
            ptr++;
763
3.15M
            DISPATCH;
764
765
3.15M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
17.2M
        TARGET(SRE_OP_JUMP):
845
17.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
17.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
17.2M
                   ptr, pattern[0]));
850
17.2M
            pattern += pattern[0];
851
17.2M
            DISPATCH;
852
853
22.7M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
22.7M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
22.7M
            LASTMARK_SAVE();
858
22.7M
            if (state->repeat)
859
16.8M
                MARK_PUSH(ctx->lastmark);
860
49.7M
            for (; pattern[0]; pattern += pattern[0]) {
861
43.8M
                if (pattern[1] == SRE_OP_LITERAL &&
862
19.7M
                    (ptr >= end ||
863
19.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
12.6M
                    continue;
865
31.2M
                if (pattern[1] == SRE_OP_IN &&
866
14.7M
                    (ptr >= end ||
867
14.7M
                     !SRE(charset)(state, pattern + 3,
868
14.7M
                                   (SRE_CODE) *ptr)))
869
8.12M
                    continue;
870
23.1M
                state->ptr = ptr;
871
23.1M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.1M
                if (ret) {
873
16.8M
                    if (state->repeat)
874
14.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
16.8M
                    RETURN_ON_ERROR(ret);
876
16.8M
                    RETURN_SUCCESS;
877
16.8M
                }
878
6.27M
                if (state->repeat)
879
3.33k
                    MARK_POP_KEEP(ctx->lastmark);
880
6.27M
                LASTMARK_RESTORE();
881
6.27M
            }
882
5.87M
            if (state->repeat)
883
2.75M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.87M
            RETURN_FAILURE;
885
886
235M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
235M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
235M
                   pattern[1], pattern[2]));
898
899
235M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
201k
                RETURN_FAILURE; /* cannot match */
901
902
235M
            state->ptr = ptr;
903
904
235M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
235M
            RETURN_ON_ERROR(ret);
906
235M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
235M
            ctx->count = ret;
908
235M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
235M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
176M
                RETURN_FAILURE;
917
918
58.8M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.28M
                ptr == state->end &&
920
17.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
17.8k
            {
922
                /* tail is empty.  we're finished */
923
17.8k
                state->ptr = ptr;
924
17.8k
                RETURN_SUCCESS;
925
17.8k
            }
926
927
58.8M
            LASTMARK_SAVE();
928
58.8M
            if (state->repeat)
929
22.2M
                MARK_PUSH(ctx->lastmark);
930
931
58.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.71M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.71M
                for (;;) {
936
8.44M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
6.60M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
3.72M
                        ptr--;
939
3.72M
                        ctx->count--;
940
3.72M
                    }
941
4.71M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.83M
                        break;
943
2.87M
                    state->ptr = ptr;
944
2.87M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.87M
                            pattern+pattern[0]);
946
2.87M
                    if (ret) {
947
2.87M
                        if (state->repeat)
948
2.84M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.87M
                        RETURN_ON_ERROR(ret);
950
2.87M
                        RETURN_SUCCESS;
951
2.87M
                    }
952
230
                    if (state->repeat)
953
230
                        MARK_POP_KEEP(ctx->lastmark);
954
230
                    LASTMARK_RESTORE();
955
956
230
                    ptr--;
957
230
                    ctx->count--;
958
230
                }
959
1.83M
                if (state->repeat)
960
1.82M
                    MARK_POP_DISCARD(ctx->lastmark);
961
54.1M
            } else {
962
                /* general case */
963
95.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
74.5M
                    state->ptr = ptr;
965
74.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
74.5M
                            pattern+pattern[0]);
967
74.5M
                    if (ret) {
968
32.7M
                        if (state->repeat)
969
17.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
32.7M
                        RETURN_ON_ERROR(ret);
971
32.7M
                        RETURN_SUCCESS;
972
32.7M
                    }
973
41.7M
                    if (state->repeat)
974
714k
                        MARK_POP_KEEP(ctx->lastmark);
975
41.7M
                    LASTMARK_RESTORE();
976
977
41.7M
                    ptr--;
978
41.7M
                    ctx->count--;
979
41.7M
                }
980
21.3M
                if (state->repeat)
981
423k
                    MARK_POP_DISCARD(ctx->lastmark);
982
21.3M
            }
983
23.1M
            RETURN_FAILURE;
984
985
858k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
858k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
858k
                   pattern[1], pattern[2]));
997
998
858k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
858k
            state->ptr = ptr;
1002
1003
858k
            if (pattern[1] == 0)
1004
858k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
858k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
858k
            } else {
1028
                /* general case */
1029
858k
                LASTMARK_SAVE();
1030
858k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
21.7M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
21.7M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
21.7M
                    state->ptr = ptr;
1036
21.7M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
21.7M
                            pattern+pattern[0]);
1038
21.7M
                    if (ret) {
1039
858k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
858k
                        RETURN_ON_ERROR(ret);
1042
858k
                        RETURN_SUCCESS;
1043
858k
                    }
1044
20.8M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
20.8M
                    LASTMARK_RESTORE();
1047
1048
20.8M
                    state->ptr = ptr;
1049
20.8M
                    ret = SRE(count)(state, pattern+3, 1);
1050
20.8M
                    RETURN_ON_ERROR(ret);
1051
20.8M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
20.8M
                    if (ret == 0)
1053
0
                        break;
1054
20.8M
                    assert(ret == 1);
1055
20.8M
                    ptr++;
1056
20.8M
                    ctx->count++;
1057
20.8M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
44.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
44.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
44.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
44.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
44.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
44.8M
            ctx->u.rep->count = -1;
1131
44.8M
            ctx->u.rep->pattern = pattern;
1132
44.8M
            ctx->u.rep->prev = state->repeat;
1133
44.8M
            ctx->u.rep->last_ptr = NULL;
1134
44.8M
            state->repeat = ctx->u.rep;
1135
1136
44.8M
            state->ptr = ptr;
1137
44.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
44.8M
            state->repeat = ctx->u.rep->prev;
1139
44.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
44.8M
            if (ret) {
1142
9.43M
                RETURN_ON_ERROR(ret);
1143
9.43M
                RETURN_SUCCESS;
1144
9.43M
            }
1145
35.3M
            RETURN_FAILURE;
1146
1147
69.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
69.1M
            ctx->u.rep = state->repeat;
1155
69.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
69.1M
            state->ptr = ptr;
1159
1160
69.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
69.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
69.1M
                   ptr, ctx->count));
1164
1165
69.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
69.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.89M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
66.2M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
66.2M
                ctx->u.rep->count = ctx->count;
1185
66.2M
                LASTMARK_SAVE();
1186
66.2M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
66.2M
                LAST_PTR_PUSH();
1189
66.2M
                ctx->u.rep->last_ptr = state->ptr;
1190
66.2M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
66.2M
                        ctx->u.rep->pattern+3);
1192
66.2M
                LAST_PTR_POP();
1193
66.2M
                if (ret) {
1194
24.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
24.0M
                    RETURN_ON_ERROR(ret);
1196
24.0M
                    RETURN_SUCCESS;
1197
24.0M
                }
1198
42.2M
                MARK_POP(ctx->lastmark);
1199
42.2M
                LASTMARK_RESTORE();
1200
42.2M
                ctx->u.rep->count = ctx->count-1;
1201
42.2M
                state->ptr = ptr;
1202
42.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
45.1M
            state->repeat = ctx->u.rep->prev;
1207
45.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
45.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
45.1M
            RETURN_ON_SUCCESS(ret);
1211
35.6M
            state->ptr = ptr;
1212
35.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
7.03M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
7.03M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
7.03M
                   ptr, pattern[1]));
1565
7.03M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
7.03M
            state->ptr = ptr - pattern[1];
1568
7.03M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
7.03M
            RETURN_ON_FAILURE(ret);
1570
4.62M
            pattern += pattern[0];
1571
4.62M
            DISPATCH;
1572
1573
7.16M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.16M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.16M
                   ptr, pattern[1]));
1578
7.16M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.16M
                state->ptr = ptr - pattern[1];
1580
7.16M
                LASTMARK_SAVE();
1581
7.16M
                if (state->repeat)
1582
7.16M
                    MARK_PUSH(ctx->lastmark);
1583
1584
14.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
14.3M
                if (ret) {
1586
3.07k
                    if (state->repeat)
1587
3.07k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.07k
                    RETURN_ON_ERROR(ret);
1589
3.07k
                    RETURN_FAILURE;
1590
3.07k
                }
1591
7.16M
                if (state->repeat)
1592
7.16M
                    MARK_POP(ctx->lastmark);
1593
7.16M
                LASTMARK_RESTORE();
1594
7.16M
            }
1595
7.16M
            pattern += pattern[0];
1596
7.16M
            DISPATCH;
1597
1598
7.16M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
531M
exit:
1620
531M
    ctx_pos = ctx->last_ctx_pos;
1621
531M
    jump = ctx->jump;
1622
531M
    DATA_POP_DISCARD(ctx);
1623
531M
    if (ctx_pos == -1) {
1624
238M
        state->sigcount = sigcount;
1625
238M
        return ret;
1626
238M
    }
1627
292M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
292M
    switch (jump) {
1630
66.2M
        case JUMP_MAX_UNTIL_2:
1631
66.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
66.2M
            goto jump_max_until_2;
1633
45.1M
        case JUMP_MAX_UNTIL_3:
1634
45.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
45.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.1M
        case JUMP_BRANCH:
1643
23.1M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.1M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
44.8M
        case JUMP_REPEAT:
1658
44.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
44.8M
            goto jump_repeat;
1660
2.87M
        case JUMP_REPEAT_ONE_1:
1661
2.87M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.87M
            goto jump_repeat_one_1;
1663
74.5M
        case JUMP_REPEAT_ONE_2:
1664
74.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
74.5M
            goto jump_repeat_one_2;
1666
21.7M
        case JUMP_MIN_REPEAT_ONE:
1667
21.7M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
21.7M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
7.03M
        case JUMP_ASSERT:
1673
7.03M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
7.03M
            goto jump_assert;
1675
7.16M
        case JUMP_ASSERT_NOT:
1676
7.16M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.16M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
292M
    }
1683
1684
0
    return ret; /* should never get here */
1685
292M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
81.0M
{
601
81.0M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
81.0M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
81.0M
    Py_ssize_t ret = 0;
604
81.0M
    int jump;
605
81.0M
    unsigned int sigcount = state->sigcount;
606
607
81.0M
    SRE(match_context)* ctx;
608
81.0M
    SRE(match_context)* nextctx;
609
81.0M
    INIT_TRACE(state);
610
611
81.0M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
81.0M
    DATA_ALLOC(SRE(match_context), ctx);
614
81.0M
    ctx->last_ctx_pos = -1;
615
81.0M
    ctx->jump = JUMP_NONE;
616
81.0M
    ctx->toplevel = toplevel;
617
81.0M
    ctx_pos = alloc_pos;
618
619
81.0M
#if USE_COMPUTED_GOTOS
620
81.0M
#include "sre_targets.h"
621
81.0M
#endif
622
623
375M
entrance:
624
625
375M
    ;  // Fashion statement.
626
375M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
375M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.41k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.41k
                   end - ptr, (size_t) pattern[3]));
634
4.41k
            RETURN_FAILURE;
635
4.41k
        }
636
10.9M
        pattern += pattern[1] + 1;
637
10.9M
    }
638
639
375M
#if USE_COMPUTED_GOTOS
640
375M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
375M
    {
647
648
375M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
125M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
125M
                   ptr, pattern[0]));
653
125M
            {
654
125M
                int i = pattern[0];
655
125M
                if (i & 1)
656
25.8M
                    state->lastindex = i/2 + 1;
657
125M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
123M
                    int j = state->lastmark + 1;
663
125M
                    while (j < i)
664
1.72M
                        state->mark[j++] = NULL;
665
123M
                    state->lastmark = i;
666
123M
                }
667
125M
                state->mark[i] = ptr;
668
125M
            }
669
125M
            pattern++;
670
125M
            DISPATCH;
671
672
125M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
25.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
25.7M
                   ptr, *pattern));
677
25.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
10.7M
                RETURN_FAILURE;
679
14.9M
            pattern++;
680
14.9M
            ptr++;
681
14.9M
            DISPATCH;
682
683
14.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
32.6M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
32.6M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
32.6M
            if (ctx->toplevel &&
698
9.39M
                ((state->match_all && ptr != state->end) ||
699
9.39M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
32.6M
            state->ptr = ptr;
704
32.6M
            RETURN_SUCCESS;
705
706
20.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
20.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
20.4M
            if (!SRE(at)(state, ptr, *pattern))
711
20.4M
                RETURN_FAILURE;
712
31.1k
            pattern++;
713
31.1k
            DISPATCH;
714
715
31.1k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
81.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
81.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
81.1M
            if (ptr >= end ||
749
81.1M
                !SRE(charset)(state, pattern + 1, *ptr))
750
25.2M
                RETURN_FAILURE;
751
55.8M
            pattern += pattern[0];
752
55.8M
            ptr++;
753
55.8M
            DISPATCH;
754
755
55.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.00M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.00M
                   pattern, ptr, pattern[0]));
758
2.00M
            if (ptr >= end ||
759
2.00M
                sre_lower_ascii(*ptr) != *pattern)
760
30.3k
                RETURN_FAILURE;
761
1.97M
            pattern++;
762
1.97M
            ptr++;
763
1.97M
            DISPATCH;
764
765
1.97M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.1M
        TARGET(SRE_OP_JUMP):
845
25.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.1M
                   ptr, pattern[0]));
850
25.1M
            pattern += pattern[0];
851
25.1M
            DISPATCH;
852
853
32.7M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
32.7M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
32.7M
            LASTMARK_SAVE();
858
32.7M
            if (state->repeat)
859
28.9M
                MARK_PUSH(ctx->lastmark);
860
69.1M
            for (; pattern[0]; pattern += pattern[0]) {
861
60.9M
                if (pattern[1] == SRE_OP_LITERAL &&
862
29.6M
                    (ptr >= end ||
863
29.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
21.2M
                    continue;
865
39.7M
                if (pattern[1] == SRE_OP_IN &&
866
25.1M
                    (ptr >= end ||
867
25.1M
                     !SRE(charset)(state, pattern + 3,
868
25.1M
                                   (SRE_CODE) *ptr)))
869
14.0M
                    continue;
870
25.6M
                state->ptr = ptr;
871
25.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
25.6M
                if (ret) {
873
24.5M
                    if (state->repeat)
874
21.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.5M
                    RETURN_ON_ERROR(ret);
876
24.5M
                    RETURN_SUCCESS;
877
24.5M
                }
878
1.04M
                if (state->repeat)
879
6.41k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.04M
                LASTMARK_RESTORE();
881
1.04M
            }
882
8.13M
            if (state->repeat)
883
7.39M
                MARK_POP_DISCARD(ctx->lastmark);
884
8.13M
            RETURN_FAILURE;
885
886
130M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
130M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
130M
                   pattern[1], pattern[2]));
898
899
130M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
19.6k
                RETURN_FAILURE; /* cannot match */
901
902
130M
            state->ptr = ptr;
903
904
130M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
130M
            RETURN_ON_ERROR(ret);
906
130M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
130M
            ctx->count = ret;
908
130M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
130M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
60.5M
                RETURN_FAILURE;
917
918
70.0M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.21M
                ptr == state->end &&
920
3.42k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.42k
            {
922
                /* tail is empty.  we're finished */
923
3.42k
                state->ptr = ptr;
924
3.42k
                RETURN_SUCCESS;
925
3.42k
            }
926
927
70.0M
            LASTMARK_SAVE();
928
70.0M
            if (state->repeat)
929
46.4M
                MARK_PUSH(ctx->lastmark);
930
931
70.0M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
15.2M
                ctx->u.chr = pattern[pattern[0]+1];
935
15.2M
                for (;;) {
936
41.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
30.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
26.1M
                        ptr--;
939
26.1M
                        ctx->count--;
940
26.1M
                    }
941
15.2M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
10.7M
                        break;
943
4.56M
                    state->ptr = ptr;
944
4.56M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.56M
                            pattern+pattern[0]);
946
4.56M
                    if (ret) {
947
4.56M
                        if (state->repeat)
948
4.56M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.56M
                        RETURN_ON_ERROR(ret);
950
4.56M
                        RETURN_SUCCESS;
951
4.56M
                    }
952
249
                    if (state->repeat)
953
249
                        MARK_POP_KEEP(ctx->lastmark);
954
249
                    LASTMARK_RESTORE();
955
956
249
                    ptr--;
957
249
                    ctx->count--;
958
249
                }
959
10.7M
                if (state->repeat)
960
10.6M
                    MARK_POP_DISCARD(ctx->lastmark);
961
54.7M
            } else {
962
                /* general case */
963
78.1M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
70.7M
                    state->ptr = ptr;
965
70.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
70.7M
                            pattern+pattern[0]);
967
70.7M
                    if (ret) {
968
47.3M
                        if (state->repeat)
969
31.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
47.3M
                        RETURN_ON_ERROR(ret);
971
47.3M
                        RETURN_SUCCESS;
972
47.3M
                    }
973
23.3M
                    if (state->repeat)
974
218k
                        MARK_POP_KEEP(ctx->lastmark);
975
23.3M
                    LASTMARK_RESTORE();
976
977
23.3M
                    ptr--;
978
23.3M
                    ctx->count--;
979
23.3M
                }
980
7.41M
                if (state->repeat)
981
138k
                    MARK_POP_DISCARD(ctx->lastmark);
982
7.41M
            }
983
18.1M
            RETURN_FAILURE;
984
985
12.0k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
12.0k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
12.0k
                   pattern[1], pattern[2]));
997
998
12.0k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
12.0k
            state->ptr = ptr;
1002
1003
12.0k
            if (pattern[1] == 0)
1004
12.0k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
12.0k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
12.0k
            } else {
1028
                /* general case */
1029
12.0k
                LASTMARK_SAVE();
1030
12.0k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
7.27M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
7.27M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
7.27M
                    state->ptr = ptr;
1036
7.27M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
7.27M
                            pattern+pattern[0]);
1038
7.27M
                    if (ret) {
1039
12.0k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
12.0k
                        RETURN_ON_ERROR(ret);
1042
12.0k
                        RETURN_SUCCESS;
1043
12.0k
                    }
1044
7.26M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
7.26M
                    LASTMARK_RESTORE();
1047
1048
7.26M
                    state->ptr = ptr;
1049
7.26M
                    ret = SRE(count)(state, pattern+3, 1);
1050
7.26M
                    RETURN_ON_ERROR(ret);
1051
7.26M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
7.26M
                    if (ret == 0)
1053
0
                        break;
1054
7.26M
                    assert(ret == 1);
1055
7.26M
                    ptr++;
1056
7.26M
                    ctx->count++;
1057
7.26M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
42.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
42.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
42.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
42.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
42.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
42.2M
            ctx->u.rep->count = -1;
1131
42.2M
            ctx->u.rep->pattern = pattern;
1132
42.2M
            ctx->u.rep->prev = state->repeat;
1133
42.2M
            ctx->u.rep->last_ptr = NULL;
1134
42.2M
            state->repeat = ctx->u.rep;
1135
1136
42.2M
            state->ptr = ptr;
1137
42.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
42.2M
            state->repeat = ctx->u.rep->prev;
1139
42.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
42.2M
            if (ret) {
1142
23.0M
                RETURN_ON_ERROR(ret);
1143
23.0M
                RETURN_SUCCESS;
1144
23.0M
            }
1145
19.2M
            RETURN_FAILURE;
1146
1147
79.5M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
79.5M
            ctx->u.rep = state->repeat;
1155
79.5M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
79.5M
            state->ptr = ptr;
1159
1160
79.5M
            ctx->count = ctx->u.rep->count+1;
1161
1162
79.5M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
79.5M
                   ptr, ctx->count));
1164
1165
79.5M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
79.5M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.06M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
75.4M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
75.4M
                ctx->u.rep->count = ctx->count;
1185
75.4M
                LASTMARK_SAVE();
1186
75.4M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
75.4M
                LAST_PTR_PUSH();
1189
75.4M
                ctx->u.rep->last_ptr = state->ptr;
1190
75.4M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
75.4M
                        ctx->u.rep->pattern+3);
1192
75.4M
                LAST_PTR_POP();
1193
75.4M
                if (ret) {
1194
37.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
37.1M
                    RETURN_ON_ERROR(ret);
1196
37.1M
                    RETURN_SUCCESS;
1197
37.1M
                }
1198
38.3M
                MARK_POP(ctx->lastmark);
1199
38.3M
                LASTMARK_RESTORE();
1200
38.3M
                ctx->u.rep->count = ctx->count-1;
1201
38.3M
                state->ptr = ptr;
1202
38.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
42.3M
            state->repeat = ctx->u.rep->prev;
1207
42.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
42.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
42.3M
            RETURN_ON_SUCCESS(ret);
1211
19.3M
            state->ptr = ptr;
1212
19.3M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
16.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
16.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
16.5M
                   ptr, pattern[1]));
1565
16.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
16.5M
            state->ptr = ptr - pattern[1];
1568
16.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
16.5M
            RETURN_ON_FAILURE(ret);
1570
14.8M
            pattern += pattern[0];
1571
14.8M
            DISPATCH;
1572
1573
14.8M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
10.0M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
10.0M
                   ptr, pattern[1]));
1578
10.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
10.0M
                state->ptr = ptr - pattern[1];
1580
10.0M
                LASTMARK_SAVE();
1581
10.0M
                if (state->repeat)
1582
10.0M
                    MARK_PUSH(ctx->lastmark);
1583
1584
20.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
20.0M
                if (ret) {
1586
6.13k
                    if (state->repeat)
1587
6.13k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
6.13k
                    RETURN_ON_ERROR(ret);
1589
6.13k
                    RETURN_FAILURE;
1590
6.13k
                }
1591
10.0M
                if (state->repeat)
1592
10.0M
                    MARK_POP(ctx->lastmark);
1593
10.0M
                LASTMARK_RESTORE();
1594
10.0M
            }
1595
10.0M
            pattern += pattern[0];
1596
10.0M
            DISPATCH;
1597
1598
10.0M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
375M
exit:
1620
375M
    ctx_pos = ctx->last_ctx_pos;
1621
375M
    jump = ctx->jump;
1622
375M
    DATA_POP_DISCARD(ctx);
1623
375M
    if (ctx_pos == -1) {
1624
81.0M
        state->sigcount = sigcount;
1625
81.0M
        return ret;
1626
81.0M
    }
1627
294M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
294M
    switch (jump) {
1630
75.4M
        case JUMP_MAX_UNTIL_2:
1631
75.4M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
75.4M
            goto jump_max_until_2;
1633
42.3M
        case JUMP_MAX_UNTIL_3:
1634
42.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
42.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
25.6M
        case JUMP_BRANCH:
1643
25.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
25.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
42.2M
        case JUMP_REPEAT:
1658
42.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
42.2M
            goto jump_repeat;
1660
4.56M
        case JUMP_REPEAT_ONE_1:
1661
4.56M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.56M
            goto jump_repeat_one_1;
1663
70.7M
        case JUMP_REPEAT_ONE_2:
1664
70.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
70.7M
            goto jump_repeat_one_2;
1666
7.27M
        case JUMP_MIN_REPEAT_ONE:
1667
7.27M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
7.27M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
16.5M
        case JUMP_ASSERT:
1673
16.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
16.5M
            goto jump_assert;
1675
10.0M
        case JUMP_ASSERT_NOT:
1676
10.0M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
10.0M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
294M
    }
1683
1684
0
    return ret; /* should never get here */
1685
294M
}
1686
1687
/* Reset the capture-group bookkeeping: state->lastmark and
   state->lastindex are both set to -1.  SRE(search) invokes this between
   two successive SRE(match) attempts in its scanning loops, so marks set
   by a failed attempt are not carried into the next one.  Expects a
   variable named `state` to be in scope at the expansion site. */
1688
#define RESET_CAPTURE_GROUP() \
1689
378M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan forward from state->start towards state->end for a position at
   which `pattern` matches.

   If the pattern begins with an SRE_OP_INFO block, the block is used to
   pick a faster scanning strategy:
     - reject immediately when fewer than <min> characters remain;
     - single-character literal prefix: scan for that character;
     - multi-character literal prefix: scan using the overlap table;
     - leading charset: skip characters that are not in the set;
   otherwise SRE(match) is simply tried at every position in turn.

   On each candidate position state->start and state->ptr are updated
   before SRE(match) is called.

   Returns 0 when no position matches.  Otherwise returns the first
   non-zero status produced by SRE(match), or 1 directly when the INFO
   flags contain SRE_INFO_LITERAL and the prefix has been found. */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
109M
{
1694
109M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
109M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
109M
    Py_ssize_t status = 0;
1697
109M
    Py_ssize_t prefix_len = 0;
1698
109M
    Py_ssize_t prefix_skip = 0;
1699
109M
    SRE_CODE* prefix = NULL;
1700
109M
    SRE_CODE* charset = NULL;
1701
109M
    SRE_CODE* overlap = NULL;
1702
109M
    int flags = 0;
1703
109M
    INIT_TRACE(state);
1704
1705
109M
    if (ptr > end)
1706
0
        return 0;
1707
1708
109M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
109M
        flags = pattern[2];
1713
1714
109M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.13M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.13M
                   end - ptr, (size_t) pattern[3]));
1717
6.13M
            return 0;
1718
6.13M
        }
1719
103M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.41M
            end -= pattern[3] - 1;
1723
8.41M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.41M
        }
1726
1727
103M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.41M
            prefix_len = pattern[5];
1731
8.41M
            prefix_skip = pattern[6];
1732
8.41M
            prefix = pattern + 7;
1733
8.41M
            overlap = prefix + prefix_len - 1;
1734
94.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
84.7M
            charset = pattern + 5;
1738
1739
103M
        pattern += 1 + pattern[1];
1740
103M
    }
1741
1742
103M
    TRACE(("prefix = %p %zd %zd\n",
1743
103M
           prefix, prefix_len, prefix_skip));
1744
103M
    TRACE(("charset = %p\n", charset));
1745
1746
103M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.74M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.81M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.81M
#endif
1753
4.81M
        /* scan up to the real end of the input: this overwrites the
           min-length adjustment applied to `end` in the INFO block */
        end = (SRE_CHAR *)state->end;
1754
4.81M
        state->must_advance = 0;
1755
8.68M
        while (ptr < end) {
1756
98.9M
            while (*ptr != c) {
1757
91.3M
                if (++ptr >= end)
1758
993k
                    return 0;
1759
91.3M
            }
1760
7.61M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.61M
            state->start = ptr;
1762
7.61M
            state->ptr = ptr + prefix_skip;
1763
7.61M
            if (flags & SRE_INFO_LITERAL)
1764
4.74k
                return 1; /* we got all of it */
1765
7.60M
            /* NOTE(review): matching resumes prefix_skip characters into
               the input and 2*prefix_skip code words into the pattern;
               presumably each skipped prefix character occupies two code
               words -- confirm against the pattern compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.60M
            if (status != 0)
1767
6.66M
                return status;
1768
938k
            ++ptr;
1769
938k
            RESET_CAPTURE_GROUP();
1770
938k
        }
1771
78.1k
        return 0;
1772
4.81M
    }
1773
1774
95.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
673k
        Py_ssize_t i = 0;
1778
1779
673k
        end = (SRE_CHAR *)state->end;
1780
673k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.40M
        for (i = 0; i < prefix_len; i++)
1784
939k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
469k
#endif
1787
1.27M
        while (ptr < end) {
1788
1.27M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
9.73M
            while (*ptr++ != c) {
1790
8.46M
                if (ptr >= end)
1791
291
                    return 0;
1792
8.46M
            }
1793
1.27M
            if (ptr >= end)
1794
56
                return 0;
1795
1796
1.27M
            i = 1;
1797
1.27M
            state->must_advance = 0;
1798
1.27M
            do {
1799
1.27M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.20M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.20M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.20M
                    state->start = ptr - (prefix_len - 1);
1808
1.20M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.20M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.20M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.20M
                    if (status != 0)
1813
672k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
535k
                    if (++ptr >= end)
1816
60
                        return 0;
1817
535k
                    RESET_CAPTURE_GROUP();
1818
535k
                }
1819
601k
                /* take the next value of i (index of the prefix character
                   to compare next) from the overlap table; 0 leaves the
                   do-while so the outer loop rescans for prefix[0] */
                i = overlap[i];
1820
601k
            } while (i != 0);
1821
1.27M
        }
1822
0
        return 0;
1823
673k
    }
1824
1825
94.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
84.7M
        end = (SRE_CHAR *)state->end;
1828
84.7M
        state->must_advance = 0;
1829
87.0M
        for (;;) {
1830
361M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
274M
                ptr++;
1832
87.0M
            if (ptr >= end)
1833
3.79M
                return 0;
1834
83.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
83.2M
            state->start = ptr;
1836
83.2M
            state->ptr = ptr;
1837
83.2M
            status = SRE(match)(state, pattern, 0);
1838
83.2M
            if (status != 0)
1839
80.9M
                break;
1840
2.30M
            ptr++;
1841
2.30M
            RESET_CAPTURE_GROUP();
1842
2.30M
        }
1843
84.7M
    } else {
1844
        /* general case */
1845
9.98M
        assert(ptr <= end);
1846
9.98M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
9.98M
        state->start = state->ptr = ptr;
1848
9.98M
        /* NOTE(review): the third argument is 1 only for this first
           attempt and 0 for every retry below; its meaning is defined by
           SRE(match), which is outside this view -- confirm there */
        status = SRE(match)(state, pattern, 1);
1849
9.98M
        state->must_advance = 0;
1850
9.98M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
5.10M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
53
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
5.10M
        {
1854
            /* the pattern starts with a beginning-of-string anchor and
               failed at the first position: later positions are not
               tried; start/ptr are moved to `end` and no match is
               reported */
5.10M
            state->start = state->ptr = ptr = end;
1855
5.10M
            return 0;
1856
5.10M
        }
1857
379M
        while (status == 0 && ptr < end) {
1858
374M
            ptr++;
1859
374M
            RESET_CAPTURE_GROUP();
1860
374M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
374M
            state->start = state->ptr = ptr;
1862
374M
            status = SRE(match)(state, pattern, 0);
1863
374M
        }
1864
4.87M
    }
1865
1866
85.8M
    return status;
1867
94.7M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
48.7M
{
1694
48.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.7M
    Py_ssize_t status = 0;
1697
48.7M
    Py_ssize_t prefix_len = 0;
1698
48.7M
    Py_ssize_t prefix_skip = 0;
1699
48.7M
    SRE_CODE* prefix = NULL;
1700
48.7M
    SRE_CODE* charset = NULL;
1701
48.7M
    SRE_CODE* overlap = NULL;
1702
48.7M
    int flags = 0;
1703
48.7M
    INIT_TRACE(state);
1704
1705
48.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.7M
        flags = pattern[2];
1713
1714
48.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.00M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.00M
                   end - ptr, (size_t) pattern[3]));
1717
6.00M
            return 0;
1718
6.00M
        }
1719
42.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.30M
            end -= pattern[3] - 1;
1723
2.30M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.30M
        }
1726
1727
42.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.30M
            prefix_len = pattern[5];
1731
2.30M
            prefix_skip = pattern[6];
1732
2.30M
            prefix = pattern + 7;
1733
2.30M
            overlap = prefix + prefix_len - 1;
1734
40.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
32.8M
            charset = pattern + 5;
1738
1739
42.7M
        pattern += 1 + pattern[1];
1740
42.7M
    }
1741
1742
42.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
42.7M
           prefix, prefix_len, prefix_skip));
1744
42.7M
    TRACE(("charset = %p\n", charset));
1745
1746
42.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.24M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.24M
#if SIZEOF_SRE_CHAR < 4
1750
2.24M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.24M
#endif
1753
2.24M
        end = (SRE_CHAR *)state->end;
1754
2.24M
        state->must_advance = 0;
1755
2.49M
        while (ptr < end) {
1756
23.9M
            while (*ptr != c) {
1757
22.4M
                if (++ptr >= end)
1758
918k
                    return 0;
1759
22.4M
            }
1760
1.50M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.50M
            state->start = ptr;
1762
1.50M
            state->ptr = ptr + prefix_skip;
1763
1.50M
            if (flags & SRE_INFO_LITERAL)
1764
500
                return 1; /* we got all of it */
1765
1.50M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.50M
            if (status != 0)
1767
1.25M
                return status;
1768
247k
            ++ptr;
1769
247k
            RESET_CAPTURE_GROUP();
1770
247k
        }
1771
74.5k
        return 0;
1772
2.24M
    }
1773
1774
40.5M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
56.2k
        Py_ssize_t i = 0;
1778
1779
56.2k
        end = (SRE_CHAR *)state->end;
1780
56.2k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
56.2k
#if SIZEOF_SRE_CHAR < 4
1783
168k
        for (i = 0; i < prefix_len; i++)
1784
112k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
56.2k
#endif
1787
141k
        while (ptr < end) {
1788
141k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.08M
            while (*ptr++ != c) {
1790
939k
                if (ptr >= end)
1791
55
                    return 0;
1792
939k
            }
1793
141k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
141k
            i = 1;
1797
141k
            state->must_advance = 0;
1798
141k
            do {
1799
141k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
125k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
125k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
125k
                    state->start = ptr - (prefix_len - 1);
1808
125k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
125k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
125k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
125k
                    if (status != 0)
1813
56.1k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
69.5k
                    if (++ptr >= end)
1816
24
                        return 0;
1817
69.4k
                    RESET_CAPTURE_GROUP();
1818
69.4k
                }
1819
85.3k
                i = overlap[i];
1820
85.3k
            } while (i != 0);
1821
141k
        }
1822
0
        return 0;
1823
56.2k
    }
1824
1825
40.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
32.8M
        end = (SRE_CHAR *)state->end;
1828
32.8M
        state->must_advance = 0;
1829
34.1M
        for (;;) {
1830
94.0M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
59.9M
                ptr++;
1832
34.1M
            if (ptr >= end)
1833
2.65M
                return 0;
1834
31.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
31.4M
            state->start = ptr;
1836
31.4M
            state->ptr = ptr;
1837
31.4M
            status = SRE(match)(state, pattern, 0);
1838
31.4M
            if (status != 0)
1839
30.1M
                break;
1840
1.28M
            ptr++;
1841
1.28M
            RESET_CAPTURE_GROUP();
1842
1.28M
        }
1843
32.8M
    } else {
1844
        /* general case */
1845
7.60M
        assert(ptr <= end);
1846
7.60M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
7.60M
        state->start = state->ptr = ptr;
1848
7.60M
        status = SRE(match)(state, pattern, 1);
1849
7.60M
        state->must_advance = 0;
1850
7.60M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.09M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
18
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.09M
        {
1854
4.09M
            state->start = state->ptr = ptr = end;
1855
4.09M
            return 0;
1856
4.09M
        }
1857
142M
        while (status == 0 && ptr < end) {
1858
139M
            ptr++;
1859
139M
            RESET_CAPTURE_GROUP();
1860
139M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
139M
            state->start = state->ptr = ptr;
1862
139M
            status = SRE(match)(state, pattern, 0);
1863
139M
        }
1864
3.51M
    }
1865
1866
33.7M
    return status;
1867
40.4M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
52.0M
{
1694
52.0M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
52.0M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
52.0M
    Py_ssize_t status = 0;
1697
52.0M
    Py_ssize_t prefix_len = 0;
1698
52.0M
    Py_ssize_t prefix_skip = 0;
1699
52.0M
    SRE_CODE* prefix = NULL;
1700
52.0M
    SRE_CODE* charset = NULL;
1701
52.0M
    SRE_CODE* overlap = NULL;
1702
52.0M
    int flags = 0;
1703
52.0M
    INIT_TRACE(state);
1704
1705
52.0M
    if (ptr > end)
1706
0
        return 0;
1707
1708
52.0M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
52.0M
        flags = pattern[2];
1713
1714
52.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
112k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
112k
                   end - ptr, (size_t) pattern[3]));
1717
112k
            return 0;
1718
112k
        }
1719
51.9M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.97M
            end -= pattern[3] - 1;
1723
2.97M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.97M
        }
1726
1727
51.9M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.97M
            prefix_len = pattern[5];
1731
2.97M
            prefix_skip = pattern[6];
1732
2.97M
            prefix = pattern + 7;
1733
2.97M
            overlap = prefix + prefix_len - 1;
1734
48.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
46.7M
            charset = pattern + 5;
1738
1739
51.9M
        pattern += 1 + pattern[1];
1740
51.9M
    }
1741
1742
51.9M
    TRACE(("prefix = %p %zd %zd\n",
1743
51.9M
           prefix, prefix_len, prefix_skip));
1744
51.9M
    TRACE(("charset = %p\n", charset));
1745
1746
51.9M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.56M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.56M
#if SIZEOF_SRE_CHAR < 4
1750
2.56M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.56M
#endif
1753
2.56M
        end = (SRE_CHAR *)state->end;
1754
2.56M
        state->must_advance = 0;
1755
3.16M
        while (ptr < end) {
1756
47.9M
            while (*ptr != c) {
1757
44.8M
                if (++ptr >= end)
1758
71.4k
                    return 0;
1759
44.8M
            }
1760
3.09M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.09M
            state->start = ptr;
1762
3.09M
            state->ptr = ptr + prefix_skip;
1763
3.09M
            if (flags & SRE_INFO_LITERAL)
1764
3.18k
                return 1; /* we got all of it */
1765
3.08M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.08M
            if (status != 0)
1767
2.48M
                return status;
1768
603k
            ++ptr;
1769
603k
            RESET_CAPTURE_GROUP();
1770
603k
        }
1771
2.66k
        return 0;
1772
2.56M
    }
1773
1774
49.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
413k
        Py_ssize_t i = 0;
1778
1779
413k
        end = (SRE_CHAR *)state->end;
1780
413k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
413k
#if SIZEOF_SRE_CHAR < 4
1783
1.24M
        for (i = 0; i < prefix_len; i++)
1784
827k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
413k
#endif
1787
679k
        while (ptr < end) {
1788
679k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.73M
            while (*ptr++ != c) {
1790
2.05M
                if (ptr >= end)
1791
97
                    return 0;
1792
2.05M
            }
1793
679k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
679k
            i = 1;
1797
679k
            state->must_advance = 0;
1798
680k
            do {
1799
680k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
662k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
662k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
662k
                    state->start = ptr - (prefix_len - 1);
1808
662k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
662k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
662k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
662k
                    if (status != 0)
1813
413k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
248k
                    if (++ptr >= end)
1816
23
                        return 0;
1817
248k
                    RESET_CAPTURE_GROUP();
1818
248k
                }
1819
267k
                i = overlap[i];
1820
267k
            } while (i != 0);
1821
679k
        }
1822
0
        return 0;
1823
413k
    }
1824
1825
48.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
46.7M
        end = (SRE_CHAR *)state->end;
1828
46.7M
        state->must_advance = 0;
1829
47.2M
        for (;;) {
1830
197M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
150M
                ptr++;
1832
47.2M
            if (ptr >= end)
1833
1.08M
                return 0;
1834
46.1M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
46.1M
            state->start = ptr;
1836
46.1M
            state->ptr = ptr;
1837
46.1M
            status = SRE(match)(state, pattern, 0);
1838
46.1M
            if (status != 0)
1839
45.7M
                break;
1840
421k
            ptr++;
1841
421k
            RESET_CAPTURE_GROUP();
1842
421k
        }
1843
46.7M
    } else {
1844
        /* general case */
1845
2.18M
        assert(ptr <= end);
1846
2.18M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.18M
        state->start = state->ptr = ptr;
1848
2.18M
        status = SRE(match)(state, pattern, 1);
1849
2.18M
        state->must_advance = 0;
1850
2.18M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
998k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
18
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
998k
        {
1854
998k
            state->start = state->ptr = ptr = end;
1855
998k
            return 0;
1856
998k
        }
1857
175M
        while (status == 0 && ptr < end) {
1858
174M
            ptr++;
1859
174M
            RESET_CAPTURE_GROUP();
1860
174M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
174M
            state->start = state->ptr = ptr;
1862
174M
            status = SRE(match)(state, pattern, 0);
1863
174M
        }
1864
1.18M
    }
1865
1866
46.8M
    return status;
1867
48.9M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
8.44M
{
1694
8.44M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
8.44M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
8.44M
    Py_ssize_t status = 0;
1697
8.44M
    Py_ssize_t prefix_len = 0;
1698
8.44M
    Py_ssize_t prefix_skip = 0;
1699
8.44M
    SRE_CODE* prefix = NULL;
1700
8.44M
    SRE_CODE* charset = NULL;
1701
8.44M
    SRE_CODE* overlap = NULL;
1702
8.44M
    int flags = 0;
1703
8.44M
    INIT_TRACE(state);
1704
1705
8.44M
    if (ptr > end)
1706
0
        return 0;
1707
1708
8.44M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
8.44M
        flags = pattern[2];
1713
1714
8.44M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
16.2k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
16.2k
                   end - ptr, (size_t) pattern[3]));
1717
16.2k
            return 0;
1718
16.2k
        }
1719
8.43M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.13M
            end -= pattern[3] - 1;
1723
3.13M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.13M
        }
1726
1727
8.43M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.13M
            prefix_len = pattern[5];
1731
3.13M
            prefix_skip = pattern[6];
1732
3.13M
            prefix = pattern + 7;
1733
3.13M
            overlap = prefix + prefix_len - 1;
1734
5.29M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
5.10M
            charset = pattern + 5;
1738
1739
8.43M
        pattern += 1 + pattern[1];
1740
8.43M
    }
1741
1742
8.43M
    TRACE(("prefix = %p %zd %zd\n",
1743
8.43M
           prefix, prefix_len, prefix_skip));
1744
8.43M
    TRACE(("charset = %p\n", charset));
1745
1746
8.43M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.93M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.93M
        end = (SRE_CHAR *)state->end;
1754
2.93M
        state->must_advance = 0;
1755
3.02M
        while (ptr < end) {
1756
27.0M
            while (*ptr != c) {
1757
24.0M
                if (++ptr >= end)
1758
3.87k
                    return 0;
1759
24.0M
            }
1760
3.01M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.01M
            state->start = ptr;
1762
3.01M
            state->ptr = ptr + prefix_skip;
1763
3.01M
            if (flags & SRE_INFO_LITERAL)
1764
1.05k
                return 1; /* we got all of it */
1765
3.01M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.01M
            if (status != 0)
1767
2.92M
                return status;
1768
88.2k
            ++ptr;
1769
88.2k
            RESET_CAPTURE_GROUP();
1770
88.2k
        }
1771
949
        return 0;
1772
2.93M
    }
1773
1774
5.49M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
203k
        Py_ssize_t i = 0;
1778
1779
203k
        end = (SRE_CHAR *)state->end;
1780
203k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
451k
        while (ptr < end) {
1788
451k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
5.92M
            while (*ptr++ != c) {
1790
5.47M
                if (ptr >= end)
1791
139
                    return 0;
1792
5.47M
            }
1793
451k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
451k
            i = 1;
1797
451k
            state->must_advance = 0;
1798
451k
            do {
1799
451k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
420k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
420k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
420k
                    state->start = ptr - (prefix_len - 1);
1808
420k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
420k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
420k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
420k
                    if (status != 0)
1813
202k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
217k
                    if (++ptr >= end)
1816
13
                        return 0;
1817
217k
                    RESET_CAPTURE_GROUP();
1818
217k
                }
1819
248k
                i = overlap[i];
1820
248k
            } while (i != 0);
1821
451k
        }
1822
0
        return 0;
1823
203k
    }
1824
1825
5.29M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
5.10M
        end = (SRE_CHAR *)state->end;
1828
5.10M
        state->must_advance = 0;
1829
5.70M
        for (;;) {
1830
70.5M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
64.8M
                ptr++;
1832
5.70M
            if (ptr >= end)
1833
50.1k
                return 0;
1834
5.65M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
5.65M
            state->start = ptr;
1836
5.65M
            state->ptr = ptr;
1837
5.65M
            status = SRE(match)(state, pattern, 0);
1838
5.65M
            if (status != 0)
1839
5.05M
                break;
1840
596k
            ptr++;
1841
596k
            RESET_CAPTURE_GROUP();
1842
596k
        }
1843
5.10M
    } else {
1844
        /* general case */
1845
190k
        assert(ptr <= end);
1846
190k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
190k
        state->start = state->ptr = ptr;
1848
190k
        status = SRE(match)(state, pattern, 1);
1849
190k
        state->must_advance = 0;
1850
190k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
14.9k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
17
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
14.9k
        {
1854
14.9k
            state->start = state->ptr = ptr = end;
1855
14.9k
            return 0;
1856
14.9k
        }
1857
60.9M
        while (status == 0 && ptr < end) {
1858
60.7M
            ptr++;
1859
60.7M
            RESET_CAPTURE_GROUP();
1860
60.7M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
60.7M
            state->start = state->ptr = ptr;
1862
60.7M
            status = SRE(match)(state, pattern, 0);
1863
60.7M
        }
1864
175k
    }
1865
1866
5.22M
    return status;
1867
5.29M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/