Coverage Report

Created: 2025-11-02 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
13.6M
{
18
    /* check if pointer is at given position */
19
20
13.6M
    Py_ssize_t thisp, thatp;
21
22
13.6M
    switch (at) {
23
24
6.53M
    case SRE_AT_BEGINNING:
25
6.53M
    case SRE_AT_BEGINNING_STRING:
26
6.53M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.69M
    case SRE_AT_END:
33
4.69M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
26.5k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.69M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.45M
    case SRE_AT_END_STRING:
42
2.45M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.6M
    }
87
88
0
    return 0;
89
13.6M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.3M
{
18
    /* check if pointer is at given position */
19
20
12.3M
    Py_ssize_t thisp, thatp;
21
22
12.3M
    switch (at) {
23
24
6.50M
    case SRE_AT_BEGINNING:
25
6.50M
    case SRE_AT_BEGINNING_STRING:
26
6.50M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.28M
    case SRE_AT_END:
33
4.28M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
25.9k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.28M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.56M
    case SRE_AT_END_STRING:
42
1.56M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.3M
    }
87
88
0
    return 0;
89
12.3M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
771k
{
18
    /* check if pointer is at given position */
19
20
771k
    Py_ssize_t thisp, thatp;
21
22
771k
    switch (at) {
23
24
28.9k
    case SRE_AT_BEGINNING:
25
28.9k
    case SRE_AT_BEGINNING_STRING:
26
28.9k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
329k
    case SRE_AT_END:
33
329k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
89
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
329k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
413k
    case SRE_AT_END_STRING:
42
413k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
771k
    }
87
88
0
    return 0;
89
771k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
567k
{
18
    /* check if pointer is at given position */
19
20
567k
    Py_ssize_t thisp, thatp;
21
22
567k
    switch (at) {
23
24
3.73k
    case SRE_AT_BEGINNING:
25
3.73k
    case SRE_AT_BEGINNING_STRING:
26
3.73k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
81.8k
    case SRE_AT_END:
33
81.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
502
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
81.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
481k
    case SRE_AT_END_STRING:
42
481k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
567k
    }
87
88
0
    return 0;
89
567k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.29G
{
94
    /* check if character is a member of the given set */
95
96
1.29G
    int ok = 1;
97
98
2.97G
    for (;;) {
99
2.97G
        switch (*set++) {
100
101
869M
        case SRE_OP_FAILURE:
102
869M
            return !ok;
103
104
984M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
984M
            if (ch == set[0])
107
4.32M
                return ok;
108
979M
            set++;
109
979M
            break;
110
111
12.3M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
12.3M
            if (sre_category(set[0], (int) ch))
114
8.53M
                return ok;
115
3.78M
            set++;
116
3.78M
            break;
117
118
526M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
526M
            if (ch < 256 &&
121
478M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
218M
                return ok;
123
308M
            set += 256/SRE_CODE_BITS;
124
308M
            break;
125
126
330M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
330M
            if (set[0] <= ch && ch <= set[1])
129
198M
                return ok;
130
132M
            set += 2;
131
132M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
249M
        case SRE_OP_NEGATE:
148
249M
            ok = !ok;
149
249M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
2.97G
        }
175
2.97G
    }
176
1.29G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
309M
{
94
    /* check if character is a member of the given set */
95
96
309M
    int ok = 1;
97
98
626M
    for (;;) {
99
626M
        switch (*set++) {
100
101
165M
        case SRE_OP_FAILURE:
102
165M
            return !ok;
103
104
176M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
176M
            if (ch == set[0])
107
2.12M
                return ok;
108
174M
            set++;
109
174M
            break;
110
111
11.7M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.7M
            if (sre_category(set[0], (int) ch))
114
7.92M
                return ok;
115
3.77M
            set++;
116
3.77M
            break;
117
118
78.8M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
78.8M
            if (ch < 256 &&
121
78.8M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
39.4M
                return ok;
123
39.4M
            set += 256/SRE_CODE_BITS;
124
39.4M
            break;
125
126
159M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
159M
            if (set[0] <= ch && ch <= set[1])
129
94.9M
                return ok;
130
64.9M
            set += 2;
131
64.9M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
34.0M
        case SRE_OP_NEGATE:
148
34.0M
            ok = !ok;
149
34.0M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
626M
        }
175
626M
    }
176
309M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
585M
{
94
    /* check if character is a member of the given set */
95
96
585M
    int ok = 1;
97
98
1.42G
    for (;;) {
99
1.42G
        switch (*set++) {
100
101
430M
        case SRE_OP_FAILURE:
102
430M
            return !ok;
103
104
556M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
556M
            if (ch == set[0])
107
1.34M
                return ok;
108
555M
            set++;
109
555M
            break;
110
111
174k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
174k
            if (sre_category(set[0], (int) ch))
114
166k
                return ok;
115
7.82k
            set++;
116
7.82k
            break;
117
118
184M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
184M
            if (ch < 256 &&
121
167M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
61.7M
                return ok;
123
122M
            set += 256/SRE_CODE_BITS;
124
122M
            break;
125
126
151M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
151M
            if (set[0] <= ch && ch <= set[1])
129
92.1M
                return ok;
130
59.0M
            set += 2;
131
59.0M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
104M
        case SRE_OP_NEGATE:
148
104M
            ok = !ok;
149
104M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.42G
        }
175
1.42G
    }
176
585M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
404M
{
94
    /* check if character is a member of the given set */
95
96
404M
    int ok = 1;
97
98
919M
    for (;;) {
99
919M
        switch (*set++) {
100
101
274M
        case SRE_OP_FAILURE:
102
274M
            return !ok;
103
104
250M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
250M
            if (ch == set[0])
107
861k
                return ok;
108
249M
            set++;
109
249M
            break;
110
111
437k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
437k
            if (sre_category(set[0], (int) ch))
114
436k
                return ok;
115
813
            set++;
116
813
            break;
117
118
263M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
263M
            if (ch < 256 &&
121
232M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
117M
                return ok;
123
146M
            set += 256/SRE_CODE_BITS;
124
146M
            break;
125
126
19.4M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
19.4M
            if (set[0] <= ch && ch <= set[1])
129
11.3M
                return ok;
130
8.13M
            set += 2;
131
8.13M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
111M
        case SRE_OP_NEGATE:
148
111M
            ok = !ok;
149
111M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
919M
        }
175
919M
    }
176
404M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
/* Forward declaration: SRE(count) below calls SRE(match) for its generic
   (non-specialised) single-item case, before SRE(match) is defined. */
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
478M
{
195
478M
    SRE_CODE chr;
196
478M
    SRE_CHAR c;
197
478M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
478M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
478M
    Py_ssize_t i;
200
478M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
478M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
14.7M
        end = ptr + maxcount;
205
206
478M
    switch (pattern[0]) {
207
208
411M
    case SRE_OP_IN:
209
        /* repeated set */
210
411M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
733M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
322M
            ptr++;
213
411M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
62.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
62.6M
        chr = pattern[1];
232
62.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
62.6M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
59.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
59.2M
        else
238
59.2M
#endif
239
66.9M
        while (ptr < end && *ptr == c)
240
4.29M
            ptr++;
241
62.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
4.26M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
4.26M
        chr = pattern[1];
270
4.26M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
4.26M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.94M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.94M
        else
276
1.94M
#endif
277
36.2M
        while (ptr < end && *ptr != c)
278
32.0M
            ptr++;
279
4.26M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
478M
    }
319
320
478M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
478M
           ptr - (SRE_CHAR*) state->ptr));
322
478M
    return ptr - (SRE_CHAR*) state->ptr;
323
478M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
139M
{
195
139M
    SRE_CODE chr;
196
139M
    SRE_CHAR c;
197
139M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
139M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
139M
    Py_ssize_t i;
200
139M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
139M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
3.50M
        end = ptr + maxcount;
205
206
139M
    switch (pattern[0]) {
207
208
86.6M
    case SRE_OP_IN:
209
        /* repeated set */
210
86.6M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
196M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
109M
            ptr++;
213
86.6M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
52.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
52.4M
        chr = pattern[1];
232
52.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
52.4M
        c = (SRE_CHAR) chr;
234
52.4M
#if SIZEOF_SRE_CHAR < 4
235
52.4M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
52.4M
        else
238
52.4M
#endif
239
54.3M
        while (ptr < end && *ptr == c)
240
1.89M
            ptr++;
241
52.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
287k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
287k
        chr = pattern[1];
270
287k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
287k
        c = (SRE_CHAR) chr;
272
287k
#if SIZEOF_SRE_CHAR < 4
273
287k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
287k
        else
276
287k
#endif
277
6.72M
        while (ptr < end && *ptr != c)
278
6.43M
            ptr++;
279
287k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
139M
    }
319
320
139M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
139M
           ptr - (SRE_CHAR*) state->ptr));
322
139M
    return ptr - (SRE_CHAR*) state->ptr;
323
139M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
199M
{
195
199M
    SRE_CODE chr;
196
199M
    SRE_CHAR c;
197
199M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
199M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
199M
    Py_ssize_t i;
200
199M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
199M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.65M
        end = ptr + maxcount;
205
206
199M
    switch (pattern[0]) {
207
208
191M
    case SRE_OP_IN:
209
        /* repeated set */
210
191M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
299M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
108M
            ptr++;
213
191M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.71M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.71M
        chr = pattern[1];
232
6.71M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.71M
        c = (SRE_CHAR) chr;
234
6.71M
#if SIZEOF_SRE_CHAR < 4
235
6.71M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
6.71M
        else
238
6.71M
#endif
239
8.57M
        while (ptr < end && *ptr == c)
240
1.86M
            ptr++;
241
6.71M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.65M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.65M
        chr = pattern[1];
270
1.65M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.65M
        c = (SRE_CHAR) chr;
272
1.65M
#if SIZEOF_SRE_CHAR < 4
273
1.65M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.65M
        else
276
1.65M
#endif
277
10.2M
        while (ptr < end && *ptr != c)
278
8.56M
            ptr++;
279
1.65M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
199M
    }
319
320
199M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
199M
           ptr - (SRE_CHAR*) state->ptr));
322
199M
    return ptr - (SRE_CHAR*) state->ptr;
323
199M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
139M
{
195
139M
    SRE_CODE chr;
196
139M
    SRE_CHAR c;
197
139M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
139M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
139M
    Py_ssize_t i;
200
139M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
139M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
6.59M
        end = ptr + maxcount;
205
206
139M
    switch (pattern[0]) {
207
208
133M
    case SRE_OP_IN:
209
        /* repeated set */
210
133M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
237M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
103M
            ptr++;
213
133M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
3.46M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
3.46M
        chr = pattern[1];
232
3.46M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
3.46M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
4.00M
        while (ptr < end && *ptr == c)
240
537k
            ptr++;
241
3.46M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.31M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.31M
        chr = pattern[1];
270
2.31M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.31M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
19.3M
        while (ptr < end && *ptr != c)
278
17.0M
            ptr++;
279
2.31M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
139M
    }
319
320
139M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
139M
           ptr - (SRE_CHAR*) state->ptr));
322
139M
    return ptr - (SRE_CHAR*) state->ptr;
323
139M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (in other words, those
327
 * that will backtrack). The cases are:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
408M
    do { \
355
408M
        ctx->lastmark = state->lastmark; \
356
408M
        ctx->lastindex = state->lastindex; \
357
408M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
146M
    do { \
360
146M
        state->lastmark = ctx->lastmark; \
361
146M
        state->lastindex = ctx->lastindex; \
362
146M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
124M
    do { \
366
124M
        TRACE(("push last_ptr: %zd", \
367
124M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
124M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
124M
    } while (0)
370
#define LAST_PTR_POP()  \
371
124M
    do { \
372
124M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
124M
        TRACE(("pop last_ptr: %zd", \
374
124M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
124M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
474M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
528M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
892M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
58.1M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
38.8M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.00G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.00G
do { \
390
1.00G
    alloc_pos = state->data_stack_base; \
391
1.00G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.00G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.00G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
135M
        int j = data_stack_grow(state, sizeof(type)); \
395
135M
        if (j < 0) return j; \
396
135M
        if (ctx_pos != -1) \
397
135M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
135M
    } \
399
1.00G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.00G
    state->data_stack_base += sizeof(type); \
401
1.00G
} while (0)
402
403
1.03G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.03G
do { \
405
1.03G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.03G
    ptr = (type*)(state->data_stack+pos); \
407
1.03G
} while (0)
408
409
328M
#define DATA_STACK_PUSH(state, data, size) \
410
328M
do { \
411
328M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
328M
           data, state->data_stack_base, size)); \
413
328M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
62.4k
        int j = data_stack_grow(state, size); \
415
62.4k
        if (j < 0) return j; \
416
62.4k
        if (ctx_pos != -1) \
417
62.4k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
62.4k
    } \
419
328M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
328M
    state->data_stack_base += size; \
421
328M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
181M
#define DATA_STACK_POP(state, data, size, discard) \
427
181M
do { \
428
181M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
181M
           data, state->data_stack_base-size, size)); \
430
181M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
181M
    if (discard) \
432
181M
        state->data_stack_base -= size; \
433
181M
} while (0)
434
435
1.15G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.15G
do { \
437
1.15G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.15G
           state->data_stack_base-size, size)); \
439
1.15G
    state->data_stack_base -= size; \
440
1.15G
} while(0)
441
442
#define DATA_PUSH(x) \
443
124M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
124M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.00G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.00G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.03G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
311M
    do if (lastmark >= 0) { \
473
204M
        MARK_TRACE("push", (lastmark)); \
474
204M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
204M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
311M
    } while (0)
477
#define MARK_POP(lastmark) \
478
73.6M
    do if (lastmark >= 0) { \
479
54.9M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
54.9M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
54.9M
        MARK_TRACE("pop", (lastmark)); \
482
73.6M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.01M
    do if (lastmark >= 0) { \
485
2.01M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.01M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.01M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.01M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
237M
    do if (lastmark >= 0) { \
491
149M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
149M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
149M
        MARK_TRACE("pop discard", (lastmark)); \
494
237M
    } while (0)
495
496
441M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
124M
#define JUMP_MAX_UNTIL_2     2
499
58.1M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
57.1M
#define JUMP_REPEAT          7
504
11.5M
#define JUMP_REPEAT_ONE_1    8
505
102M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
143M
#define JUMP_BRANCH          11
508
38.8M
#define JUMP_ASSERT          12
509
25.3M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
561M
    ctx->pattern = pattern; \
516
561M
    ctx->ptr = ptr; \
517
561M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
561M
    nextctx->pattern = nextpattern; \
519
561M
    nextctx->toplevel = toplevel_; \
520
561M
    nextctx->jump = jumpvalue; \
521
561M
    nextctx->last_ctx_pos = ctx_pos; \
522
561M
    pattern = nextpattern; \
523
561M
    ctx_pos = alloc_pos; \
524
561M
    ctx = nextctx; \
525
561M
    goto entrance; \
526
561M
    jumplabel: \
527
561M
    pattern = ctx->pattern; \
528
561M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
496M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
64.2M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
1.81G
    do {                                                           \
553
1.81G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
1.81G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
1.81G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
1.89G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
1.81G
        do {                               \
588
1.81G
            MAYBE_CHECK_SIGNALS;           \
589
1.81G
            goto *sre_targets[*pattern++]; \
590
1.81G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
441M
{
601
441M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
441M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
441M
    Py_ssize_t ret = 0;
604
441M
    int jump;
605
441M
    unsigned int sigcount = state->sigcount;
606
607
441M
    SRE(match_context)* ctx;
608
441M
    SRE(match_context)* nextctx;
609
441M
    INIT_TRACE(state);
610
611
441M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
441M
    DATA_ALLOC(SRE(match_context), ctx);
614
441M
    ctx->last_ctx_pos = -1;
615
441M
    ctx->jump = JUMP_NONE;
616
441M
    ctx->toplevel = toplevel;
617
441M
    ctx_pos = alloc_pos;
618
619
441M
#if USE_COMPUTED_GOTOS
620
441M
#include "sre_targets.h"
621
441M
#endif
622
623
1.00G
entrance:
624
625
1.00G
    ;  // Fashion statement.
626
1.00G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.00G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
56.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.50M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.50M
                   end - ptr, (size_t) pattern[3]));
634
4.50M
            RETURN_FAILURE;
635
4.50M
        }
636
52.4M
        pattern += pattern[1] + 1;
637
52.4M
    }
638
639
998M
#if USE_COMPUTED_GOTOS
640
998M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
998M
    {
647
648
998M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
418M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
418M
                   ptr, pattern[0]));
653
418M
            {
654
418M
                int i = pattern[0];
655
418M
                if (i & 1)
656
50.5M
                    state->lastindex = i/2 + 1;
657
418M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
411M
                    int j = state->lastmark + 1;
663
418M
                    while (j < i)
664
6.36M
                        state->mark[j++] = NULL;
665
411M
                    state->lastmark = i;
666
411M
                }
667
418M
                state->mark[i] = ptr;
668
418M
            }
669
418M
            pattern++;
670
418M
            DISPATCH;
671
672
418M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
117M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
117M
                   ptr, *pattern));
677
117M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
39.2M
                RETURN_FAILURE;
679
78.5M
            pattern++;
680
78.5M
            ptr++;
681
78.5M
            DISPATCH;
682
683
78.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
154M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
154M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
154M
            if (ctx->toplevel &&
698
42.2M
                ((state->match_all && ptr != state->end) ||
699
42.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
154M
            state->ptr = ptr;
704
154M
            RETURN_SUCCESS;
705
706
13.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.6M
            if (!SRE(at)(state, ptr, *pattern))
711
3.55M
                RETURN_FAILURE;
712
10.1M
            pattern++;
713
10.1M
            DISPATCH;
714
715
10.1M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
175M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
175M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
175M
            if (ptr >= end ||
749
175M
                !SRE(charset)(state, pattern + 1, *ptr))
750
8.36M
                RETURN_FAILURE;
751
166M
            pattern += pattern[0];
752
166M
            ptr++;
753
166M
            DISPATCH;
754
755
166M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.04M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.04M
                   pattern, ptr, pattern[0]));
758
6.04M
            if (ptr >= end ||
759
6.04M
                sre_lower_ascii(*ptr) != *pattern)
760
207k
                RETURN_FAILURE;
761
5.83M
            pattern++;
762
5.83M
            ptr++;
763
5.83M
            DISPATCH;
764
765
5.83M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
76.7M
        TARGET(SRE_OP_JUMP):
845
76.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
76.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
76.7M
                   ptr, pattern[0]));
850
76.7M
            pattern += pattern[0];
851
76.7M
            DISPATCH;
852
853
117M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
117M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
117M
            LASTMARK_SAVE();
858
117M
            if (state->repeat)
859
71.5M
                MARK_PUSH(ctx->lastmark);
860
284M
            for (; pattern[0]; pattern += pattern[0]) {
861
241M
                if (pattern[1] == SRE_OP_LITERAL &&
862
113M
                    (ptr >= end ||
863
113M
                     (SRE_CODE) *ptr != pattern[2]))
864
55.4M
                    continue;
865
185M
                if (pattern[1] == SRE_OP_IN &&
866
67.1M
                    (ptr >= end ||
867
67.1M
                     !SRE(charset)(state, pattern + 3,
868
67.1M
                                   (SRE_CODE) *ptr)))
869
42.5M
                    continue;
870
143M
                state->ptr = ptr;
871
143M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
143M
                if (ret) {
873
74.3M
                    if (state->repeat)
874
54.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
74.3M
                    RETURN_ON_ERROR(ret);
876
74.3M
                    RETURN_SUCCESS;
877
74.3M
                }
878
68.9M
                if (state->repeat)
879
19.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
68.9M
                LASTMARK_RESTORE();
881
68.9M
            }
882
43.4M
            if (state->repeat)
883
17.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
43.4M
            RETURN_FAILURE;
885
886
479M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
479M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
479M
                   pattern[1], pattern[2]));
898
899
479M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.23M
                RETURN_FAILURE; /* cannot match */
901
902
478M
            state->ptr = ptr;
903
904
478M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
478M
            RETURN_ON_ERROR(ret);
906
478M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
478M
            ctx->count = ret;
908
478M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
478M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
337M
                RETURN_FAILURE;
917
918
140M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.31M
                ptr == state->end &&
920
73.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
73.5k
            {
922
                /* tail is empty.  we're finished */
923
73.5k
                state->ptr = ptr;
924
73.5k
                RETURN_SUCCESS;
925
73.5k
            }
926
927
140M
            LASTMARK_SAVE();
928
140M
            if (state->repeat)
929
90.2M
                MARK_PUSH(ctx->lastmark);
930
931
140M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
40.8M
                ctx->u.chr = pattern[pattern[0]+1];
935
40.8M
                for (;;) {
936
101M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
71.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
60.2M
                        ptr--;
939
60.2M
                        ctx->count--;
940
60.2M
                    }
941
40.8M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
29.3M
                        break;
943
11.5M
                    state->ptr = ptr;
944
11.5M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
11.5M
                            pattern+pattern[0]);
946
11.5M
                    if (ret) {
947
11.5M
                        if (state->repeat)
948
10.1M
                            MARK_POP_DISCARD(ctx->lastmark);
949
11.5M
                        RETURN_ON_ERROR(ret);
950
11.5M
                        RETURN_SUCCESS;
951
11.5M
                    }
952
547
                    if (state->repeat)
953
547
                        MARK_POP_KEEP(ctx->lastmark);
954
547
                    LASTMARK_RESTORE();
955
956
547
                    ptr--;
957
547
                    ctx->count--;
958
547
                }
959
29.3M
                if (state->repeat)
960
27.9M
                    MARK_POP_DISCARD(ctx->lastmark);
961
100M
            } else {
962
                /* general case */
963
103M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
102M
                    state->ptr = ptr;
965
102M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
102M
                            pattern+pattern[0]);
967
102M
                    if (ret) {
968
98.7M
                        if (state->repeat)
969
51.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
98.7M
                        RETURN_ON_ERROR(ret);
971
98.7M
                        RETURN_SUCCESS;
972
98.7M
                    }
973
3.64M
                    if (state->repeat)
974
1.99M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.64M
                    LASTMARK_RESTORE();
976
977
3.64M
                    ptr--;
978
3.64M
                    ctx->count--;
979
3.64M
                }
980
1.25M
                if (state->repeat)
981
1.04M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.25M
            }
983
30.6M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
57.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
57.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
57.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
57.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
57.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
57.1M
            ctx->u.rep->count = -1;
1131
57.1M
            ctx->u.rep->pattern = pattern;
1132
57.1M
            ctx->u.rep->prev = state->repeat;
1133
57.1M
            ctx->u.rep->last_ptr = NULL;
1134
57.1M
            state->repeat = ctx->u.rep;
1135
1136
57.1M
            state->ptr = ptr;
1137
57.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
57.1M
            state->repeat = ctx->u.rep->prev;
1139
57.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
57.1M
            if (ret) {
1142
57.0M
                RETURN_ON_ERROR(ret);
1143
57.0M
                RETURN_SUCCESS;
1144
57.0M
            }
1145
102k
            RETURN_FAILURE;
1146
1147
134M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
134M
            ctx->u.rep = state->repeat;
1155
134M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
134M
            state->ptr = ptr;
1159
1160
134M
            ctx->count = ctx->u.rep->count+1;
1161
1162
134M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
134M
                   ptr, ctx->count));
1164
1165
134M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
134M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
9.86M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
124M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
124M
                ctx->u.rep->count = ctx->count;
1185
124M
                LASTMARK_SAVE();
1186
124M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
124M
                LAST_PTR_PUSH();
1189
124M
                ctx->u.rep->last_ptr = state->ptr;
1190
124M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
124M
                        ctx->u.rep->pattern+3);
1192
124M
                LAST_PTR_POP();
1193
124M
                if (ret) {
1194
75.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
75.9M
                    RETURN_ON_ERROR(ret);
1196
75.9M
                    RETURN_SUCCESS;
1197
75.9M
                }
1198
48.2M
                MARK_POP(ctx->lastmark);
1199
48.2M
                LASTMARK_RESTORE();
1200
48.2M
                ctx->u.rep->count = ctx->count-1;
1201
48.2M
                state->ptr = ptr;
1202
48.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
58.1M
            state->repeat = ctx->u.rep->prev;
1207
58.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
58.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
58.1M
            RETURN_ON_SUCCESS(ret);
1211
1.10M
            state->ptr = ptr;
1212
1.10M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
38.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
38.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
38.8M
                   ptr, pattern[1]));
1565
38.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
38.8M
            state->ptr = ptr - pattern[1];
1568
38.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
38.8M
            RETURN_ON_FAILURE(ret);
1570
34.4M
            pattern += pattern[0];
1571
34.4M
            DISPATCH;
1572
1573
34.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.3M
                   ptr, pattern[1]));
1578
25.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.3M
                state->ptr = ptr - pattern[1];
1580
25.3M
                LASTMARK_SAVE();
1581
25.3M
                if (state->repeat)
1582
25.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
50.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
50.7M
                if (ret) {
1586
14.3k
                    if (state->repeat)
1587
14.3k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.3k
                    RETURN_ON_ERROR(ret);
1589
14.3k
                    RETURN_FAILURE;
1590
14.3k
                }
1591
25.3M
                if (state->repeat)
1592
25.3M
                    MARK_POP(ctx->lastmark);
1593
25.3M
                LASTMARK_RESTORE();
1594
25.3M
            }
1595
25.3M
            pattern += pattern[0];
1596
25.3M
            DISPATCH;
1597
1598
25.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.00G
exit:
1620
1.00G
    ctx_pos = ctx->last_ctx_pos;
1621
1.00G
    jump = ctx->jump;
1622
1.00G
    DATA_POP_DISCARD(ctx);
1623
1.00G
    if (ctx_pos == -1) {
1624
441M
        state->sigcount = sigcount;
1625
441M
        return ret;
1626
441M
    }
1627
561M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
561M
    switch (jump) {
1630
124M
        case JUMP_MAX_UNTIL_2:
1631
124M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
124M
            goto jump_max_until_2;
1633
58.1M
        case JUMP_MAX_UNTIL_3:
1634
58.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
58.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
143M
        case JUMP_BRANCH:
1643
143M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
143M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
57.1M
        case JUMP_REPEAT:
1658
57.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
57.1M
            goto jump_repeat;
1660
11.5M
        case JUMP_REPEAT_ONE_1:
1661
11.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
11.5M
            goto jump_repeat_one_1;
1663
102M
        case JUMP_REPEAT_ONE_2:
1664
102M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
102M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
38.8M
        case JUMP_ASSERT:
1673
38.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
38.8M
            goto jump_assert;
1675
25.3M
        case JUMP_ASSERT_NOT:
1676
25.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
561M
    }
1683
1684
0
    return ret; /* should never get here */
1685
561M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
140M
{
601
140M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
140M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
140M
    Py_ssize_t ret = 0;
604
140M
    int jump;
605
140M
    unsigned int sigcount = state->sigcount;
606
607
140M
    SRE(match_context)* ctx;
608
140M
    SRE(match_context)* nextctx;
609
140M
    INIT_TRACE(state);
610
611
140M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
140M
    DATA_ALLOC(SRE(match_context), ctx);
614
140M
    ctx->last_ctx_pos = -1;
615
140M
    ctx->jump = JUMP_NONE;
616
140M
    ctx->toplevel = toplevel;
617
140M
    ctx_pos = alloc_pos;
618
619
140M
#if USE_COMPUTED_GOTOS
620
140M
#include "sre_targets.h"
621
140M
#endif
622
623
292M
entrance:
624
625
292M
    ;  // Fashion statement.
626
292M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
292M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.50M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.50M
                   end - ptr, (size_t) pattern[3]));
634
4.50M
            RETURN_FAILURE;
635
4.50M
        }
636
25.1M
        pattern += pattern[1] + 1;
637
25.1M
    }
638
639
288M
#if USE_COMPUTED_GOTOS
640
288M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
288M
    {
647
648
288M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
123M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
123M
                   ptr, pattern[0]));
653
123M
            {
654
123M
                int i = pattern[0];
655
123M
                if (i & 1)
656
20.2M
                    state->lastindex = i/2 + 1;
657
123M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
119M
                    int j = state->lastmark + 1;
663
123M
                    while (j < i)
664
3.79M
                        state->mark[j++] = NULL;
665
119M
                    state->lastmark = i;
666
119M
                }
667
123M
                state->mark[i] = ptr;
668
123M
            }
669
123M
            pattern++;
670
123M
            DISPATCH;
671
672
123M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
59.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
59.0M
                   ptr, *pattern));
677
59.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
19.7M
                RETURN_FAILURE;
679
39.2M
            pattern++;
680
39.2M
            ptr++;
681
39.2M
            DISPATCH;
682
683
39.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
47.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
47.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
47.4M
            if (ctx->toplevel &&
698
18.5M
                ((state->match_all && ptr != state->end) ||
699
18.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
47.4M
            state->ptr = ptr;
704
47.4M
            RETURN_SUCCESS;
705
706
12.3M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.3M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.3M
            if (!SRE(at)(state, ptr, *pattern))
711
2.25M
                RETURN_FAILURE;
712
10.1M
            pattern++;
713
10.1M
            DISPATCH;
714
715
10.1M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
32.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
32.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
32.1M
            if (ptr >= end ||
749
32.1M
                !SRE(charset)(state, pattern + 1, *ptr))
750
533k
                RETURN_FAILURE;
751
31.6M
            pattern += pattern[0];
752
31.6M
            ptr++;
753
31.6M
            DISPATCH;
754
755
31.6M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
858k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
858k
                   pattern, ptr, pattern[0]));
758
858k
            if (ptr >= end ||
759
858k
                sre_lower_ascii(*ptr) != *pattern)
760
104k
                RETURN_FAILURE;
761
753k
            pattern++;
762
753k
            ptr++;
763
753k
            DISPATCH;
764
765
753k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
27.7M
        TARGET(SRE_OP_JUMP):
845
27.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
27.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
27.7M
                   ptr, pattern[0]));
850
27.7M
            pattern += pattern[0];
851
27.7M
            DISPATCH;
852
853
50.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
50.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
50.2M
            LASTMARK_SAVE();
858
50.2M
            if (state->repeat)
859
11.0M
                MARK_PUSH(ctx->lastmark);
860
144M
            for (; pattern[0]; pattern += pattern[0]) {
861
120M
                if (pattern[1] == SRE_OP_LITERAL &&
862
56.9M
                    (ptr >= end ||
863
56.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
23.7M
                    continue;
865
96.9M
                if (pattern[1] == SRE_OP_IN &&
866
12.0M
                    (ptr >= end ||
867
12.0M
                     !SRE(charset)(state, pattern + 3,
868
12.0M
                                   (SRE_CODE) *ptr)))
869
6.76M
                    continue;
870
90.2M
                state->ptr = ptr;
871
90.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
90.2M
                if (ret) {
873
26.1M
                    if (state->repeat)
874
10.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
26.1M
                    RETURN_ON_ERROR(ret);
876
26.1M
                    RETURN_SUCCESS;
877
26.1M
                }
878
64.0M
                if (state->repeat)
879
5.55k
                    MARK_POP_KEEP(ctx->lastmark);
880
64.0M
                LASTMARK_RESTORE();
881
64.0M
            }
882
24.0M
            if (state->repeat)
883
265k
                MARK_POP_DISCARD(ctx->lastmark);
884
24.0M
            RETURN_FAILURE;
885
886
140M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
140M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
140M
                   pattern[1], pattern[2]));
898
899
140M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.02M
                RETURN_FAILURE; /* cannot match */
901
902
139M
            state->ptr = ptr;
903
904
139M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
139M
            RETURN_ON_ERROR(ret);
906
139M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
139M
            ctx->count = ret;
908
139M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
139M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
113M
                RETURN_FAILURE;
917
918
26.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
658k
                ptr == state->end &&
920
52.4k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
52.4k
            {
922
                /* tail is empty.  we're finished */
923
52.4k
                state->ptr = ptr;
924
52.4k
                RETURN_SUCCESS;
925
52.4k
            }
926
927
26.1M
            LASTMARK_SAVE();
928
26.1M
            if (state->repeat)
929
11.8M
                MARK_PUSH(ctx->lastmark);
930
931
26.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.62M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.62M
                for (;;) {
936
15.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.1M
                        ptr--;
939
11.1M
                        ctx->count--;
940
11.1M
                    }
941
4.62M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.64M
                        break;
943
1.98M
                    state->ptr = ptr;
944
1.98M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
1.98M
                            pattern+pattern[0]);
946
1.98M
                    if (ret) {
947
1.98M
                        if (state->repeat)
948
647k
                            MARK_POP_DISCARD(ctx->lastmark);
949
1.98M
                        RETURN_ON_ERROR(ret);
950
1.98M
                        RETURN_SUCCESS;
951
1.98M
                    }
952
106
                    if (state->repeat)
953
106
                        MARK_POP_KEEP(ctx->lastmark);
954
106
                    LASTMARK_RESTORE();
955
956
106
                    ptr--;
957
106
                    ctx->count--;
958
106
                }
959
2.64M
                if (state->repeat)
960
1.27M
                    MARK_POP_DISCARD(ctx->lastmark);
961
21.5M
            } else {
962
                /* general case */
963
23.7M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
22.9M
                    state->ptr = ptr;
965
22.9M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
22.9M
                            pattern+pattern[0]);
967
22.9M
                    if (ret) {
968
20.7M
                        if (state->repeat)
969
9.25M
                            MARK_POP_DISCARD(ctx->lastmark);
970
20.7M
                        RETURN_ON_ERROR(ret);
971
20.7M
                        RETURN_SUCCESS;
972
20.7M
                    }
973
2.22M
                    if (state->repeat)
974
1.17M
                        MARK_POP_KEEP(ctx->lastmark);
975
2.22M
                    LASTMARK_RESTORE();
976
977
2.22M
                    ptr--;
978
2.22M
                    ctx->count--;
979
2.22M
                }
980
843k
                if (state->repeat)
981
638k
                    MARK_POP_DISCARD(ctx->lastmark);
982
843k
            }
983
3.48M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
5.71M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1120
               item <UNTIL> tail */
1122
5.71M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
5.71M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
5.71M
            ctx->u.rep = repeat_pool_malloc(state);
1127
5.71M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
5.71M
            ctx->u.rep->count = -1;
1131
5.71M
            ctx->u.rep->pattern = pattern;
1132
5.71M
            ctx->u.rep->prev = state->repeat;
1133
5.71M
            ctx->u.rep->last_ptr = NULL;
1134
5.71M
            state->repeat = ctx->u.rep;
1135
1136
5.71M
            state->ptr = ptr;
1137
5.71M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
5.71M
            state->repeat = ctx->u.rep->prev;
1139
5.71M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
5.71M
            if (ret) {
1142
5.61M
                RETURN_ON_ERROR(ret);
1143
5.61M
                RETURN_SUCCESS;
1144
5.61M
            }
1145
100k
            RETURN_FAILURE;
1146
1147
21.5M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
21.5M
            ctx->u.rep = state->repeat;
1155
21.5M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
21.5M
            state->ptr = ptr;
1159
1160
21.5M
            ctx->count = ctx->u.rep->count+1;
1161
1162
21.5M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
21.5M
                   ptr, ctx->count));
1164
1165
21.5M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
21.5M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.92M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
17.5M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
17.5M
                ctx->u.rep->count = ctx->count;
1185
17.5M
                LASTMARK_SAVE();
1186
17.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
17.5M
                LAST_PTR_PUSH();
1189
17.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
17.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
17.5M
                        ctx->u.rep->pattern+3);
1192
17.5M
                LAST_PTR_POP();
1193
17.5M
                if (ret) {
1194
15.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
15.2M
                    RETURN_ON_ERROR(ret);
1196
15.2M
                    RETURN_SUCCESS;
1197
15.2M
                }
1198
2.38M
                MARK_POP(ctx->lastmark);
1199
2.38M
                LASTMARK_RESTORE();
1200
2.38M
                ctx->u.rep->count = ctx->count-1;
1201
2.38M
                state->ptr = ptr;
1202
2.38M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.31M
            state->repeat = ctx->u.rep->prev;
1207
6.31M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.31M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.31M
            RETURN_ON_SUCCESS(ret);
1211
696k
            state->ptr = ptr;
1212
696k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
1.86M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
1.86M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
1.86M
                   ptr, pattern[1]));
1565
1.86M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
1.86M
            state->ptr = ptr - pattern[1];
1568
1.86M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
1.86M
            RETURN_ON_FAILURE(ret);
1570
1.68M
            pattern += pattern[0];
1571
1.68M
            DISPATCH;
1572
1573
5.27M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.27M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.27M
                   ptr, pattern[1]));
1578
5.27M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.27M
                state->ptr = ptr - pattern[1];
1580
5.27M
                LASTMARK_SAVE();
1581
5.27M
                if (state->repeat)
1582
5.27M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.5M
                if (ret) {
1586
1.09k
                    if (state->repeat)
1587
1.09k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.09k
                    RETURN_ON_ERROR(ret);
1589
1.09k
                    RETURN_FAILURE;
1590
1.09k
                }
1591
5.27M
                if (state->repeat)
1592
5.27M
                    MARK_POP(ctx->lastmark);
1593
5.27M
                LASTMARK_RESTORE();
1594
5.27M
            }
1595
5.27M
            pattern += pattern[0];
1596
5.27M
            DISPATCH;
1597
1598
5.27M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
292M
exit:
1620
292M
    ctx_pos = ctx->last_ctx_pos;
1621
292M
    jump = ctx->jump;
1622
292M
    DATA_POP_DISCARD(ctx);
1623
292M
    if (ctx_pos == -1) {
1624
140M
        state->sigcount = sigcount;
1625
140M
        return ret;
1626
140M
    }
1627
151M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
151M
    switch (jump) {
1630
17.5M
        case JUMP_MAX_UNTIL_2:
1631
17.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
17.5M
            goto jump_max_until_2;
1633
6.31M
        case JUMP_MAX_UNTIL_3:
1634
6.31M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.31M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
90.2M
        case JUMP_BRANCH:
1643
90.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
90.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
5.71M
        case JUMP_REPEAT:
1658
5.71M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
5.71M
            goto jump_repeat;
1660
1.98M
        case JUMP_REPEAT_ONE_1:
1661
1.98M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
1.98M
            goto jump_repeat_one_1;
1663
22.9M
        case JUMP_REPEAT_ONE_2:
1664
22.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
22.9M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
1.86M
        case JUMP_ASSERT:
1673
1.86M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
1.86M
            goto jump_assert;
1675
5.27M
        case JUMP_ASSERT_NOT:
1676
5.27M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.27M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
151M
    }
1683
1684
0
    return ret; /* should never get here */
1685
151M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
215M
{
601
215M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
215M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
215M
    Py_ssize_t ret = 0;
604
215M
    int jump;
605
215M
    unsigned int sigcount = state->sigcount;
606
607
215M
    SRE(match_context)* ctx;
608
215M
    SRE(match_context)* nextctx;
609
215M
    INIT_TRACE(state);
610
611
215M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
215M
    DATA_ALLOC(SRE(match_context), ctx);
614
215M
    ctx->last_ctx_pos = -1;
615
215M
    ctx->jump = JUMP_NONE;
616
215M
    ctx->toplevel = toplevel;
617
215M
    ctx_pos = alloc_pos;
618
619
215M
#if USE_COMPUTED_GOTOS
620
215M
#include "sre_targets.h"
621
215M
#endif
622
623
361M
entrance:
624
625
361M
    ;  // Fashion statement.
626
361M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
361M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
865
            TRACE(("reject (got %tu chars, need %zu)\n",
633
865
                   end - ptr, (size_t) pattern[3]));
634
865
            RETURN_FAILURE;
635
865
        }
636
13.7M
        pattern += pattern[1] + 1;
637
13.7M
    }
638
639
361M
#if USE_COMPUTED_GOTOS
640
361M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
361M
    {
647
648
361M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
183M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
183M
                   ptr, pattern[0]));
653
183M
            {
654
183M
                int i = pattern[0];
655
183M
                if (i & 1)
656
11.3M
                    state->lastindex = i/2 + 1;
657
183M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
182M
                    int j = state->lastmark + 1;
663
183M
                    while (j < i)
664
1.01M
                        state->mark[j++] = NULL;
665
182M
                    state->lastmark = i;
666
182M
                }
667
183M
                state->mark[i] = ptr;
668
183M
            }
669
183M
            pattern++;
670
183M
            DISPATCH;
671
672
183M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
26.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
26.4M
                   ptr, *pattern));
677
26.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
8.08M
                RETURN_FAILURE;
679
18.3M
            pattern++;
680
18.3M
            ptr++;
681
18.3M
            DISPATCH;
682
683
18.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
63.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
63.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
63.7M
            if (ctx->toplevel &&
698
10.7M
                ((state->match_all && ptr != state->end) ||
699
10.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
63.7M
            state->ptr = ptr;
704
63.7M
            RETURN_SUCCESS;
705
706
771k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
771k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
771k
            if (!SRE(at)(state, ptr, *pattern))
711
741k
                RETURN_FAILURE;
712
29.8k
            pattern++;
713
29.8k
            DISPATCH;
714
715
29.8k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
76.4M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
76.4M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
76.4M
            if (ptr >= end ||
749
76.4M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.43M
                RETURN_FAILURE;
751
71.0M
            pattern += pattern[0];
752
71.0M
            ptr++;
753
71.0M
            DISPATCH;
754
755
71.0M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.32M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.32M
                   pattern, ptr, pattern[0]));
758
3.32M
            if (ptr >= end ||
759
3.32M
                sre_lower_ascii(*ptr) != *pattern)
760
77.0k
                RETURN_FAILURE;
761
3.25M
            pattern++;
762
3.25M
            ptr++;
763
3.25M
            DISPATCH;
764
765
3.25M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.2M
        TARGET(SRE_OP_JUMP):
845
19.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.2M
                   ptr, pattern[0]));
850
19.2M
            pattern += pattern[0];
851
19.2M
            DISPATCH;
852
853
26.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
26.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
26.0M
            LASTMARK_SAVE();
858
26.0M
            if (state->repeat)
859
22.6M
                MARK_PUSH(ctx->lastmark);
860
53.1M
            for (; pattern[0]; pattern += pattern[0]) {
861
46.0M
                if (pattern[1] == SRE_OP_LITERAL &&
862
20.6M
                    (ptr >= end ||
863
20.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
9.91M
                    continue;
865
36.1M
                if (pattern[1] == SRE_OP_IN &&
866
20.5M
                    (ptr >= end ||
867
20.5M
                     !SRE(charset)(state, pattern + 3,
868
20.5M
                                   (SRE_CODE) *ptr)))
869
13.2M
                    continue;
870
22.8M
                state->ptr = ptr;
871
22.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
22.8M
                if (ret) {
873
18.8M
                    if (state->repeat)
874
17.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
18.8M
                    RETURN_ON_ERROR(ret);
876
18.8M
                    RETURN_SUCCESS;
877
18.8M
                }
878
4.01M
                if (state->repeat)
879
2.09k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.01M
                LASTMARK_RESTORE();
881
4.01M
            }
882
7.17M
            if (state->repeat)
883
5.51M
                MARK_POP_DISCARD(ctx->lastmark);
884
7.17M
            RETURN_FAILURE;
885
886
199M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
199M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
199M
                   pattern[1], pattern[2]));
898
899
199M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
189k
                RETURN_FAILURE; /* cannot match */
901
902
199M
            state->ptr = ptr;
903
904
199M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
199M
            RETURN_ON_ERROR(ret);
906
199M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
199M
            ctx->count = ret;
908
199M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
199M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
159M
                RETURN_FAILURE;
917
918
40.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.60M
                ptr == state->end &&
920
17.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
17.5k
            {
922
                /* tail is empty.  we're finished */
923
17.5k
                state->ptr = ptr;
924
17.5k
                RETURN_SUCCESS;
925
17.5k
            }
926
927
40.1M
            LASTMARK_SAVE();
928
40.1M
            if (state->repeat)
929
23.5M
                MARK_PUSH(ctx->lastmark);
930
931
40.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
10.5M
                ctx->u.chr = pattern[pattern[0]+1];
935
10.5M
                for (;;) {
936
22.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
16.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
12.1M
                        ptr--;
939
12.1M
                        ctx->count--;
940
12.1M
                    }
941
10.5M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
6.55M
                        break;
943
3.95M
                    state->ptr = ptr;
944
3.95M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.95M
                            pattern+pattern[0]);
946
3.95M
                    if (ret) {
947
3.95M
                        if (state->repeat)
948
3.92M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.95M
                        RETURN_ON_ERROR(ret);
950
3.95M
                        RETURN_SUCCESS;
951
3.95M
                    }
952
201
                    if (state->repeat)
953
201
                        MARK_POP_KEEP(ctx->lastmark);
954
201
                    LASTMARK_RESTORE();
955
956
201
                    ptr--;
957
201
                    ctx->count--;
958
201
                }
959
6.55M
                if (state->repeat)
960
6.54M
                    MARK_POP_DISCARD(ctx->lastmark);
961
29.6M
            } else {
962
                /* general case */
963
30.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
30.1M
                    state->ptr = ptr;
965
30.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
30.1M
                            pattern+pattern[0]);
967
30.1M
                    if (ret) {
968
29.2M
                        if (state->repeat)
969
12.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
29.2M
                        RETURN_ON_ERROR(ret);
971
29.2M
                        RETURN_SUCCESS;
972
29.2M
                    }
973
823k
                    if (state->repeat)
974
656k
                        MARK_POP_KEEP(ctx->lastmark);
975
823k
                    LASTMARK_RESTORE();
976
977
823k
                    ptr--;
978
823k
                    ctx->count--;
979
823k
                }
980
330k
                if (state->repeat)
981
328k
                    MARK_POP_DISCARD(ctx->lastmark);
982
330k
            }
983
6.88M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
15.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
15.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
15.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
15.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
15.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
15.9M
            ctx->u.rep->count = -1;
1131
15.9M
            ctx->u.rep->pattern = pattern;
1132
15.9M
            ctx->u.rep->prev = state->repeat;
1133
15.9M
            ctx->u.rep->last_ptr = NULL;
1134
15.9M
            state->repeat = ctx->u.rep;
1135
1136
15.9M
            state->ptr = ptr;
1137
15.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
15.9M
            state->repeat = ctx->u.rep->prev;
1139
15.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
15.9M
            if (ret) {
1142
15.9M
                RETURN_ON_ERROR(ret);
1143
15.9M
                RETURN_SUCCESS;
1144
15.9M
            }
1145
1.16k
            RETURN_FAILURE;
1146
1147
38.3M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
38.3M
            ctx->u.rep = state->repeat;
1155
38.3M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
38.3M
            state->ptr = ptr;
1159
1160
38.3M
            ctx->count = ctx->u.rep->count+1;
1161
1162
38.3M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
38.3M
                   ptr, ctx->count));
1164
1165
38.3M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
38.3M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.60M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
35.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
35.7M
                ctx->u.rep->count = ctx->count;
1185
35.7M
                LASTMARK_SAVE();
1186
35.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
35.7M
                LAST_PTR_PUSH();
1189
35.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
35.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
35.7M
                        ctx->u.rep->pattern+3);
1192
35.7M
                LAST_PTR_POP();
1193
35.7M
                if (ret) {
1194
22.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
22.1M
                    RETURN_ON_ERROR(ret);
1196
22.1M
                    RETURN_SUCCESS;
1197
22.1M
                }
1198
13.6M
                MARK_POP(ctx->lastmark);
1199
13.6M
                LASTMARK_RESTORE();
1200
13.6M
                ctx->u.rep->count = ctx->count-1;
1201
13.6M
                state->ptr = ptr;
1202
13.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
16.2M
            state->repeat = ctx->u.rep->prev;
1207
16.2M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
16.2M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
16.2M
            RETURN_ON_SUCCESS(ret);
1211
329k
            state->ptr = ptr;
1212
329k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.2M
                   ptr, pattern[1]));
1565
12.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.2M
            state->ptr = ptr - pattern[1];
1568
12.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.2M
            RETURN_ON_FAILURE(ret);
1570
9.19M
            pattern += pattern[0];
1571
9.19M
            DISPATCH;
1572
1573
9.19M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
8.20M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
8.20M
                   ptr, pattern[1]));
1578
8.20M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
8.20M
                state->ptr = ptr - pattern[1];
1580
8.20M
                LASTMARK_SAVE();
1581
8.20M
                if (state->repeat)
1582
8.20M
                    MARK_PUSH(ctx->lastmark);
1583
1584
16.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
16.4M
                if (ret) {
1586
1.86k
                    if (state->repeat)
1587
1.86k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.86k
                    RETURN_ON_ERROR(ret);
1589
1.86k
                    RETURN_FAILURE;
1590
1.86k
                }
1591
8.20M
                if (state->repeat)
1592
8.20M
                    MARK_POP(ctx->lastmark);
1593
8.20M
                LASTMARK_RESTORE();
1594
8.20M
            }
1595
8.20M
            pattern += pattern[0];
1596
8.20M
            DISPATCH;
1597
1598
8.20M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
361M
exit:
1620
361M
    ctx_pos = ctx->last_ctx_pos;
1621
361M
    jump = ctx->jump;
1622
361M
    DATA_POP_DISCARD(ctx);
1623
361M
    if (ctx_pos == -1) {
1624
215M
        state->sigcount = sigcount;
1625
215M
        return ret;
1626
215M
    }
1627
145M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
145M
    switch (jump) {
1630
35.7M
        case JUMP_MAX_UNTIL_2:
1631
35.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
35.7M
            goto jump_max_until_2;
1633
16.2M
        case JUMP_MAX_UNTIL_3:
1634
16.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
16.2M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
22.8M
        case JUMP_BRANCH:
1643
22.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
22.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
15.9M
        case JUMP_REPEAT:
1658
15.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
15.9M
            goto jump_repeat;
1660
3.95M
        case JUMP_REPEAT_ONE_1:
1661
3.95M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.95M
            goto jump_repeat_one_1;
1663
30.1M
        case JUMP_REPEAT_ONE_2:
1664
30.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
30.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.2M
        case JUMP_ASSERT:
1673
12.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.2M
            goto jump_assert;
1675
8.20M
        case JUMP_ASSERT_NOT:
1676
8.20M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
8.20M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
145M
    }
1683
1684
0
    return ret; /* should never get here */
1685
145M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
85.3M
{
601
85.3M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
85.3M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
85.3M
    Py_ssize_t ret = 0;
604
85.3M
    int jump;
605
85.3M
    unsigned int sigcount = state->sigcount;
606
607
85.3M
    SRE(match_context)* ctx;
608
85.3M
    SRE(match_context)* nextctx;
609
85.3M
    INIT_TRACE(state);
610
611
85.3M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
85.3M
    DATA_ALLOC(SRE(match_context), ctx);
614
85.3M
    ctx->last_ctx_pos = -1;
615
85.3M
    ctx->jump = JUMP_NONE;
616
85.3M
    ctx->toplevel = toplevel;
617
85.3M
    ctx_pos = alloc_pos;
618
619
85.3M
#if USE_COMPUTED_GOTOS
620
85.3M
#include "sre_targets.h"
621
85.3M
#endif
622
623
349M
entrance:
624
625
349M
    ;  // Fashion statement.
626
349M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
349M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
369
            TRACE(("reject (got %tu chars, need %zu)\n",
633
369
                   end - ptr, (size_t) pattern[3]));
634
369
            RETURN_FAILURE;
635
369
        }
636
13.5M
        pattern += pattern[1] + 1;
637
13.5M
    }
638
639
349M
#if USE_COMPUTED_GOTOS
640
349M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
349M
    {
647
648
349M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
111M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
111M
                   ptr, pattern[0]));
653
111M
            {
654
111M
                int i = pattern[0];
655
111M
                if (i & 1)
656
18.9M
                    state->lastindex = i/2 + 1;
657
111M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
109M
                    int j = state->lastmark + 1;
663
111M
                    while (j < i)
664
1.54M
                        state->mark[j++] = NULL;
665
109M
                    state->lastmark = i;
666
109M
                }
667
111M
                state->mark[i] = ptr;
668
111M
            }
669
111M
            pattern++;
670
111M
            DISPATCH;
671
672
111M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
32.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
32.3M
                   ptr, *pattern));
677
32.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
11.4M
                RETURN_FAILURE;
679
20.8M
            pattern++;
680
20.8M
            ptr++;
681
20.8M
            DISPATCH;
682
683
20.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
43.0M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
43.0M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
43.0M
            if (ctx->toplevel &&
698
12.9M
                ((state->match_all && ptr != state->end) ||
699
12.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
43.0M
            state->ptr = ptr;
704
43.0M
            RETURN_SUCCESS;
705
706
567k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
567k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
567k
            if (!SRE(at)(state, ptr, *pattern))
711
562k
                RETURN_FAILURE;
712
4.12k
            pattern++;
713
4.12k
            DISPATCH;
714
715
4.12k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
66.4M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
66.4M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
66.4M
            if (ptr >= end ||
749
66.4M
                !SRE(charset)(state, pattern + 1, *ptr))
750
2.39M
                RETURN_FAILURE;
751
64.0M
            pattern += pattern[0];
752
64.0M
            ptr++;
753
64.0M
            DISPATCH;
754
755
64.0M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.85M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.85M
                   pattern, ptr, pattern[0]));
758
1.85M
            if (ptr >= end ||
759
1.85M
                sre_lower_ascii(*ptr) != *pattern)
760
25.6k
                RETURN_FAILURE;
761
1.83M
            pattern++;
762
1.83M
            ptr++;
763
1.83M
            DISPATCH;
764
765
1.83M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
29.7M
        TARGET(SRE_OP_JUMP):
845
29.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
29.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
29.7M
                   ptr, pattern[0]));
850
29.7M
            pattern += pattern[0];
851
29.7M
            DISPATCH;
852
853
41.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
41.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
41.4M
            LASTMARK_SAVE();
858
41.4M
            if (state->repeat)
859
37.7M
                MARK_PUSH(ctx->lastmark);
860
86.6M
            for (; pattern[0]; pattern += pattern[0]) {
861
74.4M
                if (pattern[1] == SRE_OP_LITERAL &&
862
36.0M
                    (ptr >= end ||
863
36.0M
                     (SRE_CODE) *ptr != pattern[2]))
864
21.7M
                    continue;
865
52.7M
                if (pattern[1] == SRE_OP_IN &&
866
34.5M
                    (ptr >= end ||
867
34.5M
                     !SRE(charset)(state, pattern + 3,
868
34.5M
                                   (SRE_CODE) *ptr)))
869
22.5M
                    continue;
870
30.1M
                state->ptr = ptr;
871
30.1M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
30.1M
                if (ret) {
873
29.2M
                    if (state->repeat)
874
26.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
29.2M
                    RETURN_ON_ERROR(ret);
876
29.2M
                    RETURN_SUCCESS;
877
29.2M
                }
878
904k
                if (state->repeat)
879
11.6k
                    MARK_POP_KEEP(ctx->lastmark);
880
904k
                LASTMARK_RESTORE();
881
904k
            }
882
12.1M
            if (state->repeat)
883
11.5M
                MARK_POP_DISCARD(ctx->lastmark);
884
12.1M
            RETURN_FAILURE;
885
886
139M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
139M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
139M
                   pattern[1], pattern[2]));
898
899
139M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
21.6k
                RETURN_FAILURE; /* cannot match */
901
902
139M
            state->ptr = ptr;
903
904
139M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
139M
            RETURN_ON_ERROR(ret);
906
139M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
139M
            ctx->count = ret;
908
139M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
139M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
64.5M
                RETURN_FAILURE;
917
918
74.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.05M
                ptr == state->end &&
920
3.53k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.53k
            {
922
                /* tail is empty.  we're finished */
923
3.53k
                state->ptr = ptr;
924
3.53k
                RETURN_SUCCESS;
925
3.53k
            }
926
927
74.5M
            LASTMARK_SAVE();
928
74.5M
            if (state->repeat)
929
54.8M
                MARK_PUSH(ctx->lastmark);
930
931
74.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
25.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
25.7M
                for (;;) {
936
62.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
42.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
37.0M
                        ptr--;
939
37.0M
                        ctx->count--;
940
37.0M
                    }
941
25.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
20.1M
                        break;
943
5.58M
                    state->ptr = ptr;
944
5.58M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.58M
                            pattern+pattern[0]);
946
5.58M
                    if (ret) {
947
5.58M
                        if (state->repeat)
948
5.57M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.58M
                        RETURN_ON_ERROR(ret);
950
5.58M
                        RETURN_SUCCESS;
951
5.58M
                    }
952
240
                    if (state->repeat)
953
240
                        MARK_POP_KEEP(ctx->lastmark);
954
240
                    LASTMARK_RESTORE();
955
956
240
                    ptr--;
957
240
                    ctx->count--;
958
240
                }
959
20.1M
                if (state->repeat)
960
20.1M
                    MARK_POP_DISCARD(ctx->lastmark);
961
48.8M
            } else {
962
                /* general case */
963
49.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
49.3M
                    state->ptr = ptr;
965
49.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
49.3M
                            pattern+pattern[0]);
967
49.3M
                    if (ret) {
968
48.7M
                        if (state->repeat)
969
29.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
48.7M
                        RETURN_ON_ERROR(ret);
971
48.7M
                        RETURN_SUCCESS;
972
48.7M
                    }
973
599k
                    if (state->repeat)
974
162k
                        MARK_POP_KEEP(ctx->lastmark);
975
599k
                    LASTMARK_RESTORE();
976
977
599k
                    ptr--;
978
599k
                    ctx->count--;
979
599k
                }
980
82.1k
                if (state->repeat)
981
81.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
82.1k
            }
983
20.2M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
35.4M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
35.4M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
35.4M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
35.4M
            ctx->u.rep = repeat_pool_malloc(state);
1127
35.4M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
35.4M
            ctx->u.rep->count = -1;
1131
35.4M
            ctx->u.rep->pattern = pattern;
1132
35.4M
            ctx->u.rep->prev = state->repeat;
1133
35.4M
            ctx->u.rep->last_ptr = NULL;
1134
35.4M
            state->repeat = ctx->u.rep;
1135
1136
35.4M
            state->ptr = ptr;
1137
35.4M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
35.4M
            state->repeat = ctx->u.rep->prev;
1139
35.4M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
35.4M
            if (ret) {
1142
35.4M
                RETURN_ON_ERROR(ret);
1143
35.4M
                RETURN_SUCCESS;
1144
35.4M
            }
1145
741
            RETURN_FAILURE;
1146
1147
74.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
74.1M
            ctx->u.rep = state->repeat;
1155
74.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
74.1M
            state->ptr = ptr;
1159
1160
74.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
74.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
74.1M
                   ptr, ctx->count));
1164
1165
74.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
74.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.34M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
70.8M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
70.8M
                ctx->u.rep->count = ctx->count;
1185
70.8M
                LASTMARK_SAVE();
1186
70.8M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
70.8M
                LAST_PTR_PUSH();
1189
70.8M
                ctx->u.rep->last_ptr = state->ptr;
1190
70.8M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
70.8M
                        ctx->u.rep->pattern+3);
1192
70.8M
                LAST_PTR_POP();
1193
70.8M
                if (ret) {
1194
38.6M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
38.6M
                    RETURN_ON_ERROR(ret);
1196
38.6M
                    RETURN_SUCCESS;
1197
38.6M
                }
1198
32.2M
                MARK_POP(ctx->lastmark);
1199
32.2M
                LASTMARK_RESTORE();
1200
32.2M
                ctx->u.rep->count = ctx->count-1;
1201
32.2M
                state->ptr = ptr;
1202
32.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
35.5M
            state->repeat = ctx->u.rep->prev;
1207
35.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
35.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
35.5M
            RETURN_ON_SUCCESS(ret);
1211
81.9k
            state->ptr = ptr;
1212
81.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
24.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
24.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
24.7M
                   ptr, pattern[1]));
1565
24.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
24.7M
            state->ptr = ptr - pattern[1];
1568
24.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
24.7M
            RETURN_ON_FAILURE(ret);
1570
23.5M
            pattern += pattern[0];
1571
23.5M
            DISPATCH;
1572
1573
23.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
11.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
11.9M
                   ptr, pattern[1]));
1578
11.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
11.9M
                state->ptr = ptr - pattern[1];
1580
11.9M
                LASTMARK_SAVE();
1581
11.9M
                if (state->repeat)
1582
11.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
23.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
23.8M
                if (ret) {
1586
11.3k
                    if (state->repeat)
1587
11.3k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
11.3k
                    RETURN_ON_ERROR(ret);
1589
11.3k
                    RETURN_FAILURE;
1590
11.3k
                }
1591
11.9M
                if (state->repeat)
1592
11.9M
                    MARK_POP(ctx->lastmark);
1593
11.9M
                LASTMARK_RESTORE();
1594
11.9M
            }
1595
11.9M
            pattern += pattern[0];
1596
11.9M
            DISPATCH;
1597
1598
11.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
349M
exit:
1620
349M
    ctx_pos = ctx->last_ctx_pos;
1621
349M
    jump = ctx->jump;
1622
349M
    DATA_POP_DISCARD(ctx);
1623
349M
    if (ctx_pos == -1) {
1624
85.3M
        state->sigcount = sigcount;
1625
85.3M
        return ret;
1626
85.3M
    }
1627
263M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
263M
    switch (jump) {
1630
70.8M
        case JUMP_MAX_UNTIL_2:
1631
70.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
70.8M
            goto jump_max_until_2;
1633
35.5M
        case JUMP_MAX_UNTIL_3:
1634
35.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
35.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
30.1M
        case JUMP_BRANCH:
1643
30.1M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
30.1M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
35.4M
        case JUMP_REPEAT:
1658
35.4M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
35.4M
            goto jump_repeat;
1660
5.58M
        case JUMP_REPEAT_ONE_1:
1661
5.58M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.58M
            goto jump_repeat_one_1;
1663
49.3M
        case JUMP_REPEAT_ONE_2:
1664
49.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
49.3M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
24.7M
        case JUMP_ASSERT:
1673
24.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
24.7M
            goto jump_assert;
1675
11.9M
        case JUMP_ASSERT_NOT:
1676
11.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
11.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
263M
    }
1683
1684
0
    return ret; /* should never get here */
1685
263M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
306M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
84.6M
{
1694
84.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
84.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
84.6M
    Py_ssize_t status = 0;
1697
84.6M
    Py_ssize_t prefix_len = 0;
1698
84.6M
    Py_ssize_t prefix_skip = 0;
1699
84.6M
    SRE_CODE* prefix = NULL;
1700
84.6M
    SRE_CODE* charset = NULL;
1701
84.6M
    SRE_CODE* overlap = NULL;
1702
84.6M
    int flags = 0;
1703
84.6M
    INIT_TRACE(state);
1704
1705
84.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
84.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
84.6M
        flags = pattern[2];
1713
1714
84.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.82M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.82M
                   end - ptr, (size_t) pattern[3]));
1717
1.82M
            return 0;
1718
1.82M
        }
1719
82.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.42M
            end -= pattern[3] - 1;
1723
7.42M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.42M
        }
1726
1727
82.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.43M
            prefix_len = pattern[5];
1731
7.43M
            prefix_skip = pattern[6];
1732
7.43M
            prefix = pattern + 7;
1733
7.43M
            overlap = prefix + prefix_len - 1;
1734
75.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
70.3M
            charset = pattern + 5;
1738
1739
82.7M
        pattern += 1 + pattern[1];
1740
82.7M
    }
1741
1742
82.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
82.7M
           prefix, prefix_len, prefix_skip));
1744
82.7M
    TRACE(("charset = %p\n", charset));
1745
1746
82.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.88M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
3.83M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.83M
#endif
1753
3.83M
        end = (SRE_CHAR *)state->end;
1754
3.83M
        state->must_advance = 0;
1755
7.40M
        while (ptr < end) {
1756
94.1M
            while (*ptr != c) {
1757
87.2M
                if (++ptr >= end)
1758
450k
                    return 0;
1759
87.2M
            }
1760
6.93M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.93M
            state->start = ptr;
1762
6.93M
            state->ptr = ptr + prefix_skip;
1763
6.93M
            if (flags & SRE_INFO_LITERAL)
1764
3.75k
                return 1; /* we got all of it */
1765
6.92M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.92M
            if (status != 0)
1767
6.40M
                return status;
1768
518k
            ++ptr;
1769
518k
            RESET_CAPTURE_GROUP();
1770
518k
        }
1771
23.2k
        return 0;
1772
3.83M
    }
1773
1774
75.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
542k
        Py_ssize_t i = 0;
1778
1779
542k
        end = (SRE_CHAR *)state->end;
1780
542k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.27M
        for (i = 0; i < prefix_len; i++)
1784
848k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
424k
#endif
1787
1.36M
        while (ptr < end) {
1788
1.36M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
7.56M
            while (*ptr++ != c) {
1790
6.20M
                if (ptr >= end)
1791
310
                    return 0;
1792
6.20M
            }
1793
1.36M
            if (ptr >= end)
1794
57
                return 0;
1795
1796
1.36M
            i = 1;
1797
1.36M
            state->must_advance = 0;
1798
1.36M
            do {
1799
1.36M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.24M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.24M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.24M
                    state->start = ptr - (prefix_len - 1);
1808
1.24M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.24M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.24M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.24M
                    if (status != 0)
1813
542k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
707k
                    if (++ptr >= end)
1816
33
                        return 0;
1817
707k
                    RESET_CAPTURE_GROUP();
1818
707k
                }
1819
820k
                i = overlap[i];
1820
820k
            } while (i != 0);
1821
1.36M
        }
1822
0
        return 0;
1823
542k
    }
1824
1825
75.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
70.3M
        end = (SRE_CHAR *)state->end;
1828
70.3M
        state->must_advance = 0;
1829
72.8M
        for (;;) {
1830
331M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
258M
                ptr++;
1832
72.8M
            if (ptr >= end)
1833
3.83M
                return 0;
1834
69.0M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
69.0M
            state->start = ptr;
1836
69.0M
            state->ptr = ptr;
1837
69.0M
            status = SRE(match)(state, pattern, 0);
1838
69.0M
            if (status != 0)
1839
66.5M
                break;
1840
2.45M
            ptr++;
1841
2.45M
            RESET_CAPTURE_GROUP();
1842
2.45M
        }
1843
70.3M
    } else {
1844
        /* general case */
1845
4.94M
        assert(ptr <= end);
1846
4.94M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.94M
        state->start = state->ptr = ptr;
1848
4.94M
        status = SRE(match)(state, pattern, 1);
1849
4.94M
        state->must_advance = 0;
1850
4.94M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
307M
        while (status == 0 && ptr < end) {
1858
302M
            ptr++;
1859
302M
            RESET_CAPTURE_GROUP();
1860
302M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
302M
            state->start = state->ptr = ptr;
1862
302M
            status = SRE(match)(state, pattern, 0);
1863
302M
        }
1864
4.94M
    }
1865
1866
71.5M
    return status;
1867
75.3M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
32.6M
{
1694
32.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
32.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
32.6M
    Py_ssize_t status = 0;
1697
32.6M
    Py_ssize_t prefix_len = 0;
1698
32.6M
    Py_ssize_t prefix_skip = 0;
1699
32.6M
    SRE_CODE* prefix = NULL;
1700
32.6M
    SRE_CODE* charset = NULL;
1701
32.6M
    SRE_CODE* overlap = NULL;
1702
32.6M
    int flags = 0;
1703
32.6M
    INIT_TRACE(state);
1704
1705
32.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
32.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
32.6M
        flags = pattern[2];
1713
1714
32.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.70M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.70M
                   end - ptr, (size_t) pattern[3]));
1717
1.70M
            return 0;
1718
1.70M
        }
1719
30.9M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.17M
            end -= pattern[3] - 1;
1723
2.17M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.17M
        }
1726
1727
30.9M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.17M
            prefix_len = pattern[5];
1731
2.17M
            prefix_skip = pattern[6];
1732
2.17M
            prefix = pattern + 7;
1733
2.17M
            overlap = prefix + prefix_len - 1;
1734
28.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
25.1M
            charset = pattern + 5;
1738
1739
30.9M
        pattern += 1 + pattern[1];
1740
30.9M
    }
1741
1742
30.9M
    TRACE(("prefix = %p %zd %zd\n",
1743
30.9M
           prefix, prefix_len, prefix_skip));
1744
30.9M
    TRACE(("charset = %p\n", charset));
1745
1746
30.9M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.12M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.12M
#if SIZEOF_SRE_CHAR < 4
1750
2.12M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.12M
#endif
1753
2.12M
        end = (SRE_CHAR *)state->end;
1754
2.12M
        state->must_advance = 0;
1755
2.29M
        while (ptr < end) {
1756
28.2M
            while (*ptr != c) {
1757
26.4M
                if (++ptr >= end)
1758
386k
                    return 0;
1759
26.4M
            }
1760
1.89M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.89M
            state->start = ptr;
1762
1.89M
            state->ptr = ptr + prefix_skip;
1763
1.89M
            if (flags & SRE_INFO_LITERAL)
1764
246
                return 1; /* we got all of it */
1765
1.88M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.88M
            if (status != 0)
1767
1.72M
                return status;
1768
169k
            ++ptr;
1769
169k
            RESET_CAPTURE_GROUP();
1770
169k
        }
1771
20.4k
        return 0;
1772
2.12M
    }
1773
1774
28.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
49.6k
        Py_ssize_t i = 0;
1778
1779
49.6k
        end = (SRE_CHAR *)state->end;
1780
49.6k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
49.6k
#if SIZEOF_SRE_CHAR < 4
1783
149k
        for (i = 0; i < prefix_len; i++)
1784
99.3k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
49.6k
#endif
1787
224k
        while (ptr < end) {
1788
224k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.55M
            while (*ptr++ != c) {
1790
1.32M
                if (ptr >= end)
1791
52
                    return 0;
1792
1.32M
            }
1793
224k
            if (ptr >= end)
1794
27
                return 0;
1795
1796
224k
            i = 1;
1797
224k
            state->must_advance = 0;
1798
225k
            do {
1799
225k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
179k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
179k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
179k
                    state->start = ptr - (prefix_len - 1);
1808
179k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
179k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
179k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
179k
                    if (status != 0)
1813
49.5k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
130k
                    if (++ptr >= end)
1816
14
                        return 0;
1817
130k
                    RESET_CAPTURE_GROUP();
1818
130k
                }
1819
175k
                i = overlap[i];
1820
175k
            } while (i != 0);
1821
224k
        }
1822
0
        return 0;
1823
49.6k
    }
1824
1825
28.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
25.1M
        end = (SRE_CHAR *)state->end;
1828
25.1M
        state->must_advance = 0;
1829
26.6M
        for (;;) {
1830
75.1M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
48.4M
                ptr++;
1832
26.6M
            if (ptr >= end)
1833
2.68M
                return 0;
1834
24.0M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
24.0M
            state->start = ptr;
1836
24.0M
            state->ptr = ptr;
1837
24.0M
            status = SRE(match)(state, pattern, 0);
1838
24.0M
            if (status != 0)
1839
22.4M
                break;
1840
1.55M
            ptr++;
1841
1.55M
            RESET_CAPTURE_GROUP();
1842
1.55M
        }
1843
25.1M
    } else {
1844
        /* general case */
1845
3.63M
        assert(ptr <= end);
1846
3.63M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.63M
        state->start = state->ptr = ptr;
1848
3.63M
        status = SRE(match)(state, pattern, 1);
1849
3.63M
        state->must_advance = 0;
1850
3.63M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
84.9M
        while (status == 0 && ptr < end) {
1858
81.3M
            ptr++;
1859
81.3M
            RESET_CAPTURE_GROUP();
1860
81.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
81.3M
            state->start = state->ptr = ptr;
1862
81.3M
            status = SRE(match)(state, pattern, 0);
1863
81.3M
        }
1864
3.63M
    }
1865
1866
26.0M
    return status;
1867
28.7M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
45.2M
{
1694
45.2M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
45.2M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
45.2M
    Py_ssize_t status = 0;
1697
45.2M
    Py_ssize_t prefix_len = 0;
1698
45.2M
    Py_ssize_t prefix_skip = 0;
1699
45.2M
    SRE_CODE* prefix = NULL;
1700
45.2M
    SRE_CODE* charset = NULL;
1701
45.2M
    SRE_CODE* overlap = NULL;
1702
45.2M
    int flags = 0;
1703
45.2M
    INIT_TRACE(state);
1704
1705
45.2M
    if (ptr > end)
1706
0
        return 0;
1707
1708
45.2M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
45.2M
        flags = pattern[2];
1713
1714
45.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
121k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
121k
                   end - ptr, (size_t) pattern[3]));
1717
121k
            return 0;
1718
121k
        }
1719
45.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.07M
            end -= pattern[3] - 1;
1723
2.07M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.07M
        }
1726
1727
45.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.08M
            prefix_len = pattern[5];
1731
2.08M
            prefix_skip = pattern[6];
1732
2.08M
            prefix = pattern + 7;
1733
2.08M
            overlap = prefix + prefix_len - 1;
1734
43.0M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
41.9M
            charset = pattern + 5;
1738
1739
45.1M
        pattern += 1 + pattern[1];
1740
45.1M
    }
1741
1742
45.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
45.1M
           prefix, prefix_len, prefix_skip));
1744
45.1M
    TRACE(("charset = %p\n", charset));
1745
1746
45.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.70M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.70M
#if SIZEOF_SRE_CHAR < 4
1750
1.70M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.70M
#endif
1753
1.70M
        end = (SRE_CHAR *)state->end;
1754
1.70M
        state->must_advance = 0;
1755
1.88M
        while (ptr < end) {
1756
43.8M
            while (*ptr != c) {
1757
42.0M
                if (++ptr >= end)
1758
59.7k
                    return 0;
1759
42.0M
            }
1760
1.81M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.81M
            state->start = ptr;
1762
1.81M
            state->ptr = ptr + prefix_skip;
1763
1.81M
            if (flags & SRE_INFO_LITERAL)
1764
1.22k
                return 1; /* we got all of it */
1765
1.81M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.81M
            if (status != 0)
1767
1.64M
                return status;
1768
173k
            ++ptr;
1769
173k
            RESET_CAPTURE_GROUP();
1770
173k
        }
1771
1.87k
        return 0;
1772
1.70M
    }
1773
1774
43.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
374k
        Py_ssize_t i = 0;
1778
1779
374k
        end = (SRE_CHAR *)state->end;
1780
374k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
374k
#if SIZEOF_SRE_CHAR < 4
1783
1.12M
        for (i = 0; i < prefix_len; i++)
1784
748k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
374k
#endif
1787
730k
        while (ptr < end) {
1788
730k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.06M
            while (*ptr++ != c) {
1790
2.33M
                if (ptr >= end)
1791
111
                    return 0;
1792
2.33M
            }
1793
730k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
730k
            i = 1;
1797
730k
            state->must_advance = 0;
1798
730k
            do {
1799
730k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
686k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
686k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
686k
                    state->start = ptr - (prefix_len - 1);
1808
686k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
686k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
686k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
686k
                    if (status != 0)
1813
374k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
311k
                    if (++ptr >= end)
1816
9
                        return 0;
1817
311k
                    RESET_CAPTURE_GROUP();
1818
311k
                }
1819
356k
                i = overlap[i];
1820
356k
            } while (i != 0);
1821
730k
        }
1822
0
        return 0;
1823
374k
    }
1824
1825
43.0M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
41.9M
        end = (SRE_CHAR *)state->end;
1828
41.9M
        state->must_advance = 0;
1829
42.3M
        for (;;) {
1830
189M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
147M
                ptr++;
1832
42.3M
            if (ptr >= end)
1833
1.09M
                return 0;
1834
41.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
41.2M
            state->start = ptr;
1836
41.2M
            state->ptr = ptr;
1837
41.2M
            status = SRE(match)(state, pattern, 0);
1838
41.2M
            if (status != 0)
1839
40.8M
                break;
1840
412k
            ptr++;
1841
412k
            RESET_CAPTURE_GROUP();
1842
412k
        }
1843
41.9M
    } else {
1844
        /* general case */
1845
1.10M
        assert(ptr <= end);
1846
1.10M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.10M
        state->start = state->ptr = ptr;
1848
1.10M
        status = SRE(match)(state, pattern, 1);
1849
1.10M
        state->must_advance = 0;
1850
1.10M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
158M
        while (status == 0 && ptr < end) {
1858
157M
            ptr++;
1859
157M
            RESET_CAPTURE_GROUP();
1860
157M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
157M
            state->start = state->ptr = ptr;
1862
157M
            status = SRE(match)(state, pattern, 0);
1863
157M
        }
1864
1.10M
    }
1865
1866
41.9M
    return status;
1867
43.0M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.70M
{
1694
6.70M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.70M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.70M
    Py_ssize_t status = 0;
1697
6.70M
    Py_ssize_t prefix_len = 0;
1698
6.70M
    Py_ssize_t prefix_skip = 0;
1699
6.70M
    SRE_CODE* prefix = NULL;
1700
6.70M
    SRE_CODE* charset = NULL;
1701
6.70M
    SRE_CODE* overlap = NULL;
1702
6.70M
    int flags = 0;
1703
6.70M
    INIT_TRACE(state);
1704
1705
6.70M
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.70M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.70M
        flags = pattern[2];
1713
1714
6.70M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.96k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.96k
                   end - ptr, (size_t) pattern[3]));
1717
6.96k
            return 0;
1718
6.96k
        }
1719
6.69M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.16M
            end -= pattern[3] - 1;
1723
3.16M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.16M
        }
1726
1727
6.69M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.17M
            prefix_len = pattern[5];
1731
3.17M
            prefix_skip = pattern[6];
1732
3.17M
            prefix = pattern + 7;
1733
3.17M
            overlap = prefix + prefix_len - 1;
1734
3.52M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.31M
            charset = pattern + 5;
1738
1739
6.69M
        pattern += 1 + pattern[1];
1740
6.69M
    }
1741
1742
6.69M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.69M
           prefix, prefix_len, prefix_skip));
1744
6.69M
    TRACE(("charset = %p\n", charset));
1745
1746
6.69M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.05M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
3.05M
        end = (SRE_CHAR *)state->end;
1754
3.05M
        state->must_advance = 0;
1755
3.22M
        while (ptr < end) {
1756
22.0M
            while (*ptr != c) {
1757
18.7M
                if (++ptr >= end)
1758
4.24k
                    return 0;
1759
18.7M
            }
1760
3.22M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.22M
            state->start = ptr;
1762
3.22M
            state->ptr = ptr + prefix_skip;
1763
3.22M
            if (flags & SRE_INFO_LITERAL)
1764
2.28k
                return 1; /* we got all of it */
1765
3.22M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.22M
            if (status != 0)
1767
3.04M
                return status;
1768
175k
            ++ptr;
1769
175k
            RESET_CAPTURE_GROUP();
1770
175k
        }
1771
866
        return 0;
1772
3.05M
    }
1773
1774
3.64M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
118k
        Py_ssize_t i = 0;
1778
1779
118k
        end = (SRE_CHAR *)state->end;
1780
118k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
406k
        while (ptr < end) {
1788
406k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.94M
            while (*ptr++ != c) {
1790
2.54M
                if (ptr >= end)
1791
147
                    return 0;
1792
2.54M
            }
1793
405k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
405k
            i = 1;
1797
405k
            state->must_advance = 0;
1798
407k
            do {
1799
407k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
383k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
383k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
383k
                    state->start = ptr - (prefix_len - 1);
1808
383k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
383k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
383k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
383k
                    if (status != 0)
1813
118k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
265k
                    if (++ptr >= end)
1816
10
                        return 0;
1817
265k
                    RESET_CAPTURE_GROUP();
1818
265k
                }
1819
288k
                i = overlap[i];
1820
288k
            } while (i != 0);
1821
405k
        }
1822
0
        return 0;
1823
118k
    }
1824
1825
3.52M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.31M
        end = (SRE_CHAR *)state->end;
1828
3.31M
        state->must_advance = 0;
1829
3.79M
        for (;;) {
1830
66.3M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
62.5M
                ptr++;
1832
3.79M
            if (ptr >= end)
1833
52.2k
                return 0;
1834
3.74M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.74M
            state->start = ptr;
1836
3.74M
            state->ptr = ptr;
1837
3.74M
            status = SRE(match)(state, pattern, 0);
1838
3.74M
            if (status != 0)
1839
3.26M
                break;
1840
481k
            ptr++;
1841
481k
            RESET_CAPTURE_GROUP();
1842
481k
        }
1843
3.31M
    } else {
1844
        /* general case */
1845
208k
        assert(ptr <= end);
1846
208k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
208k
        state->start = state->ptr = ptr;
1848
208k
        status = SRE(match)(state, pattern, 1);
1849
208k
        state->must_advance = 0;
1850
208k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
64.4M
        while (status == 0 && ptr < end) {
1858
64.2M
            ptr++;
1859
64.2M
            RESET_CAPTURE_GROUP();
1860
64.2M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
64.2M
            state->start = state->ptr = ptr;
1862
64.2M
            status = SRE(match)(state, pattern, 0);
1863
64.2M
        }
1864
208k
    }
1865
1866
3.47M
    return status;
1867
3.52M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/