Coverage Report

Created: 2025-09-04 06:25

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
13.9M
{
18
    /* check if pointer is at given position */
19
20
13.9M
    Py_ssize_t thisp, thatp;
21
22
13.9M
    switch (at) {
23
24
6.70M
    case SRE_AT_BEGINNING:
25
6.70M
    case SRE_AT_BEGINNING_STRING:
26
6.70M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.74M
    case SRE_AT_END:
33
4.74M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.74M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.74M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.47M
    case SRE_AT_END_STRING:
42
2.47M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.9M
    }
87
88
0
    return 0;
89
13.9M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.6M
{
18
    /* check if pointer is at given position */
19
20
12.6M
    Py_ssize_t thisp, thatp;
21
22
12.6M
    switch (at) {
23
24
6.67M
    case SRE_AT_BEGINNING:
25
6.67M
    case SRE_AT_BEGINNING_STRING:
26
6.67M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.34M
    case SRE_AT_END:
33
4.34M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.34M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.34M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.59M
    case SRE_AT_END_STRING:
42
1.59M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.6M
    }
87
88
0
    return 0;
89
12.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
771k
{
18
    /* check if pointer is at given position */
19
20
771k
    Py_ssize_t thisp, thatp;
21
22
771k
    switch (at) {
23
24
27.6k
    case SRE_AT_BEGINNING:
25
27.6k
    case SRE_AT_BEGINNING_STRING:
26
27.6k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
320k
    case SRE_AT_END:
33
320k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
320k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
320k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
423k
    case SRE_AT_END_STRING:
42
423k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
771k
    }
87
88
0
    return 0;
89
771k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
545k
{
18
    /* check if pointer is at given position */
19
20
545k
    Py_ssize_t thisp, thatp;
21
22
545k
    switch (at) {
23
24
4.55k
    case SRE_AT_BEGINNING:
25
4.55k
    case SRE_AT_BEGINNING_STRING:
26
4.55k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
80.8k
    case SRE_AT_END:
33
80.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
80.8k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
80.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
460k
    case SRE_AT_END_STRING:
42
460k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
545k
    }
87
88
0
    return 0;
89
545k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.85G
{
94
    /* check if character is a member of the given set */
95
96
1.85G
    int ok = 1;
97
98
4.01G
    for (;;) {
99
4.01G
        switch (*set++) {
100
101
1.16G
        case SRE_OP_FAILURE:
102
1.16G
            return !ok;
103
104
1.00G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.00G
            if (ch == set[0])
107
4.47M
                return ok;
108
1.00G
            set++;
109
1.00G
            break;
110
111
12.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
12.4M
            if (sre_category(set[0], (int) ch))
114
8.65M
                return ok;
115
3.83M
            set++;
116
3.83M
            break;
117
118
1.05G
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
1.05G
            if (ch < 256 &&
121
1.05G
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
464M
                return ok;
123
594M
            set += 256/SRE_CODE_BITS;
124
594M
            break;
125
126
337M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
337M
            if (set[0] <= ch && ch <= set[1])
129
209M
                return ok;
130
127M
            set += 2;
131
127M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
427M
        case SRE_OP_NEGATE:
148
427M
            ok = !ok;
149
427M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
4.01G
        }
175
4.01G
    }
176
1.85G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
336M
{
94
    /* check if character is a member of the given set */
95
96
336M
    int ok = 1;
97
98
700M
    for (;;) {
99
700M
        switch (*set++) {
100
101
187M
        case SRE_OP_FAILURE:
102
187M
            return !ok;
103
104
226M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
226M
            if (ch == set[0])
107
2.17M
                return ok;
108
224M
            set++;
109
224M
            break;
110
111
11.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.2M
            if (sre_category(set[0], (int) ch))
114
7.39M
                return ok;
115
3.82M
            set++;
116
3.82M
            break;
117
118
78.4M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
78.4M
            if (ch < 256 &&
121
78.4M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
39.4M
                return ok;
123
38.9M
            set += 256/SRE_CODE_BITS;
124
38.9M
            break;
125
126
162M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
162M
            if (set[0] <= ch && ch <= set[1])
129
99.8M
                return ok;
130
62.3M
            set += 2;
131
62.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
34.0M
        case SRE_OP_NEGATE:
148
34.0M
            ok = !ok;
149
34.0M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
700M
        }
175
700M
    }
176
336M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
809M
{
94
    /* check if character is a member of the given set */
95
96
809M
    int ok = 1;
97
98
1.84G
    for (;;) {
99
1.84G
        switch (*set++) {
100
101
551M
        case SRE_OP_FAILURE:
102
551M
            return !ok;
103
104
540M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
540M
            if (ch == set[0])
107
1.38M
                return ok;
108
538M
            set++;
109
538M
            break;
110
111
177k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
177k
            if (sre_category(set[0], (int) ch))
114
170k
                return ok;
115
6.96k
            set++;
116
6.96k
            break;
117
118
411M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
411M
            if (ch < 256 &&
121
411M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
156M
                return ok;
123
255M
            set += 256/SRE_CODE_BITS;
124
255M
            break;
125
126
153M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
153M
            if (set[0] <= ch && ch <= set[1])
129
99.8M
                return ok;
130
54.1M
            set += 2;
131
54.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
183M
        case SRE_OP_NEGATE:
148
183M
            ok = !ok;
149
183M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.84G
        }
175
1.84G
    }
176
809M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
707M
{
94
    /* check if character is a member of the given set */
95
96
707M
    int ok = 1;
97
98
1.47G
    for (;;) {
99
1.47G
        switch (*set++) {
100
101
426M
        case SRE_OP_FAILURE:
102
426M
            return !ok;
103
104
242M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
242M
            if (ch == set[0])
107
916k
                return ok;
108
241M
            set++;
109
241M
            break;
110
111
1.08M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
1.08M
            if (sre_category(set[0], (int) ch))
114
1.08M
                return ok;
115
847
            set++;
116
847
            break;
117
118
568M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
568M
            if (ch < 256 &&
121
568M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
268M
                return ok;
123
300M
            set += 256/SRE_CODE_BITS;
124
300M
            break;
125
126
21.1M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
21.1M
            if (set[0] <= ch && ch <= set[1])
129
9.72M
                return ok;
130
11.3M
            set += 2;
131
11.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
210M
        case SRE_OP_NEGATE:
148
210M
            ok = !ok;
149
210M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.47G
        }
175
1.47G
    }
176
707M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
687M
{
195
687M
    SRE_CODE chr;
196
687M
    SRE_CHAR c;
197
687M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
687M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
687M
    Py_ssize_t i;
200
687M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
687M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
14.5M
        end = ptr + maxcount;
205
206
687M
    switch (pattern[0]) {
207
208
615M
    case SRE_OP_IN:
209
        /* repeated set */
210
615M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
1.01G
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
399M
            ptr++;
213
615M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
65.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
65.3M
        chr = pattern[1];
232
65.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
65.3M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
62.4M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
62.4M
        else
238
62.4M
#endif
239
69.7M
        while (ptr < end && *ptr == c)
240
4.41M
            ptr++;
241
65.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
6.87M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
6.87M
        chr = pattern[1];
270
6.87M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
6.87M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
3.05M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.05M
        else
276
3.05M
#endif
277
39.6M
        while (ptr < end && *ptr != c)
278
32.7M
            ptr++;
279
6.87M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
687M
    }
319
320
687M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
687M
           ptr - (SRE_CHAR*) state->ptr));
322
687M
    return ptr - (SRE_CHAR*) state->ptr;
323
687M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
164M
{
195
164M
    SRE_CODE chr;
196
164M
    SRE_CHAR c;
197
164M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
164M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
164M
    Py_ssize_t i;
200
164M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
164M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
3.49M
        end = ptr + maxcount;
205
206
164M
    switch (pattern[0]) {
207
208
108M
    case SRE_OP_IN:
209
        /* repeated set */
210
108M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
213M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
105M
            ptr++;
213
108M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
56.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
56.0M
        chr = pattern[1];
232
56.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
56.0M
        c = (SRE_CHAR) chr;
234
56.0M
#if SIZEOF_SRE_CHAR < 4
235
56.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
56.0M
        else
238
56.0M
#endif
239
57.9M
        while (ptr < end && *ptr == c)
240
1.89M
            ptr++;
241
56.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
254k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
254k
        chr = pattern[1];
270
254k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
254k
        c = (SRE_CHAR) chr;
272
254k
#if SIZEOF_SRE_CHAR < 4
273
254k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
254k
        else
276
254k
#endif
277
6.70M
        while (ptr < end && *ptr != c)
278
6.45M
            ptr++;
279
254k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
164M
    }
319
320
164M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
164M
           ptr - (SRE_CHAR*) state->ptr));
322
164M
    return ptr - (SRE_CHAR*) state->ptr;
323
164M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
287M
{
195
287M
    SRE_CODE chr;
196
287M
    SRE_CHAR c;
197
287M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
287M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
287M
    Py_ssize_t i;
200
287M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
287M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.11M
        end = ptr + maxcount;
205
206
287M
    switch (pattern[0]) {
207
208
277M
    case SRE_OP_IN:
209
        /* repeated set */
210
277M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
413M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
135M
            ptr++;
213
277M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.40M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.40M
        chr = pattern[1];
232
6.40M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.40M
        c = (SRE_CHAR) chr;
234
6.40M
#if SIZEOF_SRE_CHAR < 4
235
6.40M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
6.40M
        else
238
6.40M
#endif
239
8.69M
        while (ptr < end && *ptr == c)
240
2.29M
            ptr++;
241
6.40M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.79M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.79M
        chr = pattern[1];
270
2.79M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.79M
        c = (SRE_CHAR) chr;
272
2.79M
#if SIZEOF_SRE_CHAR < 4
273
2.79M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.79M
        else
276
2.79M
#endif
277
9.41M
        while (ptr < end && *ptr != c)
278
6.61M
            ptr++;
279
2.79M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
287M
    }
319
320
287M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
287M
           ptr - (SRE_CHAR*) state->ptr));
322
287M
    return ptr - (SRE_CHAR*) state->ptr;
323
287M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
236M
{
195
236M
    SRE_CODE chr;
196
236M
    SRE_CHAR c;
197
236M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
236M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
236M
    Py_ssize_t i;
200
236M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
236M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.89M
        end = ptr + maxcount;
205
206
236M
    switch (pattern[0]) {
207
208
229M
    case SRE_OP_IN:
209
        /* repeated set */
210
229M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
387M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
158M
            ptr++;
213
229M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.85M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.85M
        chr = pattern[1];
232
2.85M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.85M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
3.07M
        while (ptr < end && *ptr == c)
240
227k
            ptr++;
241
2.85M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.82M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.82M
        chr = pattern[1];
270
3.82M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.82M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
23.5M
        while (ptr < end && *ptr != c)
278
19.6M
            ptr++;
279
3.82M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
236M
    }
319
320
236M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
236M
           ptr - (SRE_CHAR*) state->ptr));
322
236M
    return ptr - (SRE_CHAR*) state->ptr;
323
236M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
910M
    do { \
355
910M
        ctx->lastmark = state->lastmark; \
356
910M
        ctx->lastindex = state->lastindex; \
357
910M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
290M
    do { \
360
290M
        state->lastmark = ctx->lastmark; \
361
290M
        state->lastindex = ctx->lastindex; \
362
290M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
320M
    do { \
366
320M
        TRACE(("push last_ptr: %zd", \
367
320M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
320M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
320M
    } while (0)
370
#define LAST_PTR_POP()  \
371
320M
    do { \
372
320M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
320M
        TRACE(("pop last_ptr: %zd", \
374
320M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
320M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
624M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
1.13G
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.66G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
177M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
118M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.75G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.75G
do { \
390
1.75G
    alloc_pos = state->data_stack_base; \
391
1.75G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.75G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.75G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
183M
        int j = data_stack_grow(state, sizeof(type)); \
395
183M
        if (j < 0) return j; \
396
183M
        if (ctx_pos != -1) \
397
183M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
183M
    } \
399
1.75G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.75G
    state->data_stack_base += sizeof(type); \
401
1.75G
} while (0)
402
403
1.95G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.95G
do { \
405
1.95G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.95G
    ptr = (type*)(state->data_stack+pos); \
407
1.95G
} while (0)
408
409
787M
#define DATA_STACK_PUSH(state, data, size) \
410
787M
do { \
411
787M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
787M
           data, state->data_stack_base, size)); \
413
787M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
87.0k
        int j = data_stack_grow(state, size); \
415
87.0k
        if (j < 0) return j; \
416
87.0k
        if (ctx_pos != -1) \
417
87.0k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
87.0k
    } \
419
787M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
787M
    state->data_stack_base += size; \
421
787M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
481M
#define DATA_STACK_POP(state, data, size, discard) \
427
481M
do { \
428
481M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
481M
           data, state->data_stack_base-size, size)); \
430
481M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
481M
    if (discard) \
432
481M
        state->data_stack_base -= size; \
433
481M
} while (0)
434
435
2.06G
#define DATA_STACK_POP_DISCARD(state, size) \
436
2.06G
do { \
437
2.06G
    TRACE(("discard data from %zd (%zd)\n", \
438
2.06G
           state->data_stack_base-size, size)); \
439
2.06G
    state->data_stack_base -= size; \
440
2.06G
} while(0)
441
442
#define DATA_PUSH(x) \
443
320M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
320M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.75G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.75G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.95G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
776M
    do if (lastmark >= 0) { \
473
466M
        MARK_TRACE("push", (lastmark)); \
474
466M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
466M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
776M
    } while (0)
477
#define MARK_POP(lastmark) \
478
216M
    do if (lastmark >= 0) { \
479
159M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
159M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
159M
        MARK_TRACE("pop", (lastmark)); \
482
216M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.02M
    do if (lastmark >= 0) { \
485
2.02M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.02M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.02M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.02M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
560M
    do if (lastmark >= 0) { \
491
307M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
307M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
307M
        MARK_TRACE("pop discard", (lastmark)); \
494
560M
    } while (0)
495
496
494M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
320M
#define JUMP_MAX_UNTIL_2     2
499
177M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
176M
#define JUMP_REPEAT          7
504
16.6M
#define JUMP_REPEAT_ONE_1    8
505
223M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
181M
#define JUMP_BRANCH          11
508
118M
#define JUMP_ASSERT          12
509
51.6M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.26G
    ctx->pattern = pattern; \
516
1.26G
    ctx->ptr = ptr; \
517
1.26G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.26G
    nextctx->pattern = nextpattern; \
519
1.26G
    nextctx->toplevel = toplevel_; \
520
1.26G
    nextctx->jump = jumpvalue; \
521
1.26G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.26G
    pattern = nextpattern; \
523
1.26G
    ctx_pos = alloc_pos; \
524
1.26G
    ctx = nextctx; \
525
1.26G
    goto entrance; \
526
1.26G
    jumplabel: \
527
1.26G
    pattern = ctx->pattern; \
528
1.26G
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
1.09G
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
169M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
3.05G
    do {                                                           \
553
3.05G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
3.05G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
3.05G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
3.16G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
3.05G
        do {                               \
588
3.05G
            MAYBE_CHECK_SIGNALS;           \
589
3.05G
            goto *sre_targets[*pattern++]; \
590
3.05G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
494M
{
601
494M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
494M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
494M
    Py_ssize_t ret = 0;
604
494M
    int jump;
605
494M
    unsigned int sigcount = state->sigcount;
606
607
494M
    SRE(match_context)* ctx;
608
494M
    SRE(match_context)* nextctx;
609
494M
    INIT_TRACE(state);
610
611
494M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
494M
    DATA_ALLOC(SRE(match_context), ctx);
614
494M
    ctx->last_ctx_pos = -1;
615
494M
    ctx->jump = JUMP_NONE;
616
494M
    ctx->toplevel = toplevel;
617
494M
    ctx_pos = alloc_pos;
618
619
494M
#if USE_COMPUTED_GOTOS
620
494M
#include "sre_targets.h"
621
494M
#endif
622
623
1.75G
entrance:
624
625
1.75G
    ;  // Fashion statement.
626
1.75G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.75G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
95.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.12M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.12M
                   end - ptr, (size_t) pattern[3]));
634
5.12M
            RETURN_FAILURE;
635
5.12M
        }
636
90.2M
        pattern += pattern[1] + 1;
637
90.2M
    }
638
639
1.75G
#if USE_COMPUTED_GOTOS
640
1.75G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.75G
    {
647
648
1.75G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
545M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
545M
                   ptr, pattern[0]));
653
545M
            {
654
545M
                int i = pattern[0];
655
545M
                if (i & 1)
656
92.5M
                    state->lastindex = i/2 + 1;
657
545M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
537M
                    int j = state->lastmark + 1;
663
544M
                    while (j < i)
664
7.55M
                        state->mark[j++] = NULL;
665
537M
                    state->lastmark = i;
666
537M
                }
667
545M
                state->mark[i] = ptr;
668
545M
            }
669
545M
            pattern++;
670
545M
            DISPATCH;
671
672
545M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
173M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
173M
                   ptr, *pattern));
677
173M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
64.4M
                RETURN_FAILURE;
679
108M
            pattern++;
680
108M
            ptr++;
681
108M
            DISPATCH;
682
683
108M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
280M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
280M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
280M
            if (ctx->toplevel &&
698
280M
                ((state->match_all && ptr != state->end) ||
699
79.8M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
280M
            state->ptr = ptr;
704
280M
            RETURN_SUCCESS;
705
706
13.9M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.9M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.9M
            if (!SRE(at)(state, ptr, *pattern))
711
3.59M
                RETURN_FAILURE;
712
10.3M
            pattern++;
713
10.3M
            DISPATCH;
714
715
10.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
354M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
354M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
354M
            if (ptr >= end ||
749
354M
                !SRE(charset)(state, pattern + 1, *ptr))
750
8.12M
                RETURN_FAILURE;
751
346M
            pattern += pattern[0];
752
346M
            ptr++;
753
346M
            DISPATCH;
754
755
346M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.24M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.24M
                   pattern, ptr, pattern[0]));
758
7.24M
            if (ptr >= end ||
759
7.24M
                sre_lower_ascii(*ptr) != *pattern)
760
492k
                RETURN_FAILURE;
761
6.75M
            pattern++;
762
6.75M
            ptr++;
763
6.75M
            DISPATCH;
764
765
6.75M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
113M
        TARGET(SRE_OP_JUMP):
845
113M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
113M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
113M
                   ptr, pattern[0]));
850
113M
            pattern += pattern[0];
851
113M
            DISPATCH;
852
853
195M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
195M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
195M
            LASTMARK_SAVE();
858
195M
            if (state->repeat)
859
150M
                MARK_PUSH(ctx->lastmark);
860
469M
            for (; pattern[0]; pattern += pattern[0]) {
861
385M
                if (pattern[1] == SRE_OP_LITERAL &&
862
385M
                    (ptr >= end ||
863
178M
                     (SRE_CODE) *ptr != pattern[2]))
864
95.8M
                    continue;
865
290M
                if (pattern[1] == SRE_OP_IN &&
866
290M
                    (ptr >= end ||
867
143M
                     !SRE(charset)(state, pattern + 3,
868
143M
                                   (SRE_CODE) *ptr)))
869
108M
                    continue;
870
181M
                state->ptr = ptr;
871
181M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
181M
                if (ret) {
873
111M
                    if (state->repeat)
874
93.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
111M
                    RETURN_ON_ERROR(ret);
876
111M
                    RETURN_SUCCESS;
877
111M
                }
878
70.0M
                if (state->repeat)
879
29.3k
                    MARK_POP_KEEP(ctx->lastmark);
880
70.0M
                LASTMARK_RESTORE();
881
70.0M
            }
882
84.0M
            if (state->repeat)
883
56.8M
                MARK_POP_DISCARD(ctx->lastmark);
884
84.0M
            RETURN_FAILURE;
885
886
689M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
689M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
689M
                   pattern[1], pattern[2]));
898
899
689M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.09M
                RETURN_FAILURE; /* cannot match */
901
902
687M
            state->ptr = ptr;
903
904
687M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
687M
            RETURN_ON_ERROR(ret);
906
687M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
687M
            ctx->count = ret;
908
687M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
687M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
344M
                RETURN_FAILURE;
917
918
343M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
343M
                ptr == state->end &&
920
343M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
69.8k
            {
922
                /* tail is empty.  we're finished */
923
69.8k
                state->ptr = ptr;
924
69.8k
                RETURN_SUCCESS;
925
69.8k
            }
926
927
343M
            LASTMARK_SAVE();
928
343M
            if (state->repeat)
929
254M
                MARK_PUSH(ctx->lastmark);
930
931
343M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
122M
                ctx->u.chr = pattern[pattern[0]+1];
935
122M
                for (;;) {
936
283M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
283M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
160M
                        ptr--;
939
160M
                        ctx->count--;
940
160M
                    }
941
122M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
106M
                        break;
943
16.6M
                    state->ptr = ptr;
944
16.6M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
16.6M
                            pattern+pattern[0]);
946
16.6M
                    if (ret) {
947
16.6M
                        if (state->repeat)
948
15.0M
                            MARK_POP_DISCARD(ctx->lastmark);
949
16.6M
                        RETURN_ON_ERROR(ret);
950
16.6M
                        RETURN_SUCCESS;
951
16.6M
                    }
952
567
                    if (state->repeat)
953
567
                        MARK_POP_KEEP(ctx->lastmark);
954
567
                    LASTMARK_RESTORE();
955
956
567
                    ptr--;
957
567
                    ctx->count--;
958
567
                }
959
106M
                if (state->repeat)
960
105M
                    MARK_POP_DISCARD(ctx->lastmark);
961
220M
            } else {
962
                /* general case */
963
224M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
223M
                    state->ptr = ptr;
965
223M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
223M
                            pattern+pattern[0]);
967
223M
                    if (ret) {
968
219M
                        if (state->repeat)
969
133M
                            MARK_POP_DISCARD(ctx->lastmark);
970
219M
                        RETURN_ON_ERROR(ret);
971
219M
                        RETURN_SUCCESS;
972
219M
                    }
973
4.32M
                    if (state->repeat)
974
1.99M
                        MARK_POP_KEEP(ctx->lastmark);
975
4.32M
                    LASTMARK_RESTORE();
976
977
4.32M
                    ptr--;
978
4.32M
                    ctx->count--;
979
4.32M
                }
980
1.26M
                if (state->repeat)
981
1.05M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.26M
            }
983
107M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
176M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
176M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
176M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
176M
            ctx->u.rep = repeat_pool_malloc(state);
1127
176M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
176M
            ctx->u.rep->count = -1;
1131
176M
            ctx->u.rep->pattern = pattern;
1132
176M
            ctx->u.rep->prev = state->repeat;
1133
176M
            ctx->u.rep->last_ptr = NULL;
1134
176M
            state->repeat = ctx->u.rep;
1135
1136
176M
            state->ptr = ptr;
1137
176M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
176M
            state->repeat = ctx->u.rep->prev;
1139
176M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
176M
            if (ret) {
1142
176M
                RETURN_ON_ERROR(ret);
1143
176M
                RETURN_SUCCESS;
1144
176M
            }
1145
113k
            RETURN_FAILURE;
1146
1147
332M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
332M
            ctx->u.rep = state->repeat;
1155
332M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
332M
            state->ptr = ptr;
1159
1160
332M
            ctx->count = ctx->u.rep->count+1;
1161
1162
332M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
332M
                   ptr, ctx->count));
1164
1165
332M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
332M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
332M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
332M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
320M
                ctx->u.rep->count = ctx->count;
1185
320M
                LASTMARK_SAVE();
1186
320M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
320M
                LAST_PTR_PUSH();
1189
320M
                ctx->u.rep->last_ptr = state->ptr;
1190
320M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
320M
                        ctx->u.rep->pattern+3);
1192
320M
                LAST_PTR_POP();
1193
320M
                if (ret) {
1194
155M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
155M
                    RETURN_ON_ERROR(ret);
1196
155M
                    RETURN_SUCCESS;
1197
155M
                }
1198
164M
                MARK_POP(ctx->lastmark);
1199
164M
                LASTMARK_RESTORE();
1200
164M
                ctx->u.rep->count = ctx->count-1;
1201
164M
                state->ptr = ptr;
1202
164M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
177M
            state->repeat = ctx->u.rep->prev;
1207
177M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
177M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
177M
            RETURN_ON_SUCCESS(ret);
1211
1.11M
            state->ptr = ptr;
1212
1.11M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
118M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
118M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
118M
                   ptr, pattern[1]));
1565
118M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
118M
            state->ptr = ptr - pattern[1];
1568
118M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
118M
            RETURN_ON_FAILURE(ret);
1570
113M
            pattern += pattern[0];
1571
113M
            DISPATCH;
1572
1573
113M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
51.6M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
51.6M
                   ptr, pattern[1]));
1578
51.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
51.6M
                state->ptr = ptr - pattern[1];
1580
51.6M
                LASTMARK_SAVE();
1581
51.6M
                if (state->repeat)
1582
51.6M
                    MARK_PUSH(ctx->lastmark);
1583
1584
103M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
103M
                if (ret) {
1586
23.6k
                    if (state->repeat)
1587
23.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
23.6k
                    RETURN_ON_ERROR(ret);
1589
23.6k
                    RETURN_FAILURE;
1590
23.6k
                }
1591
51.6M
                if (state->repeat)
1592
51.6M
                    MARK_POP(ctx->lastmark);
1593
51.6M
                LASTMARK_RESTORE();
1594
51.6M
            }
1595
51.6M
            pattern += pattern[0];
1596
51.6M
            DISPATCH;
1597
1598
51.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.75G
exit:
1620
1.75G
    ctx_pos = ctx->last_ctx_pos;
1621
1.75G
    jump = ctx->jump;
1622
1.75G
    DATA_POP_DISCARD(ctx);
1623
1.75G
    if (ctx_pos == -1) {
1624
494M
        state->sigcount = sigcount;
1625
494M
        return ret;
1626
494M
    }
1627
1.26G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.26G
    switch (jump) {
1630
320M
        case JUMP_MAX_UNTIL_2:
1631
320M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
320M
            goto jump_max_until_2;
1633
177M
        case JUMP_MAX_UNTIL_3:
1634
177M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
177M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
181M
        case JUMP_BRANCH:
1643
181M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
181M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
176M
        case JUMP_REPEAT:
1658
176M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
176M
            goto jump_repeat;
1660
16.6M
        case JUMP_REPEAT_ONE_1:
1661
16.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
16.6M
            goto jump_repeat_one_1;
1663
223M
        case JUMP_REPEAT_ONE_2:
1664
223M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
223M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
118M
        case JUMP_ASSERT:
1673
118M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
118M
            goto jump_assert;
1675
51.6M
        case JUMP_ASSERT_NOT:
1676
51.6M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
51.6M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.26G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.26G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
166M
{
601
166M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
166M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
166M
    Py_ssize_t ret = 0;
604
166M
    int jump;
605
166M
    unsigned int sigcount = state->sigcount;
606
607
166M
    SRE(match_context)* ctx;
608
166M
    SRE(match_context)* nextctx;
609
166M
    INIT_TRACE(state);
610
611
166M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
166M
    DATA_ALLOC(SRE(match_context), ctx);
614
166M
    ctx->last_ctx_pos = -1;
615
166M
    ctx->jump = JUMP_NONE;
616
166M
    ctx->toplevel = toplevel;
617
166M
    ctx_pos = alloc_pos;
618
619
166M
#if USE_COMPUTED_GOTOS
620
166M
#include "sre_targets.h"
621
166M
#endif
622
623
315M
entrance:
624
625
315M
    ;  // Fashion statement.
626
315M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
315M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
28.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.12M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.12M
                   end - ptr, (size_t) pattern[3]));
634
5.12M
            RETURN_FAILURE;
635
5.12M
        }
636
23.0M
        pattern += pattern[1] + 1;
637
23.0M
    }
638
639
310M
#if USE_COMPUTED_GOTOS
640
310M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
310M
    {
647
648
310M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
145M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
145M
                   ptr, pattern[0]));
653
145M
            {
654
145M
                int i = pattern[0];
655
145M
                if (i & 1)
656
20.4M
                    state->lastindex = i/2 + 1;
657
145M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
142M
                    int j = state->lastmark + 1;
663
146M
                    while (j < i)
664
3.82M
                        state->mark[j++] = NULL;
665
142M
                    state->lastmark = i;
666
142M
                }
667
145M
                state->mark[i] = ptr;
668
145M
            }
669
145M
            pattern++;
670
145M
            DISPATCH;
671
672
145M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
51.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
51.7M
                   ptr, *pattern));
677
51.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.0M
                RETURN_FAILURE;
679
34.7M
            pattern++;
680
34.7M
            ptr++;
681
34.7M
            DISPATCH;
682
683
34.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
50.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
50.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
50.1M
            if (ctx->toplevel &&
698
50.1M
                ((state->match_all && ptr != state->end) ||
699
16.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
50.1M
            state->ptr = ptr;
704
50.1M
            RETURN_SUCCESS;
705
706
12.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.6M
            if (!SRE(at)(state, ptr, *pattern))
711
2.30M
                RETURN_FAILURE;
712
10.3M
            pattern++;
713
10.3M
            DISPATCH;
714
715
10.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
36.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
36.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
36.6M
            if (ptr >= end ||
749
36.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
444k
                RETURN_FAILURE;
751
36.1M
            pattern += pattern[0];
752
36.1M
            ptr++;
753
36.1M
            DISPATCH;
754
755
36.1M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.72M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.72M
                   pattern, ptr, pattern[0]));
758
1.72M
            if (ptr >= end ||
759
1.72M
                sre_lower_ascii(*ptr) != *pattern)
760
316k
                RETURN_FAILURE;
761
1.40M
            pattern++;
762
1.40M
            ptr++;
763
1.40M
            DISPATCH;
764
765
1.40M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
24.5M
        TARGET(SRE_OP_JUMP):
845
24.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
24.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
24.5M
                   ptr, pattern[0]));
850
24.5M
            pattern += pattern[0];
851
24.5M
            DISPATCH;
852
853
48.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
48.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
48.0M
            LASTMARK_SAVE();
858
48.0M
            if (state->repeat)
859
9.65M
                MARK_PUSH(ctx->lastmark);
860
143M
            for (; pattern[0]; pattern += pattern[0]) {
861
118M
                if (pattern[1] == SRE_OP_LITERAL &&
862
118M
                    (ptr >= end ||
863
52.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
23.8M
                    continue;
865
94.4M
                if (pattern[1] == SRE_OP_IN &&
866
94.4M
                    (ptr >= end ||
867
10.6M
                     !SRE(charset)(state, pattern + 3,
868
10.6M
                                   (SRE_CODE) *ptr)))
869
6.03M
                    continue;
870
88.3M
                state->ptr = ptr;
871
88.3M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
88.3M
                if (ret) {
873
23.0M
                    if (state->repeat)
874
9.40M
                        MARK_POP_DISCARD(ctx->lastmark);
875
23.0M
                    RETURN_ON_ERROR(ret);
876
23.0M
                    RETURN_SUCCESS;
877
23.0M
                }
878
65.3M
                if (state->repeat)
879
6.48k
                    MARK_POP_KEEP(ctx->lastmark);
880
65.3M
                LASTMARK_RESTORE();
881
65.3M
            }
882
25.0M
            if (state->repeat)
883
248k
                MARK_POP_DISCARD(ctx->lastmark);
884
25.0M
            RETURN_FAILURE;
885
886
165M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
165M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
165M
                   pattern[1], pattern[2]));
898
899
165M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
917k
                RETURN_FAILURE; /* cannot match */
901
902
164M
            state->ptr = ptr;
903
904
164M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
164M
            RETURN_ON_ERROR(ret);
906
164M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
164M
            ctx->count = ret;
908
164M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
164M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
138M
                RETURN_FAILURE;
917
918
26.6M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
26.6M
                ptr == state->end &&
920
26.6M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
50.0k
            {
922
                /* tail is empty.  we're finished */
923
50.0k
                state->ptr = ptr;
924
50.0k
                RETURN_SUCCESS;
925
50.0k
            }
926
927
26.5M
            LASTMARK_SAVE();
928
26.5M
            if (state->repeat)
929
12.5M
                MARK_PUSH(ctx->lastmark);
930
931
26.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.87M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.87M
                for (;;) {
936
17.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
17.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
12.1M
                        ptr--;
939
12.1M
                        ctx->count--;
940
12.1M
                    }
941
4.87M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.74M
                        break;
943
2.12M
                    state->ptr = ptr;
944
2.12M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.12M
                            pattern+pattern[0]);
946
2.12M
                    if (ret) {
947
2.12M
                        if (state->repeat)
948
585k
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.12M
                        RETURN_ON_ERROR(ret);
950
2.12M
                        RETURN_SUCCESS;
951
2.12M
                    }
952
115
                    if (state->repeat)
953
115
                        MARK_POP_KEEP(ctx->lastmark);
954
115
                    LASTMARK_RESTORE();
955
956
115
                    ptr--;
957
115
                    ctx->count--;
958
115
                }
959
2.74M
                if (state->repeat)
960
1.46M
                    MARK_POP_DISCARD(ctx->lastmark);
961
21.6M
            } else {
962
                /* general case */
963
23.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
23.0M
                    state->ptr = ptr;
965
23.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
23.0M
                            pattern+pattern[0]);
967
23.0M
                    if (ret) {
968
20.8M
                        if (state->repeat)
969
9.84M
                            MARK_POP_DISCARD(ctx->lastmark);
970
20.8M
                        RETURN_ON_ERROR(ret);
971
20.8M
                        RETURN_SUCCESS;
972
20.8M
                    }
973
2.26M
                    if (state->repeat)
974
1.19M
                        MARK_POP_KEEP(ctx->lastmark);
975
2.26M
                    LASTMARK_RESTORE();
976
977
2.26M
                    ptr--;
978
2.26M
                    ctx->count--;
979
2.26M
                }
980
861k
                if (state->repeat)
981
652k
                    MARK_POP_DISCARD(ctx->lastmark);
982
861k
            }
983
3.60M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
5.86M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
5.86M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
5.86M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
5.86M
            ctx->u.rep = repeat_pool_malloc(state);
1127
5.86M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
5.86M
            ctx->u.rep->count = -1;
1131
5.86M
            ctx->u.rep->pattern = pattern;
1132
5.86M
            ctx->u.rep->prev = state->repeat;
1133
5.86M
            ctx->u.rep->last_ptr = NULL;
1134
5.86M
            state->repeat = ctx->u.rep;
1135
1136
5.86M
            state->ptr = ptr;
1137
5.86M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
5.86M
            state->repeat = ctx->u.rep->prev;
1139
5.86M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
5.86M
            if (ret) {
1142
5.75M
                RETURN_ON_ERROR(ret);
1143
5.75M
                RETURN_SUCCESS;
1144
5.75M
            }
1145
111k
            RETURN_FAILURE;
1146
1147
20.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
20.6M
            ctx->u.rep = state->repeat;
1155
20.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
20.6M
            state->ptr = ptr;
1159
1160
20.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
20.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
20.6M
                   ptr, ctx->count));
1164
1165
20.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
20.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
20.6M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
20.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
16.5M
                ctx->u.rep->count = ctx->count;
1185
16.5M
                LASTMARK_SAVE();
1186
16.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
16.5M
                LAST_PTR_PUSH();
1189
16.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
16.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
16.5M
                        ctx->u.rep->pattern+3);
1192
16.5M
                LAST_PTR_POP();
1193
16.5M
                if (ret) {
1194
14.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
14.1M
                    RETURN_ON_ERROR(ret);
1196
14.1M
                    RETURN_SUCCESS;
1197
14.1M
                }
1198
2.39M
                MARK_POP(ctx->lastmark);
1199
2.39M
                LASTMARK_RESTORE();
1200
2.39M
                ctx->u.rep->count = ctx->count-1;
1201
2.39M
                state->ptr = ptr;
1202
2.39M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.47M
            state->repeat = ctx->u.rep->prev;
1207
6.47M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.47M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.47M
            RETURN_ON_SUCCESS(ret);
1211
717k
            state->ptr = ptr;
1212
717k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
1.93M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
1.93M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
1.93M
                   ptr, pattern[1]));
1565
1.93M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
1.93M
            state->ptr = ptr - pattern[1];
1568
1.93M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
1.93M
            RETURN_ON_FAILURE(ret);
1570
1.81M
            pattern += pattern[0];
1571
1.81M
            DISPATCH;
1572
1573
4.64M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
4.64M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
4.64M
                   ptr, pattern[1]));
1578
4.64M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
4.64M
                state->ptr = ptr - pattern[1];
1580
4.64M
                LASTMARK_SAVE();
1581
4.64M
                if (state->repeat)
1582
4.64M
                    MARK_PUSH(ctx->lastmark);
1583
1584
9.28M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
9.28M
                if (ret) {
1586
1.32k
                    if (state->repeat)
1587
1.32k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.32k
                    RETURN_ON_ERROR(ret);
1589
1.32k
                    RETURN_FAILURE;
1590
1.32k
                }
1591
4.64M
                if (state->repeat)
1592
4.64M
                    MARK_POP(ctx->lastmark);
1593
4.64M
                LASTMARK_RESTORE();
1594
4.64M
            }
1595
4.64M
            pattern += pattern[0];
1596
4.64M
            DISPATCH;
1597
1598
4.64M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
315M
exit:
1620
315M
    ctx_pos = ctx->last_ctx_pos;
1621
315M
    jump = ctx->jump;
1622
315M
    DATA_POP_DISCARD(ctx);
1623
315M
    if (ctx_pos == -1) {
1624
166M
        state->sigcount = sigcount;
1625
166M
        return ret;
1626
166M
    }
1627
149M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
149M
    switch (jump) {
1630
16.5M
        case JUMP_MAX_UNTIL_2:
1631
16.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
16.5M
            goto jump_max_until_2;
1633
6.47M
        case JUMP_MAX_UNTIL_3:
1634
6.47M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.47M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
88.3M
        case JUMP_BRANCH:
1643
88.3M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
88.3M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
5.86M
        case JUMP_REPEAT:
1658
5.86M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
5.86M
            goto jump_repeat;
1660
2.12M
        case JUMP_REPEAT_ONE_1:
1661
2.12M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.12M
            goto jump_repeat_one_1;
1663
23.0M
        case JUMP_REPEAT_ONE_2:
1664
23.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
23.0M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
1.93M
        case JUMP_ASSERT:
1673
1.93M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
1.93M
            goto jump_assert;
1675
4.64M
        case JUMP_ASSERT_NOT:
1676
4.64M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
4.64M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
149M
    }
1683
1684
0
    return ret; /* should never get here */
1685
149M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
236M
{
601
236M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
236M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
236M
    Py_ssize_t ret = 0;
604
236M
    int jump;
605
236M
    unsigned int sigcount = state->sigcount;
606
607
236M
    SRE(match_context)* ctx;
608
236M
    SRE(match_context)* nextctx;
609
236M
    INIT_TRACE(state);
610
611
236M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
236M
    DATA_ALLOC(SRE(match_context), ctx);
614
236M
    ctx->last_ctx_pos = -1;
615
236M
    ctx->jump = JUMP_NONE;
616
236M
    ctx->toplevel = toplevel;
617
236M
    ctx_pos = alloc_pos;
618
619
236M
#if USE_COMPUTED_GOTOS
620
236M
#include "sre_targets.h"
621
236M
#endif
622
623
706M
entrance:
624
625
706M
    ;  // Fashion statement.
626
706M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
706M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
32.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
700
            TRACE(("reject (got %tu chars, need %zu)\n",
633
700
                   end - ptr, (size_t) pattern[3]));
634
700
            RETURN_FAILURE;
635
700
        }
636
32.8M
        pattern += pattern[1] + 1;
637
32.8M
    }
638
639
706M
#if USE_COMPUTED_GOTOS
640
706M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
706M
    {
647
648
706M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
234M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
234M
                   ptr, pattern[0]));
653
234M
            {
654
234M
                int i = pattern[0];
655
234M
                if (i & 1)
656
29.8M
                    state->lastindex = i/2 + 1;
657
234M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
232M
                    int j = state->lastmark + 1;
663
234M
                    while (j < i)
664
1.53M
                        state->mark[j++] = NULL;
665
232M
                    state->lastmark = i;
666
232M
                }
667
234M
                state->mark[i] = ptr;
668
234M
            }
669
234M
            pattern++;
670
234M
            DISPATCH;
671
672
234M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
65.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
65.1M
                   ptr, *pattern));
677
65.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
25.5M
                RETURN_FAILURE;
679
39.5M
            pattern++;
680
39.5M
            ptr++;
681
39.5M
            DISPATCH;
682
683
39.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
121M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
121M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
121M
            if (ctx->toplevel &&
698
121M
                ((state->match_all && ptr != state->end) ||
699
29.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
121M
            state->ptr = ptr;
704
121M
            RETURN_SUCCESS;
705
706
771k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
771k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
771k
            if (!SRE(at)(state, ptr, *pattern))
711
742k
                RETURN_FAILURE;
712
28.6k
            pattern++;
713
28.6k
            DISPATCH;
714
715
28.6k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
154M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
154M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
154M
            if (ptr >= end ||
749
154M
                !SRE(charset)(state, pattern + 1, *ptr))
750
6.10M
                RETURN_FAILURE;
751
148M
            pattern += pattern[0];
752
148M
            ptr++;
753
148M
            DISPATCH;
754
755
148M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.55M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.55M
                   pattern, ptr, pattern[0]));
758
4.55M
            if (ptr >= end ||
759
4.55M
                sre_lower_ascii(*ptr) != *pattern)
760
156k
                RETURN_FAILURE;
761
4.40M
            pattern++;
762
4.40M
            ptr++;
763
4.40M
            DISPATCH;
764
765
4.40M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
38.9M
        TARGET(SRE_OP_JUMP):
845
38.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
38.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
38.9M
                   ptr, pattern[0]));
850
38.9M
            pattern += pattern[0];
851
38.9M
            DISPATCH;
852
853
63.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
63.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
63.3M
            LASTMARK_SAVE();
858
63.3M
            if (state->repeat)
859
60.3M
                MARK_PUSH(ctx->lastmark);
860
143M
            for (; pattern[0]; pattern += pattern[0]) {
861
118M
                if (pattern[1] == SRE_OP_LITERAL &&
862
118M
                    (ptr >= end ||
863
57.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
28.2M
                    continue;
865
90.5M
                if (pattern[1] == SRE_OP_IN &&
866
90.5M
                    (ptr >= end ||
867
57.1M
                     !SRE(charset)(state, pattern + 3,
868
57.1M
                                   (SRE_CODE) *ptr)))
869
48.4M
                    continue;
870
42.0M
                state->ptr = ptr;
871
42.0M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
42.0M
                if (ret) {
873
38.5M
                    if (state->repeat)
874
37.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
38.5M
                    RETURN_ON_ERROR(ret);
876
38.5M
                    RETURN_SUCCESS;
877
38.5M
                }
878
3.55M
                if (state->repeat)
879
6.21k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.55M
                LASTMARK_RESTORE();
881
3.55M
            }
882
24.8M
            if (state->repeat)
883
23.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
24.8M
            RETURN_FAILURE;
885
886
287M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
287M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
287M
                   pattern[1], pattern[2]));
898
899
287M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
156k
                RETURN_FAILURE; /* cannot match */
901
902
287M
            state->ptr = ptr;
903
904
287M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
287M
            RETURN_ON_ERROR(ret);
906
287M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
287M
            ctx->count = ret;
908
287M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
287M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
156M
                RETURN_FAILURE;
917
918
130M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
130M
                ptr == state->end &&
920
130M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
16.1k
            {
922
                /* tail is empty.  we're finished */
923
16.1k
                state->ptr = ptr;
924
16.1k
                RETURN_SUCCESS;
925
16.1k
            }
926
927
130M
            LASTMARK_SAVE();
928
130M
            if (state->repeat)
929
95.6M
                MARK_PUSH(ctx->lastmark);
930
931
130M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
46.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
46.6M
                for (;;) {
936
92.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
92.4M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
45.8M
                        ptr--;
939
45.8M
                        ctx->count--;
940
45.8M
                    }
941
46.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
40.5M
                        break;
943
6.09M
                    state->ptr = ptr;
944
6.09M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.09M
                            pattern+pattern[0]);
946
6.09M
                    if (ret) {
947
6.09M
                        if (state->repeat)
948
6.07M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.09M
                        RETURN_ON_ERROR(ret);
950
6.09M
                        RETURN_SUCCESS;
951
6.09M
                    }
952
212
                    if (state->repeat)
953
212
                        MARK_POP_KEEP(ctx->lastmark);
954
212
                    LASTMARK_RESTORE();
955
956
212
                    ptr--;
957
212
                    ctx->count--;
958
212
                }
959
40.5M
                if (state->repeat)
960
40.4M
                    MARK_POP_DISCARD(ctx->lastmark);
961
84.0M
            } else {
962
                /* general case */
963
84.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
84.5M
                    state->ptr = ptr;
965
84.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
84.5M
                            pattern+pattern[0]);
967
84.5M
                    if (ret) {
968
83.7M
                        if (state->repeat)
969
48.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
83.7M
                        RETURN_ON_ERROR(ret);
971
83.7M
                        RETURN_SUCCESS;
972
83.7M
                    }
973
809k
                    if (state->repeat)
974
639k
                        MARK_POP_KEEP(ctx->lastmark);
975
809k
                    LASTMARK_RESTORE();
976
977
809k
                    ptr--;
978
809k
                    ctx->count--;
979
809k
                }
980
321k
                if (state->repeat)
981
319k
                    MARK_POP_DISCARD(ctx->lastmark);
982
321k
            }
983
40.8M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
69.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
69.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
69.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
69.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
69.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
69.2M
            ctx->u.rep->count = -1;
1131
69.2M
            ctx->u.rep->pattern = pattern;
1132
69.2M
            ctx->u.rep->prev = state->repeat;
1133
69.2M
            ctx->u.rep->last_ptr = NULL;
1134
69.2M
            state->repeat = ctx->u.rep;
1135
1136
69.2M
            state->ptr = ptr;
1137
69.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
69.2M
            state->repeat = ctx->u.rep->prev;
1139
69.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
69.2M
            if (ret) {
1142
69.2M
                RETURN_ON_ERROR(ret);
1143
69.2M
                RETURN_SUCCESS;
1144
69.2M
            }
1145
948
            RETURN_FAILURE;
1146
1147
129M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
129M
            ctx->u.rep = state->repeat;
1155
129M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
129M
            state->ptr = ptr;
1159
1160
129M
            ctx->count = ctx->u.rep->count+1;
1161
1162
129M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
129M
                   ptr, ctx->count));
1164
1165
129M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
129M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
129M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
129M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
125M
                ctx->u.rep->count = ctx->count;
1185
125M
                LASTMARK_SAVE();
1186
125M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
125M
                LAST_PTR_PUSH();
1189
125M
                ctx->u.rep->last_ptr = state->ptr;
1190
125M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
125M
                        ctx->u.rep->pattern+3);
1192
125M
                LAST_PTR_POP();
1193
125M
                if (ret) {
1194
59.5M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
59.5M
                    RETURN_ON_ERROR(ret);
1196
59.5M
                    RETURN_SUCCESS;
1197
59.5M
                }
1198
65.9M
                MARK_POP(ctx->lastmark);
1199
65.9M
                LASTMARK_RESTORE();
1200
65.9M
                ctx->u.rep->count = ctx->count-1;
1201
65.9M
                state->ptr = ptr;
1202
65.9M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
69.5M
            state->repeat = ctx->u.rep->prev;
1207
69.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
69.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
69.5M
            RETURN_ON_SUCCESS(ret);
1211
320k
            state->ptr = ptr;
1212
320k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
48.1M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
48.1M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
48.1M
                   ptr, pattern[1]));
1565
48.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
48.1M
            state->ptr = ptr - pattern[1];
1568
48.1M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
48.1M
            RETURN_ON_FAILURE(ret);
1570
44.3M
            pattern += pattern[0];
1571
44.3M
            DISPATCH;
1572
1573
44.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.5M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.5M
                   ptr, pattern[1]));
1578
25.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.5M
                state->ptr = ptr - pattern[1];
1580
25.5M
                LASTMARK_SAVE();
1581
25.5M
                if (state->repeat)
1582
25.5M
                    MARK_PUSH(ctx->lastmark);
1583
1584
51.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
51.0M
                if (ret) {
1586
5.97k
                    if (state->repeat)
1587
5.97k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.97k
                    RETURN_ON_ERROR(ret);
1589
5.97k
                    RETURN_FAILURE;
1590
5.97k
                }
1591
25.5M
                if (state->repeat)
1592
25.5M
                    MARK_POP(ctx->lastmark);
1593
25.5M
                LASTMARK_RESTORE();
1594
25.5M
            }
1595
25.5M
            pattern += pattern[0];
1596
25.5M
            DISPATCH;
1597
1598
25.5M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
706M
exit:
1620
706M
    ctx_pos = ctx->last_ctx_pos;
1621
706M
    jump = ctx->jump;
1622
706M
    DATA_POP_DISCARD(ctx);
1623
706M
    if (ctx_pos == -1) {
1624
236M
        state->sigcount = sigcount;
1625
236M
        return ret;
1626
236M
    }
1627
470M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
470M
    switch (jump) {
1630
125M
        case JUMP_MAX_UNTIL_2:
1631
125M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
125M
            goto jump_max_until_2;
1633
69.5M
        case JUMP_MAX_UNTIL_3:
1634
69.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
69.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
42.0M
        case JUMP_BRANCH:
1643
42.0M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
42.0M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
69.2M
        case JUMP_REPEAT:
1658
69.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
69.2M
            goto jump_repeat;
1660
6.09M
        case JUMP_REPEAT_ONE_1:
1661
6.09M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.09M
            goto jump_repeat_one_1;
1663
84.5M
        case JUMP_REPEAT_ONE_2:
1664
84.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
84.5M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
48.1M
        case JUMP_ASSERT:
1673
48.1M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
48.1M
            goto jump_assert;
1675
25.5M
        case JUMP_ASSERT_NOT:
1676
25.5M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.5M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
470M
    }
1683
1684
0
    return ret; /* should never get here */
1685
470M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
91.6M
{
601
91.6M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
91.6M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
91.6M
    Py_ssize_t ret = 0;
604
91.6M
    int jump;
605
91.6M
    unsigned int sigcount = state->sigcount;
606
607
91.6M
    SRE(match_context)* ctx;
608
91.6M
    SRE(match_context)* nextctx;
609
91.6M
    INIT_TRACE(state);
610
611
91.6M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
91.6M
    DATA_ALLOC(SRE(match_context), ctx);
614
91.6M
    ctx->last_ctx_pos = -1;
615
91.6M
    ctx->jump = JUMP_NONE;
616
91.6M
    ctx->toplevel = toplevel;
617
91.6M
    ctx_pos = alloc_pos;
618
619
91.6M
#if USE_COMPUTED_GOTOS
620
91.6M
#include "sre_targets.h"
621
91.6M
#endif
622
623
737M
entrance:
624
625
737M
    ;  // Fashion statement.
626
737M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
737M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
34.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
366
            TRACE(("reject (got %tu chars, need %zu)\n",
633
366
                   end - ptr, (size_t) pattern[3]));
634
366
            RETURN_FAILURE;
635
366
        }
636
34.3M
        pattern += pattern[1] + 1;
637
34.3M
    }
638
639
737M
#if USE_COMPUTED_GOTOS
640
737M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
737M
    {
647
648
737M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
164M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
164M
                   ptr, pattern[0]));
653
164M
            {
654
164M
                int i = pattern[0];
655
164M
                if (i & 1)
656
42.3M
                    state->lastindex = i/2 + 1;
657
164M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
161M
                    int j = state->lastmark + 1;
663
164M
                    while (j < i)
664
2.20M
                        state->mark[j++] = NULL;
665
161M
                    state->lastmark = i;
666
161M
                }
667
164M
                state->mark[i] = ptr;
668
164M
            }
669
164M
            pattern++;
670
164M
            DISPATCH;
671
672
164M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
56.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
56.3M
                   ptr, *pattern));
677
56.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
21.8M
                RETURN_FAILURE;
679
34.4M
            pattern++;
680
34.4M
            ptr++;
681
34.4M
            DISPATCH;
682
683
34.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
108M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
108M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
108M
            if (ctx->toplevel &&
698
108M
                ((state->match_all && ptr != state->end) ||
699
33.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
108M
            state->ptr = ptr;
704
108M
            RETURN_SUCCESS;
705
706
545k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
545k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
545k
            if (!SRE(at)(state, ptr, *pattern))
711
540k
                RETURN_FAILURE;
712
4.91k
            pattern++;
713
4.91k
            DISPATCH;
714
715
4.91k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
163M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
163M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
163M
            if (ptr >= end ||
749
163M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.57M
                RETURN_FAILURE;
751
161M
            pattern += pattern[0];
752
161M
            ptr++;
753
161M
            DISPATCH;
754
755
161M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
968k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
968k
                   pattern, ptr, pattern[0]));
758
968k
            if (ptr >= end ||
759
968k
                sre_lower_ascii(*ptr) != *pattern)
760
19.3k
                RETURN_FAILURE;
761
948k
            pattern++;
762
948k
            ptr++;
763
948k
            DISPATCH;
764
765
948k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
50.3M
        TARGET(SRE_OP_JUMP):
845
50.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
50.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
50.3M
                   ptr, pattern[0]));
850
50.3M
            pattern += pattern[0];
851
50.3M
            DISPATCH;
852
853
84.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
84.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
84.0M
            LASTMARK_SAVE();
858
84.0M
            if (state->repeat)
859
80.3M
                MARK_PUSH(ctx->lastmark);
860
182M
            for (; pattern[0]; pattern += pattern[0]) {
861
148M
                if (pattern[1] == SRE_OP_LITERAL &&
862
148M
                    (ptr >= end ||
863
69.3M
                     (SRE_CODE) *ptr != pattern[2]))
864
43.7M
                    continue;
865
105M
                if (pattern[1] == SRE_OP_IN &&
866
105M
                    (ptr >= end ||
867
75.9M
                     !SRE(charset)(state, pattern + 3,
868
75.9M
                                   (SRE_CODE) *ptr)))
869
54.0M
                    continue;
870
51.0M
                state->ptr = ptr;
871
51.0M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
51.0M
                if (ret) {
873
49.9M
                    if (state->repeat)
874
47.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
49.9M
                    RETURN_ON_ERROR(ret);
876
49.9M
                    RETURN_SUCCESS;
877
49.9M
                }
878
1.09M
                if (state->repeat)
879
16.6k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.09M
                LASTMARK_RESTORE();
881
1.09M
            }
882
34.0M
            if (state->repeat)
883
33.2M
                MARK_POP_DISCARD(ctx->lastmark);
884
34.0M
            RETURN_FAILURE;
885
886
236M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
236M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
236M
                   pattern[1], pattern[2]));
898
899
236M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
25.5k
                RETURN_FAILURE; /* cannot match */
901
902
236M
            state->ptr = ptr;
903
904
236M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
236M
            RETURN_ON_ERROR(ret);
906
236M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
236M
            ctx->count = ret;
908
236M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
236M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
49.8M
                RETURN_FAILURE;
917
918
186M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
186M
                ptr == state->end &&
920
186M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.71k
            {
922
                /* tail is empty.  we're finished */
923
3.71k
                state->ptr = ptr;
924
3.71k
                RETURN_SUCCESS;
925
3.71k
            }
926
927
186M
            LASTMARK_SAVE();
928
186M
            if (state->repeat)
929
146M
                MARK_PUSH(ctx->lastmark);
930
931
186M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
71.4M
                ctx->u.chr = pattern[pattern[0]+1];
935
71.4M
                for (;;) {
936
173M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
173M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
102M
                        ptr--;
939
102M
                        ctx->count--;
940
102M
                    }
941
71.4M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
63.0M
                        break;
943
8.40M
                    state->ptr = ptr;
944
8.40M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
8.40M
                            pattern+pattern[0]);
946
8.40M
                    if (ret) {
947
8.40M
                        if (state->repeat)
948
8.39M
                            MARK_POP_DISCARD(ctx->lastmark);
949
8.40M
                        RETURN_ON_ERROR(ret);
950
8.40M
                        RETURN_SUCCESS;
951
8.40M
                    }
952
240
                    if (state->repeat)
953
240
                        MARK_POP_KEEP(ctx->lastmark);
954
240
                    LASTMARK_RESTORE();
955
956
240
                    ptr--;
957
240
                    ctx->count--;
958
240
                }
959
63.0M
                if (state->repeat)
960
63.0M
                    MARK_POP_DISCARD(ctx->lastmark);
961
114M
            } else {
962
                /* general case */
963
115M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
115M
                    state->ptr = ptr;
965
115M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
115M
                            pattern+pattern[0]);
967
115M
                    if (ret) {
968
114M
                        if (state->repeat)
969
74.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
114M
                        RETURN_ON_ERROR(ret);
971
114M
                        RETURN_SUCCESS;
972
114M
                    }
973
1.24M
                    if (state->repeat)
974
160k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.24M
                    LASTMARK_RESTORE();
976
977
1.24M
                    ptr--;
978
1.24M
                    ctx->count--;
979
1.24M
                }
980
81.1k
                if (state->repeat)
981
80.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
81.1k
            }
983
63.1M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
101M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
101M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
101M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
101M
            ctx->u.rep = repeat_pool_malloc(state);
1127
101M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
101M
            ctx->u.rep->count = -1;
1131
101M
            ctx->u.rep->pattern = pattern;
1132
101M
            ctx->u.rep->prev = state->repeat;
1133
101M
            ctx->u.rep->last_ptr = NULL;
1134
101M
            state->repeat = ctx->u.rep;
1135
1136
101M
            state->ptr = ptr;
1137
101M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
101M
            state->repeat = ctx->u.rep->prev;
1139
101M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
101M
            if (ret) {
1142
101M
                RETURN_ON_ERROR(ret);
1143
101M
                RETURN_SUCCESS;
1144
101M
            }
1145
740
            RETURN_FAILURE;
1146
1147
182M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
182M
            ctx->u.rep = state->repeat;
1155
182M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
182M
            state->ptr = ptr;
1159
1160
182M
            ctx->count = ctx->u.rep->count+1;
1161
1162
182M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
182M
                   ptr, ctx->count));
1164
1165
182M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
182M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
182M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
182M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
178M
                ctx->u.rep->count = ctx->count;
1185
178M
                LASTMARK_SAVE();
1186
178M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
178M
                LAST_PTR_PUSH();
1189
178M
                ctx->u.rep->last_ptr = state->ptr;
1190
178M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
178M
                        ctx->u.rep->pattern+3);
1192
178M
                LAST_PTR_POP();
1193
178M
                if (ret) {
1194
81.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
81.7M
                    RETURN_ON_ERROR(ret);
1196
81.7M
                    RETURN_SUCCESS;
1197
81.7M
                }
1198
96.5M
                MARK_POP(ctx->lastmark);
1199
96.5M
                LASTMARK_RESTORE();
1200
96.5M
                ctx->u.rep->count = ctx->count-1;
1201
96.5M
                state->ptr = ptr;
1202
96.5M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
101M
            state->repeat = ctx->u.rep->prev;
1207
101M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
101M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
101M
            RETURN_ON_SUCCESS(ret);
1211
80.8k
            state->ptr = ptr;
1212
80.8k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
68.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
68.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
68.2M
                   ptr, pattern[1]));
1565
68.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
68.2M
            state->ptr = ptr - pattern[1];
1568
68.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
68.2M
            RETURN_ON_FAILURE(ret);
1570
67.7M
            pattern += pattern[0];
1571
67.7M
            DISPATCH;
1572
1573
67.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
21.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
21.4M
                   ptr, pattern[1]));
1578
21.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
21.4M
                state->ptr = ptr - pattern[1];
1580
21.4M
                LASTMARK_SAVE();
1581
21.4M
                if (state->repeat)
1582
21.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
42.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
42.9M
                if (ret) {
1586
16.3k
                    if (state->repeat)
1587
16.3k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
16.3k
                    RETURN_ON_ERROR(ret);
1589
16.3k
                    RETURN_FAILURE;
1590
16.3k
                }
1591
21.4M
                if (state->repeat)
1592
21.4M
                    MARK_POP(ctx->lastmark);
1593
21.4M
                LASTMARK_RESTORE();
1594
21.4M
            }
1595
21.4M
            pattern += pattern[0];
1596
21.4M
            DISPATCH;
1597
1598
21.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
737M
exit:
1620
737M
    ctx_pos = ctx->last_ctx_pos;
1621
737M
    jump = ctx->jump;
1622
737M
    DATA_POP_DISCARD(ctx);
1623
737M
    if (ctx_pos == -1) {
1624
91.6M
        state->sigcount = sigcount;
1625
91.6M
        return ret;
1626
91.6M
    }
1627
645M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
645M
    switch (jump) {
1630
178M
        case JUMP_MAX_UNTIL_2:
1631
178M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
178M
            goto jump_max_until_2;
1633
101M
        case JUMP_MAX_UNTIL_3:
1634
101M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
101M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
51.0M
        case JUMP_BRANCH:
1643
51.0M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
51.0M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
101M
        case JUMP_REPEAT:
1658
101M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
101M
            goto jump_repeat;
1660
8.40M
        case JUMP_REPEAT_ONE_1:
1661
8.40M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
8.40M
            goto jump_repeat_one_1;
1663
115M
        case JUMP_REPEAT_ONE_2:
1664
115M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
115M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
68.2M
        case JUMP_ASSERT:
1673
68.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
68.2M
            goto jump_assert;
1675
21.4M
        case JUMP_ASSERT_NOT:
1676
21.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
21.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
645M
    }
1683
1684
0
    return ret; /* should never get here */
1685
645M
}
1686
1687
/* Capture-group state must be reset between two successive SRE(match)
   calls made from a search loop.  Sets state->lastmark and
   state->lastindex back to -1.  Expects a variable named `state` to be
   in scope at the expansion site. */
1688
#define RESET_CAPTURE_GROUP() \
1689
311M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan the input from state->start towards state->end, looking for the
   first position at which `pattern` matches.

   If the pattern begins with an SRE_OP_INFO block, the block is used to
   pick a faster scanning strategy and is then skipped:
     - too little input left for the minimum width: return 0 at once;
     - one-character literal prefix: scan for that character;
     - longer literal prefix: scan using the prefix and its overlap table;
     - leading charset: scan for a character accepted by SRE(charset);
     - otherwise: try SRE(match) at every position in turn.

   Returns 0 when nothing matched.  Returns 1 directly when a literal
   prefix was found and SRE_INFO_LITERAL is set.  Otherwise any non-zero
   status produced by SRE(match) is returned unchanged.

   Side effects: state->start and state->ptr are updated for each
   candidate position, state->must_advance is cleared on the scanning
   paths, and RESET_CAPTURE_GROUP() is applied between failed attempts. */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
92.8M
{
1694
92.8M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
92.8M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
92.8M
    Py_ssize_t status = 0;
1697
92.8M
    Py_ssize_t prefix_len = 0;
1698
92.8M
    Py_ssize_t prefix_skip = 0;
1699
92.8M
    SRE_CODE* prefix = NULL;
1700
92.8M
    SRE_CODE* charset = NULL;
1701
92.8M
    SRE_CODE* overlap = NULL;
1702
92.8M
    int flags = 0;
1703
92.8M
    INIT_TRACE(state);
1704
1705
92.8M
    if (ptr > end)
1706
0
        return 0;
1707
1708
92.8M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
92.8M
        flags = pattern[2];
1713
1714
92.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.58M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.58M
                   end - ptr, (size_t) pattern[3]));
1717
1.58M
            return 0;
1718
1.58M
        }
1719
91.2M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.18M
            end -= pattern[3] - 1;
1723
7.18M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.18M
        }
1726
1727
91.2M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.19M
            prefix_len = pattern[5];
1731
7.19M
            prefix_skip = pattern[6];
1732
7.19M
            prefix = pattern + 7;
            /* Biased by -1: overlap[i] is the word at
               prefix[prefix_len + i - 1], so overlap[1] is the first
               word following the prefix data. */
1733
7.19M
            overlap = prefix + prefix_len - 1;
1734
84.0M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
79.9M
            charset = pattern + 5;
1738
1739
91.2M
        pattern += 1 + pattern[1];
1740
91.2M
    }
1741
1742
91.2M
    TRACE(("prefix = %p %zd %zd\n",
1743
91.2M
           prefix, prefix_len, prefix_skip));
1744
91.2M
    TRACE(("charset = %p\n", charset));
1745
1746
91.2M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.58M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
3.79M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.79M
#endif
        /* This path scans up to the real end of the input; the shortened
           end computed from the minimum width above is discarded. */
1753
3.79M
        end = (SRE_CHAR *)state->end;
1754
3.79M
        state->must_advance = 0;
1755
7.28M
        while (ptr < end) {
1756
101M
            while (*ptr != c) {
1757
94.9M
                if (++ptr >= end)
1758
551k
                    return 0;
1759
94.9M
            }
1760
6.72M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.72M
            state->start = ptr;
1762
6.72M
            state->ptr = ptr + prefix_skip;
1763
6.72M
            if (flags & SRE_INFO_LITERAL)
1764
4.27k
                return 1; /* we got all of it */
            /* NOTE(review): advancing by 2 code words per skipped prefix
               character presumably steps over the opcode/character pairs
               already verified by the scan -- confirm against the
               pattern compiler. */
1765
6.71M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.71M
            if (status != 0)
1767
6.02M
                return status;
1768
696k
            ++ptr;
1769
696k
            RESET_CAPTURE_GROUP();
1770
696k
        }
1771
10.7k
        return 0;
1772
3.79M
    }
1773
1774
84.7M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
601k
        Py_ssize_t i = 0;
1778
1779
601k
        end = (SRE_CHAR *)state->end;
1780
601k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.78M
        for (i = 0; i < prefix_len; i++)
1784
1.19M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
595k
#endif
1787
1.70M
        while (ptr < end) {
1788
1.70M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
            /* Find the first prefix character; ptr is left one past it. */
1789
9.79M
            while (*ptr++ != c) {
1790
8.08M
                if (ptr >= end)
1791
319
                    return 0;
1792
8.08M
            }
1793
1.70M
            if (ptr >= end)
1794
50
                return 0;
1795
1796
1.70M
            i = 1;
1797
1.70M
            state->must_advance = 0;
            /* i counts how many prefix characters are currently matched;
               on a mismatch it falls back to overlap[i], and the inner
               loop ends once that reaches 0. */
1798
1.70M
            do {
1799
1.70M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.48M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.48M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.48M
                    state->start = ptr - (prefix_len - 1);
1808
1.48M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.48M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.48M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.48M
                    if (status != 0)
1813
600k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
886k
                    if (++ptr >= end)
1816
25
                        return 0;
1817
886k
                    RESET_CAPTURE_GROUP();
1818
886k
                }
1819
1.10M
                i = overlap[i];
1820
1.10M
            } while (i != 0);
1821
1.70M
        }
1822
0
        return 0;
1823
601k
    }
1824
1825
84.0M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
79.9M
        end = (SRE_CHAR *)state->end;
1828
79.9M
        state->must_advance = 0;
1829
82.3M
        for (;;) {
1830
346M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
264M
                ptr++;
1832
82.3M
            if (ptr >= end)
1833
3.50M
                return 0;
1834
78.8M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
78.8M
            state->start = ptr;
1836
78.8M
            state->ptr = ptr;
1837
78.8M
            status = SRE(match)(state, pattern, 0);
1838
78.8M
            if (status != 0)
1839
76.4M
                break;
1840
2.47M
            ptr++;
1841
2.47M
            RESET_CAPTURE_GROUP();
1842
2.47M
        }
1843
79.9M
    } else {
1844
        /* general case */
1845
4.18M
        assert(ptr <= end);
1846
4.18M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.18M
        state->start = state->ptr = ptr;
        /* NOTE(review): only this first attempt passes 1 as the third
           argument; every retry below passes 0.  Presumably a
           "top-level call" flag -- confirm against SRE(match). */
1848
4.18M
        status = SRE(match)(state, pattern, 1);
1849
4.18M
        state->must_advance = 0;
        /* A pattern anchored at the beginning that failed at the first
           position is not retried at later positions: jump straight to
           the end and report no match. */
1850
4.18M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.18M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
312M
        while (status == 0 && ptr < end) {
1858
307M
            ptr++;
1859
307M
            RESET_CAPTURE_GROUP();
1860
307M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
307M
            state->start = state->ptr = ptr;
1862
307M
            status = SRE(match)(state, pattern, 0);
1863
307M
        }
1864
4.18M
    }
1865
1866
80.5M
    return status;
1867
84.0M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
36.9M
{
1694
36.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
36.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
36.9M
    Py_ssize_t status = 0;
1697
36.9M
    Py_ssize_t prefix_len = 0;
1698
36.9M
    Py_ssize_t prefix_skip = 0;
1699
36.9M
    SRE_CODE* prefix = NULL;
1700
36.9M
    SRE_CODE* charset = NULL;
1701
36.9M
    SRE_CODE* overlap = NULL;
1702
36.9M
    int flags = 0;
1703
36.9M
    INIT_TRACE(state);
1704
1705
36.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
36.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
36.9M
        flags = pattern[2];
1713
1714
36.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.46M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.46M
                   end - ptr, (size_t) pattern[3]));
1717
1.46M
            return 0;
1718
1.46M
        }
1719
35.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.31M
            end -= pattern[3] - 1;
1723
2.31M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.31M
        }
1726
1727
35.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.31M
            prefix_len = pattern[5];
1731
2.31M
            prefix_skip = pattern[6];
1732
2.31M
            prefix = pattern + 7;
1733
2.31M
            overlap = prefix + prefix_len - 1;
1734
33.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
30.0M
            charset = pattern + 5;
1738
1739
35.4M
        pattern += 1 + pattern[1];
1740
35.4M
    }
1741
1742
35.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
35.4M
           prefix, prefix_len, prefix_skip));
1744
35.4M
    TRACE(("charset = %p\n", charset));
1745
1746
35.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.29M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.29M
#if SIZEOF_SRE_CHAR < 4
1750
2.29M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.29M
#endif
1753
2.29M
        end = (SRE_CHAR *)state->end;
1754
2.29M
        state->must_advance = 0;
1755
2.45M
        while (ptr < end) {
1756
30.1M
            while (*ptr != c) {
1757
28.1M
                if (++ptr >= end)
1758
483k
                    return 0;
1759
28.1M
            }
1760
1.96M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.96M
            state->start = ptr;
1762
1.96M
            state->ptr = ptr + prefix_skip;
1763
1.96M
            if (flags & SRE_INFO_LITERAL)
1764
347
                return 1; /* we got all of it */
1765
1.96M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.96M
            if (status != 0)
1767
1.80M
                return status;
1768
159k
            ++ptr;
1769
159k
            RESET_CAPTURE_GROUP();
1770
159k
        }
1771
8.44k
        return 0;
1772
2.29M
    }
1773
1774
33.1M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
12.0k
        Py_ssize_t i = 0;
1778
1779
12.0k
        end = (SRE_CHAR *)state->end;
1780
12.0k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
12.0k
#if SIZEOF_SRE_CHAR < 4
1783
36.1k
        for (i = 0; i < prefix_len; i++)
1784
24.0k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
12.0k
#endif
1787
483k
        while (ptr < end) {
1788
483k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.32M
            while (*ptr++ != c) {
1790
2.84M
                if (ptr >= end)
1791
67
                    return 0;
1792
2.84M
            }
1793
483k
            if (ptr >= end)
1794
25
                return 0;
1795
1796
483k
            i = 1;
1797
483k
            state->must_advance = 0;
1798
483k
            do {
1799
483k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
353k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
353k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
353k
                    state->start = ptr - (prefix_len - 1);
1808
353k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
353k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
353k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
353k
                    if (status != 0)
1813
11.9k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
341k
                    if (++ptr >= end)
1816
12
                        return 0;
1817
341k
                    RESET_CAPTURE_GROUP();
1818
341k
                }
1819
471k
                i = overlap[i];
1820
471k
            } while (i != 0);
1821
483k
        }
1822
0
        return 0;
1823
12.0k
    }
1824
1825
33.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
30.0M
        end = (SRE_CHAR *)state->end;
1828
30.0M
        state->must_advance = 0;
1829
31.6M
        for (;;) {
1830
81.5M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
49.9M
                ptr++;
1832
31.6M
            if (ptr >= end)
1833
2.50M
                return 0;
1834
29.1M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
29.1M
            state->start = ptr;
1836
29.1M
            state->ptr = ptr;
1837
29.1M
            status = SRE(match)(state, pattern, 0);
1838
29.1M
            if (status != 0)
1839
27.5M
                break;
1840
1.58M
            ptr++;
1841
1.58M
            RESET_CAPTURE_GROUP();
1842
1.58M
        }
1843
30.0M
    } else {
1844
        /* general case */
1845
3.10M
        assert(ptr <= end);
1846
3.10M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.10M
        state->start = state->ptr = ptr;
1848
3.10M
        status = SRE(match)(state, pattern, 1);
1849
3.10M
        state->must_advance = 0;
1850
3.10M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.10M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
107M
        while (status == 0 && ptr < end) {
1858
104M
            ptr++;
1859
104M
            RESET_CAPTURE_GROUP();
1860
104M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
104M
            state->start = state->ptr = ptr;
1862
104M
            status = SRE(match)(state, pattern, 0);
1863
104M
        }
1864
3.10M
    }
1865
1866
30.6M
    return status;
1867
33.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
49.0M
{
1694
49.0M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
49.0M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
49.0M
    Py_ssize_t status = 0;
1697
49.0M
    Py_ssize_t prefix_len = 0;
1698
49.0M
    Py_ssize_t prefix_skip = 0;
1699
49.0M
    SRE_CODE* prefix = NULL;
1700
49.0M
    SRE_CODE* charset = NULL;
1701
49.0M
    SRE_CODE* overlap = NULL;
1702
49.0M
    int flags = 0;
1703
49.0M
    INIT_TRACE(state);
1704
1705
49.0M
    if (ptr > end)
1706
0
        return 0;
1707
1708
49.0M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
49.0M
        flags = pattern[2];
1713
1714
49.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
112k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
112k
                   end - ptr, (size_t) pattern[3]));
1717
112k
            return 0;
1718
112k
        }
1719
48.9M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.07M
            end -= pattern[3] - 1;
1723
2.07M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.07M
        }
1726
1727
48.9M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.07M
            prefix_len = pattern[5];
1731
2.07M
            prefix_skip = pattern[6];
1732
2.07M
            prefix = pattern + 7;
1733
2.07M
            overlap = prefix + prefix_len - 1;
1734
46.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
45.9M
            charset = pattern + 5;
1738
1739
48.9M
        pattern += 1 + pattern[1];
1740
48.9M
    }
1741
1742
48.9M
    TRACE(("prefix = %p %zd %zd\n",
1743
48.9M
           prefix, prefix_len, prefix_skip));
1744
48.9M
    TRACE(("charset = %p\n", charset));
1745
1746
48.9M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.49M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.49M
#if SIZEOF_SRE_CHAR < 4
1750
1.49M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.49M
#endif
1753
1.49M
        end = (SRE_CHAR *)state->end;
1754
1.49M
        state->must_advance = 0;
1755
1.67M
        while (ptr < end) {
1756
48.5M
            while (*ptr != c) {
1757
46.9M
                if (++ptr >= end)
1758
63.5k
                    return 0;
1759
46.9M
            }
1760
1.61M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.61M
            state->start = ptr;
1762
1.61M
            state->ptr = ptr + prefix_skip;
1763
1.61M
            if (flags & SRE_INFO_LITERAL)
1764
1.36k
                return 1; /* we got all of it */
1765
1.60M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.60M
            if (status != 0)
1767
1.42M
                return status;
1768
183k
            ++ptr;
1769
183k
            RESET_CAPTURE_GROUP();
1770
183k
        }
1771
1.53k
        return 0;
1772
1.49M
    }
1773
1774
47.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
583k
        Py_ssize_t i = 0;
1778
1779
583k
        end = (SRE_CHAR *)state->end;
1780
583k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
583k
#if SIZEOF_SRE_CHAR < 4
1783
1.75M
        for (i = 0; i < prefix_len; i++)
1784
1.16M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
583k
#endif
1787
1.01M
        while (ptr < end) {
1788
1.01M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.17M
            while (*ptr++ != c) {
1790
3.16M
                if (ptr >= end)
1791
129
                    return 0;
1792
3.16M
            }
1793
1.01M
            if (ptr >= end)
1794
12
                return 0;
1795
1796
1.01M
            i = 1;
1797
1.01M
            state->must_advance = 0;
1798
1.01M
            do {
1799
1.01M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
932k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
932k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
932k
                    state->start = ptr - (prefix_len - 1);
1808
932k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
932k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
932k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
932k
                    if (status != 0)
1813
583k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
348k
                    if (++ptr >= end)
1816
9
                        return 0;
1817
348k
                    RESET_CAPTURE_GROUP();
1818
348k
                }
1819
431k
                i = overlap[i];
1820
431k
            } while (i != 0);
1821
1.01M
        }
1822
0
        return 0;
1823
583k
    }
1824
1825
46.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
45.9M
        end = (SRE_CHAR *)state->end;
1828
45.9M
        state->must_advance = 0;
1829
46.3M
        for (;;) {
1830
185M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
139M
                ptr++;
1832
46.3M
            if (ptr >= end)
1833
941k
                return 0;
1834
45.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
45.4M
            state->start = ptr;
1836
45.4M
            state->ptr = ptr;
1837
45.4M
            status = SRE(match)(state, pattern, 0);
1838
45.4M
            if (status != 0)
1839
45.0M
                break;
1840
422k
            ptr++;
1841
422k
            RESET_CAPTURE_GROUP();
1842
422k
        }
1843
45.9M
    } else {
1844
        /* general case */
1845
866k
        assert(ptr <= end);
1846
866k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
866k
        state->start = state->ptr = ptr;
1848
866k
        status = SRE(match)(state, pattern, 1);
1849
866k
        state->must_advance = 0;
1850
866k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
866k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
155M
        while (status == 0 && ptr < end) {
1858
154M
            ptr++;
1859
154M
            RESET_CAPTURE_GROUP();
1860
154M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
154M
            state->start = state->ptr = ptr;
1862
154M
            status = SRE(match)(state, pattern, 0);
1863
154M
        }
1864
866k
    }
1865
1866
45.8M
    return status;
1867
46.8M
}
/* Coverage listing for the UCS4 instantiation of SRE(search).
 *
 * Scans forward from state->start towards state->end for the first
 * position at which `pattern` matches, setting state->start and state->ptr
 * to each candidate position before handing it to SRE(match).
 *
 * Four strategies are used, chosen from the optional INFO block at the
 * head of the pattern: single literal prefix character, multi-character
 * literal prefix (with an overlap table), leading charset, and the general
 * case that simply tries every position.
 *
 * Returns 0 when no position matches; otherwise the nonzero status
 * reported by SRE(match), or 1 when the prefix is the whole pattern
 * (SRE_INFO_LITERAL).
 *
 * NOTE(review): the signature row is not part of this listing; `state` and
 * `pattern` are the two parameters the body uses.
 */
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.93M
{
1694
6.93M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.93M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.93M
    Py_ssize_t status = 0;
1697
6.93M
    Py_ssize_t prefix_len = 0;
1698
6.93M
    Py_ssize_t prefix_skip = 0;
1699
6.93M
    SRE_CODE* prefix = NULL;
1700
6.93M
    SRE_CODE* charset = NULL;
1701
6.93M
    SRE_CODE* overlap = NULL;
1702
6.93M
    int flags = 0;
1703
6.93M
    INIT_TRACE(state);
1704
1705
6.93M
    /* start position already beyond the end: nothing to search */
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.93M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.93M
        flags = pattern[2];
1713
1714
6.93M
        /* pattern[3] is the <3=min> field: give up at once when fewer
           characters remain than that minimum */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.00k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.00k
                   end - ptr, (size_t) pattern[3]));
1717
6.00k
            return 0;
1718
6.00k
        }
1719
6.93M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.80M
            end -= pattern[3] - 1;
1723
2.80M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.80M
        }
1726
1727
6.93M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.80M
            prefix_len = pattern[5];
1731
2.80M
            prefix_skip = pattern[6];
1732
2.80M
            prefix = pattern + 7;
1733
2.80M
            /* biased by -1 so that overlap[1] is the first word after the
               prefix data; the scan below only indexes it from 1 upwards */
            overlap = prefix + prefix_len - 1;
1734
4.13M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.91M
            charset = pattern + 5;
1738
1739
6.93M
        /* step over the info block; pattern[1] is its <1=skip> field */
        pattern += 1 + pattern[1];
1740
6.93M
    }
1741
1742
6.93M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.93M
           prefix, prefix_len, prefix_skip));
1744
6.93M
    TRACE(("charset = %p\n", charset));
1745
1746
6.93M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.79M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.79M
        /* drop the min-length adjustment made above and scan up to the
           real end of the input */
        end = (SRE_CHAR *)state->end;
1754
2.79M
        state->must_advance = 0;
1755
3.15M
        while (ptr < end) {
1756
22.9M
            /* skip ahead to the next occurrence of the literal */
            while (*ptr != c) {
1757
19.8M
                if (++ptr >= end)
1758
4.02k
                    return 0;
1759
19.8M
            }
1760
3.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.14M
            state->start = ptr;
1762
3.14M
            state->ptr = ptr + prefix_skip;
1763
3.14M
            if (flags & SRE_INFO_LITERAL)
1764
2.56k
                return 1; /* we got all of it */
1765
3.14M
            /* resume matching after the part of the pattern the prefix
               already covered.  NOTE(review): the 2*prefix_skip offset
               presumably reflects two code words per prefix literal --
               confirm against the pattern compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.14M
            if (status != 0)
1767
2.79M
                return status;
1768
353k
            /* the rest of the pattern failed here: retry one character on */
            ++ptr;
1769
353k
            RESET_CAPTURE_GROUP();
1770
353k
        }
1771
760
        return 0;
1772
2.79M
    }
1773
1774
4.13M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
5.57k
        Py_ssize_t i = 0;
1778
1779
5.57k
        end = (SRE_CHAR *)state->end;
1780
5.57k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
209k
        while (ptr < end) {
1788
209k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.29M
            /* find the first prefix character; the post-increment leaves
               ptr on the character that follows it */
            while (*ptr++ != c) {
1790
2.08M
                if (ptr >= end)
1791
123
                    return 0;
1792
2.08M
            }
1793
209k
            if (ptr >= end)
1794
13
                return 0;
1795
1796
208k
            /* i counts the prefix characters matched so far */
            i = 1;
1797
208k
            state->must_advance = 0;
1798
209k
            do {
1799
209k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
201k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
201k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
201k
                    /* ptr is on the last prefix character: start is the
                       first prefix character, and state->ptr ends up
                       prefix_skip characters after start */
                    state->start = ptr - (prefix_len - 1);
1808
201k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
201k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
201k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
201k
                    if (status != 0)
1813
5.43k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
196k
                    if (++ptr >= end)
1816
4
                        return 0;
1817
196k
                    RESET_CAPTURE_GROUP();
1818
196k
                }
1819
204k
                /* mismatch or failed attempt: the overlap table gives the
                   number of prefix characters still considered matched;
                   0 ends this loop and restarts the outer scan */
                i = overlap[i];
1820
204k
            } while (i != 0);
1821
208k
        }
1822
0
        return 0;
1823
5.57k
    }
1824
1825
4.13M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.91M
        end = (SRE_CHAR *)state->end;
1828
3.91M
        state->must_advance = 0;
1829
4.37M
        for (;;) {
1830
79.7M
            /* advance to the next character accepted by the charset */
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
75.4M
                ptr++;
1832
4.37M
            if (ptr >= end)
1833
52.7k
                return 0;
1834
4.32M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.32M
            state->start = ptr;
1836
4.32M
            state->ptr = ptr;
1837
4.32M
            status = SRE(match)(state, pattern, 0);
1838
4.32M
            if (status != 0)
1839
3.86M
                break;
1840
460k
            ptr++;
1841
460k
            RESET_CAPTURE_GROUP();
1842
460k
        }
1843
3.91M
    } else {
1844
        /* general case */
1845
211k
        assert(ptr <= end);
1846
211k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
211k
        state->start = state->ptr = ptr;
1848
211k
        /* first attempt at the starting position; this is the only call
           that passes 1 as the last argument */
        status = SRE(match)(state, pattern, 1);
1849
211k
        state->must_advance = 0;
1850
211k
        /* a pattern that opens with a beginning-of-string anchor and
           failed at the first position is not retried further on:
           park start/ptr at end and report no match */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
211k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
49.5M
        /* otherwise try each following position in turn until one matches
           or the end is reached */
        while (status == 0 && ptr < end) {
1858
49.3M
            ptr++;
1859
49.3M
            RESET_CAPTURE_GROUP();
1860
49.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
49.3M
            state->start = state->ptr = ptr;
1862
49.3M
            status = SRE(match)(state, pattern, 0);
1863
49.3M
        }
1864
211k
    }
1865
1866
4.07M
    return status;
1867
4.13M
}
1868
1869
/* This file is included several times with different character settings
   (see the note at the top of the file); the macros that carry those
   settings are undefined here, at the end of each inclusion. */
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/