Coverage Report

Created: 2025-07-11 06:24

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
12.0M
{
18
    /* check if pointer is at given position */
19
20
12.0M
    Py_ssize_t thisp, thatp;
21
22
12.0M
    switch (at) {
23
24
5.38M
    case SRE_AT_BEGINNING:
25
5.38M
    case SRE_AT_BEGINNING_STRING:
26
5.38M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
3.76M
    case SRE_AT_END:
33
3.76M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
3.76M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
3.76M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.89M
    case SRE_AT_END_STRING:
42
2.89M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.0M
    }
87
88
0
    return 0;
89
12.0M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
10.5M
{
18
    /* check if pointer is at given position */
19
20
10.5M
    Py_ssize_t thisp, thatp;
21
22
10.5M
    switch (at) {
23
24
5.34M
    case SRE_AT_BEGINNING:
25
5.34M
    case SRE_AT_BEGINNING_STRING:
26
5.34M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
3.31M
    case SRE_AT_END:
33
3.31M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
3.31M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
3.31M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.92M
    case SRE_AT_END_STRING:
42
1.92M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
10.5M
    }
87
88
0
    return 0;
89
10.5M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
865k
{
18
    /* check if pointer is at given position */
19
20
865k
    Py_ssize_t thisp, thatp;
21
22
865k
    switch (at) {
23
24
30.4k
    case SRE_AT_BEGINNING:
25
30.4k
    case SRE_AT_BEGINNING_STRING:
26
30.4k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
365k
    case SRE_AT_END:
33
365k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
365k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
365k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
468k
    case SRE_AT_END_STRING:
42
468k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
865k
    }
87
88
0
    return 0;
89
865k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
582k
{
18
    /* check if pointer is at given position */
19
20
582k
    Py_ssize_t thisp, thatp;
21
22
582k
    switch (at) {
23
24
5.16k
    case SRE_AT_BEGINNING:
25
5.16k
    case SRE_AT_BEGINNING_STRING:
26
5.16k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
80.6k
    case SRE_AT_END:
33
80.6k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
80.6k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
80.6k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
496k
    case SRE_AT_END_STRING:
42
496k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
582k
    }
87
88
0
    return 0;
89
582k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.57G
{
94
    /* check if character is a member of the given set */
95
96
1.57G
    int ok = 1;
97
98
3.65G
    for (;;) {
99
3.65G
        switch (*set++) {
100
101
1.05G
        case SRE_OP_FAILURE:
102
1.05G
            return !ok;
103
104
1.19G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.19G
            if (ch == set[0])
107
5.23M
                return ok;
108
1.19G
            set++;
109
1.19G
            break;
110
111
9.17M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
9.17M
            if (sre_category(set[0], (int) ch))
114
6.32M
                return ok;
115
2.84M
            set++;
116
2.84M
            break;
117
118
712M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
712M
            if (ch < 256 &&
121
712M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
324M
                return ok;
123
387M
            set += 256/SRE_CODE_BITS;
124
387M
            break;
125
126
319M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
319M
            if (set[0] <= ch && ch <= set[1])
129
189M
                return ok;
130
129M
            set += 2;
131
129M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
363M
        case SRE_OP_NEGATE:
148
363M
            ok = !ok;
149
363M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.65G
        }
175
3.65G
    }
176
1.57G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
349M
{
94
    /* check if character is a member of the given set */
95
96
349M
    int ok = 1;
97
98
768M
    for (;;) {
99
768M
        switch (*set++) {
100
101
211M
        case SRE_OP_FAILURE:
102
211M
            return !ok;
103
104
286M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
286M
            if (ch == set[0])
107
2.60M
                return ok;
108
284M
            set++;
109
284M
            break;
110
111
8.05M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
8.05M
            if (sre_category(set[0], (int) ch))
114
5.24M
                return ok;
115
2.80M
            set++;
116
2.80M
            break;
117
118
74.0M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
74.0M
            if (ch < 256 &&
121
74.0M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
35.3M
                return ok;
123
38.6M
            set += 256/SRE_CODE_BITS;
124
38.6M
            break;
125
126
151M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
151M
            if (set[0] <= ch && ch <= set[1])
129
94.7M
                return ok;
130
56.7M
            set += 2;
131
56.7M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
37.2M
        case SRE_OP_NEGATE:
148
37.2M
            ok = !ok;
149
37.2M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
768M
        }
175
768M
    }
176
349M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
692M
{
94
    /* check if character is a member of the given set */
95
96
692M
    int ok = 1;
97
98
1.68G
    for (;;) {
99
1.68G
        switch (*set++) {
100
101
503M
        case SRE_OP_FAILURE:
102
503M
            return !ok;
103
104
625M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
625M
            if (ch == set[0])
107
1.43M
                return ok;
108
623M
            set++;
109
623M
            break;
110
111
171k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
171k
            if (sre_category(set[0], (int) ch))
114
150k
                return ok;
115
21.2k
            set++;
116
21.2k
            break;
117
118
262M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
262M
            if (ch < 256 &&
121
262M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
101M
                return ok;
123
160M
            set += 256/SRE_CODE_BITS;
124
160M
            break;
125
126
147M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
147M
            if (set[0] <= ch && ch <= set[1])
129
85.5M
                return ok;
130
62.1M
            set += 2;
131
62.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
147M
        case SRE_OP_NEGATE:
148
147M
            ok = !ok;
149
147M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.68G
        }
175
1.68G
    }
176
692M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
535M
{
94
    /* check if character is a member of the given set */
95
96
535M
    int ok = 1;
97
98
1.19G
    for (;;) {
99
1.19G
        switch (*set++) {
100
101
336M
        case SRE_OP_FAILURE:
102
336M
            return !ok;
103
104
286M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
286M
            if (ch == set[0])
107
1.19M
                return ok;
108
285M
            set++;
109
285M
            break;
110
111
950k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
950k
            if (sre_category(set[0], (int) ch))
114
928k
                return ok;
115
22.1k
            set++;
116
22.1k
            break;
117
118
375M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
375M
            if (ch < 256 &&
121
375M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
187M
                return ok;
123
188M
            set += 256/SRE_CODE_BITS;
124
188M
            break;
125
126
20.2M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
20.2M
            if (set[0] <= ch && ch <= set[1])
129
9.65M
                return ok;
130
10.5M
            set += 2;
131
10.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
178M
        case SRE_OP_NEGATE:
148
178M
            ok = !ok;
149
178M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.19G
        }
175
1.19G
    }
176
535M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
590M
{
195
590M
    SRE_CODE chr;
196
590M
    SRE_CHAR c;
197
590M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
590M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
590M
    Py_ssize_t i;
200
590M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
590M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
11.3M
        end = ptr + maxcount;
205
206
590M
    switch (pattern[0]) {
207
208
533M
    case SRE_OP_IN:
209
        /* repeated set */
210
533M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
853M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
320M
            ptr++;
213
533M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
50.1M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
50.1M
        chr = pattern[1];
232
50.1M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
50.1M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
47.8M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
47.8M
        else
238
47.8M
#endif
239
53.5M
        while (ptr < end && *ptr == c)
240
3.43M
            ptr++;
241
50.1M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
6.84M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
6.84M
        chr = pattern[1];
270
6.84M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
6.84M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
4.35M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
4.35M
        else
276
4.35M
#endif
277
27.9M
        while (ptr < end && *ptr != c)
278
21.1M
            ptr++;
279
6.84M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
590M
    }
319
320
590M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
590M
           ptr - (SRE_CHAR*) state->ptr));
322
590M
    return ptr - (SRE_CHAR*) state->ptr;
323
590M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
175M
{
195
175M
    SRE_CODE chr;
196
175M
    SRE_CHAR c;
197
175M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
175M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
175M
    Py_ssize_t i;
200
175M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
175M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
2.44M
        end = ptr + maxcount;
205
206
175M
    switch (pattern[0]) {
207
208
133M
    case SRE_OP_IN:
209
        /* repeated set */
210
133M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
230M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
96.8M
            ptr++;
213
133M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
42.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
42.6M
        chr = pattern[1];
232
42.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
42.6M
        c = (SRE_CHAR) chr;
234
42.6M
#if SIZEOF_SRE_CHAR < 4
235
42.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
42.6M
        else
238
42.6M
#endif
239
44.2M
        while (ptr < end && *ptr == c)
240
1.63M
            ptr++;
241
42.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
171k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
171k
        chr = pattern[1];
270
171k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
171k
        c = (SRE_CHAR) chr;
272
171k
#if SIZEOF_SRE_CHAR < 4
273
171k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
171k
        else
276
171k
#endif
277
2.14M
        while (ptr < end && *ptr != c)
278
1.97M
            ptr++;
279
171k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
175M
    }
319
320
175M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
175M
           ptr - (SRE_CHAR*) state->ptr));
322
175M
    return ptr - (SRE_CHAR*) state->ptr;
323
175M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
249M
{
195
249M
    SRE_CODE chr;
196
249M
    SRE_CHAR c;
197
249M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
249M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
249M
    Py_ssize_t i;
200
249M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
249M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.19M
        end = ptr + maxcount;
205
206
249M
    switch (pattern[0]) {
207
208
240M
    case SRE_OP_IN:
209
        /* repeated set */
210
240M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
355M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
114M
            ptr++;
213
240M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
5.19M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
5.19M
        chr = pattern[1];
232
5.19M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
5.19M
        c = (SRE_CHAR) chr;
234
5.19M
#if SIZEOF_SRE_CHAR < 4
235
5.19M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
5.19M
        else
238
5.19M
#endif
239
6.77M
        while (ptr < end && *ptr == c)
240
1.58M
            ptr++;
241
5.19M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
4.18M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
4.18M
        chr = pattern[1];
270
4.18M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
4.18M
        c = (SRE_CHAR) chr;
272
4.18M
#if SIZEOF_SRE_CHAR < 4
273
4.18M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
4.18M
        else
276
4.18M
#endif
277
9.95M
        while (ptr < end && *ptr != c)
278
5.77M
            ptr++;
279
4.18M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
249M
    }
319
320
249M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
249M
           ptr - (SRE_CHAR*) state->ptr));
322
249M
    return ptr - (SRE_CHAR*) state->ptr;
323
249M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
164M
{
195
164M
    SRE_CODE chr;
196
164M
    SRE_CHAR c;
197
164M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
164M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
164M
    Py_ssize_t i;
200
164M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
164M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.75M
        end = ptr + maxcount;
205
206
164M
    switch (pattern[0]) {
207
208
159M
    case SRE_OP_IN:
209
        /* repeated set */
210
159M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
268M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
109M
            ptr++;
213
159M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.34M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.34M
        chr = pattern[1];
232
2.34M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.34M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
2.55M
        while (ptr < end && *ptr == c)
240
213k
            ptr++;
241
2.34M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.49M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.49M
        chr = pattern[1];
270
2.49M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.49M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
15.8M
        while (ptr < end && *ptr != c)
278
13.3M
            ptr++;
279
2.49M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
164M
    }
319
320
164M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
164M
           ptr - (SRE_CHAR*) state->ptr));
322
164M
    return ptr - (SRE_CHAR*) state->ptr;
323
164M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
577M
    do { \
355
577M
        ctx->lastmark = state->lastmark; \
356
577M
        ctx->lastindex = state->lastindex; \
357
577M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
211M
    do { \
360
211M
        state->lastmark = ctx->lastmark; \
361
211M
        state->lastindex = ctx->lastindex; \
362
211M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
233M
    do { \
366
233M
        TRACE(("push last_ptr: %zd", \
367
233M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
233M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
233M
    } while (0)
370
#define LAST_PTR_POP()  \
371
233M
    do { \
372
233M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
233M
        TRACE(("pop last_ptr: %zd", \
374
233M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
233M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
624M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
848M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.30G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
132M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
88.9M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.47G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.47G
do { \
390
1.47G
    alloc_pos = state->data_stack_base; \
391
1.47G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.47G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.47G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
154M
        int j = data_stack_grow(state, sizeof(type)); \
395
154M
        if (j < 0) return j; \
396
154M
        if (ctx_pos != -1) \
397
154M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
154M
    } \
399
1.47G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.47G
    state->data_stack_base += sizeof(type); \
401
1.47G
} while (0)
402
403
1.51G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.51G
do { \
405
1.51G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.51G
    ptr = (type*)(state->data_stack+pos); \
407
1.51G
} while (0)
408
409
510M
#define DATA_STACK_PUSH(state, data, size) \
410
510M
do { \
411
510M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
510M
           data, state->data_stack_base, size)); \
413
510M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
44.1k
        int j = data_stack_grow(state, size); \
415
44.1k
        if (j < 0) return j; \
416
44.1k
        if (ctx_pos != -1) \
417
44.1k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
44.1k
    } \
419
510M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
510M
    state->data_stack_base += size; \
421
510M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
341M
#define DATA_STACK_POP(state, data, size, discard) \
427
341M
do { \
428
341M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
341M
           data, state->data_stack_base-size, size)); \
430
341M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
341M
    if (discard) \
432
341M
        state->data_stack_base -= size; \
433
341M
} while (0)
434
435
1.64G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.64G
do { \
437
1.64G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.64G
           state->data_stack_base-size, size)); \
439
1.64G
    state->data_stack_base -= size; \
440
1.64G
} while(0)
441
442
#define DATA_PUSH(x) \
443
233M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
233M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.47G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.47G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.51G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
472M
    do if (lastmark >= 0) { \
473
277M
        MARK_TRACE("push", (lastmark)); \
474
277M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
277M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
472M
    } while (0)
477
#define MARK_POP(lastmark) \
478
150M
    do if (lastmark >= 0) { \
479
106M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
106M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
106M
        MARK_TRACE("pop", (lastmark)); \
482
150M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.03M
    do if (lastmark >= 0) { \
485
2.03M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.03M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.03M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.03M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
321M
    do if (lastmark >= 0) { \
491
170M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
170M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
170M
        MARK_TRACE("pop discard", (lastmark)); \
494
321M
    } while (0)
495
496
547M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
233M
#define JUMP_MAX_UNTIL_2     2
499
132M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
131M
#define JUMP_REPEAT          7
504
8.13M
#define JUMP_REPEAT_ONE_1    8
505
164M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
137M
#define JUMP_BRANCH          11
508
88.9M
#define JUMP_ASSERT          12
509
28.9M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
924M
    ctx->pattern = pattern; \
516
924M
    ctx->ptr = ptr; \
517
924M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
924M
    nextctx->pattern = nextpattern; \
519
924M
    nextctx->toplevel = toplevel_; \
520
924M
    nextctx->jump = jumpvalue; \
521
924M
    nextctx->last_ctx_pos = ctx_pos; \
522
924M
    pattern = nextpattern; \
523
924M
    ctx_pos = alloc_pos; \
524
924M
    ctx = nextctx; \
525
924M
    goto entrance; \
526
924M
    jumplabel: \
527
924M
    pattern = ctx->pattern; \
528
924M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
807M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
117M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.60G
    do {                                                           \
553
2.60G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.60G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.60G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.68G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.60G
        do {                               \
588
2.60G
            MAYBE_CHECK_SIGNALS;           \
589
2.60G
            goto *sre_targets[*pattern++]; \
590
2.60G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
547M
{
601
547M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
547M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
547M
    Py_ssize_t ret = 0;
604
547M
    int jump;
605
547M
    unsigned int sigcount = state->sigcount;
606
607
547M
    SRE(match_context)* ctx;
608
547M
    SRE(match_context)* nextctx;
609
547M
    INIT_TRACE(state);
610
611
547M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
547M
    DATA_ALLOC(SRE(match_context), ctx);
614
547M
    ctx->last_ctx_pos = -1;
615
547M
    ctx->jump = JUMP_NONE;
616
547M
    ctx->toplevel = toplevel;
617
547M
    ctx_pos = alloc_pos;
618
619
547M
#if USE_COMPUTED_GOTOS
620
547M
#include "sre_targets.h"
621
547M
#endif
622
623
1.47G
entrance:
624
625
1.47G
    ;  // Fashion statement.
626
1.47G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.47G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
75.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.98M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.98M
                   end - ptr, (size_t) pattern[3]));
634
4.98M
            RETURN_FAILURE;
635
4.98M
        }
636
70.1M
        pattern += pattern[1] + 1;
637
70.1M
    }
638
639
1.46G
#if USE_COMPUTED_GOTOS
640
1.46G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.46G
    {
647
648
1.46G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
568M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
568M
                   ptr, pattern[0]));
653
568M
            {
654
568M
                int i = pattern[0];
655
568M
                if (i & 1)
656
70.3M
                    state->lastindex = i/2 + 1;
657
568M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
561M
                    int j = state->lastmark + 1;
663
567M
                    while (j < i)
664
5.89M
                        state->mark[j++] = NULL;
665
561M
                    state->lastmark = i;
666
561M
                }
667
568M
                state->mark[i] = ptr;
668
568M
            }
669
568M
            pattern++;
670
568M
            DISPATCH;
671
672
568M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
203M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
203M
                   ptr, *pattern));
677
203M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
120M
                RETURN_FAILURE;
679
83.2M
            pattern++;
680
83.2M
            ptr++;
681
83.2M
            DISPATCH;
682
683
83.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
225M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
225M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
225M
            if (ctx->toplevel &&
698
225M
                ((state->match_all && ptr != state->end) ||
699
61.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
225M
            state->ptr = ptr;
704
225M
            RETURN_SUCCESS;
705
706
12.0M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.0M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.0M
            if (!SRE(at)(state, ptr, *pattern))
711
3.97M
                RETURN_FAILURE;
712
8.06M
            pattern++;
713
8.06M
            DISPATCH;
714
715
8.06M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
280M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
280M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
280M
            if (ptr >= end ||
749
280M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.37M
                RETURN_FAILURE;
751
274M
            pattern += pattern[0];
752
274M
            ptr++;
753
274M
            DISPATCH;
754
755
274M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.72M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.72M
                   pattern, ptr, pattern[0]));
758
4.72M
            if (ptr >= end ||
759
4.72M
                sre_lower_ascii(*ptr) != *pattern)
760
429k
                RETURN_FAILURE;
761
4.29M
            pattern++;
762
4.29M
            ptr++;
763
4.29M
            DISPATCH;
764
765
4.29M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
82.6M
        TARGET(SRE_OP_JUMP):
845
82.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
82.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
82.6M
                   ptr, pattern[0]));
850
82.6M
            pattern += pattern[0];
851
82.6M
            DISPATCH;
852
853
143M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
143M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
143M
            LASTMARK_SAVE();
858
143M
            if (state->repeat)
859
104M
                MARK_PUSH(ctx->lastmark);
860
347M
            for (; pattern[0]; pattern += pattern[0]) {
861
283M
                if (pattern[1] == SRE_OP_LITERAL &&
862
283M
                    (ptr >= end ||
863
135M
                     (SRE_CODE) *ptr != pattern[2]))
864
74.5M
                    continue;
865
208M
                if (pattern[1] == SRE_OP_IN &&
866
208M
                    (ptr >= end ||
867
98.9M
                     !SRE(charset)(state, pattern + 3,
868
98.9M
                                   (SRE_CODE) *ptr)))
869
70.8M
                    continue;
870
137M
                state->ptr = ptr;
871
137M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
137M
                if (ret) {
873
79.7M
                    if (state->repeat)
874
63.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
79.7M
                    RETURN_ON_ERROR(ret);
876
79.7M
                    RETURN_SUCCESS;
877
79.7M
                }
878
57.8M
                if (state->repeat)
879
15.7k
                    MARK_POP_KEEP(ctx->lastmark);
880
57.8M
                LASTMARK_RESTORE();
881
57.8M
            }
882
64.1M
            if (state->repeat)
883
41.1M
                MARK_POP_DISCARD(ctx->lastmark);
884
64.1M
            RETURN_FAILURE;
885
886
590M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
590M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
590M
                   pattern[1], pattern[2]));
898
899
590M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
898k
                RETURN_FAILURE; /* cannot match */
901
902
590M
            state->ptr = ptr;
903
904
590M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
590M
            RETURN_ON_ERROR(ret);
906
590M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
590M
            ctx->count = ret;
908
590M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
590M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
418M
                RETURN_FAILURE;
917
918
171M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
171M
                ptr == state->end &&
920
171M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
78.1k
            {
922
                /* tail is empty.  we're finished */
923
78.1k
                state->ptr = ptr;
924
78.1k
                RETURN_SUCCESS;
925
78.1k
            }
926
927
171M
            LASTMARK_SAVE();
928
171M
            if (state->repeat)
929
105M
                MARK_PUSH(ctx->lastmark);
930
931
171M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
9.47M
                ctx->u.chr = pattern[pattern[0]+1];
935
9.48M
                for (;;) {
936
26.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
26.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
17.4M
                        ptr--;
939
17.4M
                        ctx->count--;
940
17.4M
                    }
941
9.48M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.34M
                        break;
943
8.13M
                    state->ptr = ptr;
944
8.13M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
8.13M
                            pattern+pattern[0]);
946
8.13M
                    if (ret) {
947
8.12M
                        if (state->repeat)
948
6.84M
                            MARK_POP_DISCARD(ctx->lastmark);
949
8.12M
                        RETURN_ON_ERROR(ret);
950
8.12M
                        RETURN_SUCCESS;
951
8.12M
                    }
952
10.6k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
10.6k
                    LASTMARK_RESTORE();
955
956
10.6k
                    ptr--;
957
10.6k
                    ctx->count--;
958
10.6k
                }
959
1.34M
                if (state->repeat)
960
502
                    MARK_POP_DISCARD(ctx->lastmark);
961
162M
            } else {
962
                /* general case */
963
166M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
164M
                    state->ptr = ptr;
965
164M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
164M
                            pattern+pattern[0]);
967
164M
                    if (ret) {
968
161M
                        if (state->repeat)
969
97.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
161M
                        RETURN_ON_ERROR(ret);
971
161M
                        RETURN_SUCCESS;
972
161M
                    }
973
3.77M
                    if (state->repeat)
974
2.02M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.77M
                    LASTMARK_RESTORE();
976
977
3.77M
                    ptr--;
978
3.77M
                    ctx->count--;
979
3.77M
                }
980
1.19M
                if (state->repeat)
981
1.04M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.19M
            }
983
2.53M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
131M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
131M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
131M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
131M
            ctx->u.rep = repeat_pool_malloc(state);
1127
131M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
131M
            ctx->u.rep->count = -1;
1131
131M
            ctx->u.rep->pattern = pattern;
1132
131M
            ctx->u.rep->prev = state->repeat;
1133
131M
            ctx->u.rep->last_ptr = NULL;
1134
131M
            state->repeat = ctx->u.rep;
1135
1136
131M
            state->ptr = ptr;
1137
131M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
131M
            state->repeat = ctx->u.rep->prev;
1139
131M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
131M
            if (ret) {
1142
131M
                RETURN_ON_ERROR(ret);
1143
131M
                RETURN_SUCCESS;
1144
131M
            }
1145
77.5k
            RETURN_FAILURE;
1146
1147
244M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
244M
            ctx->u.rep = state->repeat;
1155
244M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
244M
            state->ptr = ptr;
1159
1160
244M
            ctx->count = ctx->u.rep->count+1;
1161
1162
244M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
244M
                   ptr, ctx->count));
1164
1165
244M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
244M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
244M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
244M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
233M
                ctx->u.rep->count = ctx->count;
1185
233M
                LASTMARK_SAVE();
1186
233M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
233M
                LAST_PTR_PUSH();
1189
233M
                ctx->u.rep->last_ptr = state->ptr;
1190
233M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
233M
                        ctx->u.rep->pattern+3);
1192
233M
                LAST_PTR_POP();
1193
233M
                if (ret) {
1194
111M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
111M
                    RETURN_ON_ERROR(ret);
1196
111M
                    RETURN_SUCCESS;
1197
111M
                }
1198
121M
                MARK_POP(ctx->lastmark);
1199
121M
                LASTMARK_RESTORE();
1200
121M
                ctx->u.rep->count = ctx->count-1;
1201
121M
                state->ptr = ptr;
1202
121M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
132M
            state->repeat = ctx->u.rep->prev;
1207
132M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
132M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
132M
            RETURN_ON_SUCCESS(ret);
1211
1.09M
            state->ptr = ptr;
1212
1.09M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
88.9M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
88.9M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
88.9M
                   ptr, pattern[1]));
1565
88.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
88.9M
            state->ptr = ptr - pattern[1];
1568
88.9M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
88.9M
            RETURN_ON_FAILURE(ret);
1570
85.9M
            pattern += pattern[0];
1571
85.9M
            DISPATCH;
1572
1573
85.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
28.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
28.9M
                   ptr, pattern[1]));
1578
28.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
28.9M
                state->ptr = ptr - pattern[1];
1580
28.9M
                LASTMARK_SAVE();
1581
28.9M
                if (state->repeat)
1582
28.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
57.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
57.8M
                if (ret) {
1586
10.9k
                    if (state->repeat)
1587
10.9k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
10.9k
                    RETURN_ON_ERROR(ret);
1589
10.9k
                    RETURN_FAILURE;
1590
10.9k
                }
1591
28.9M
                if (state->repeat)
1592
28.9M
                    MARK_POP(ctx->lastmark);
1593
28.9M
                LASTMARK_RESTORE();
1594
28.9M
            }
1595
28.9M
            pattern += pattern[0];
1596
28.9M
            DISPATCH;
1597
1598
28.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.47G
exit:
1620
1.47G
    ctx_pos = ctx->last_ctx_pos;
1621
1.47G
    jump = ctx->jump;
1622
1.47G
    DATA_POP_DISCARD(ctx);
1623
1.47G
    if (ctx_pos == -1) {
1624
547M
        state->sigcount = sigcount;
1625
547M
        return ret;
1626
547M
    }
1627
924M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
924M
    switch (jump) {
1630
233M
        case JUMP_MAX_UNTIL_2:
1631
233M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
233M
            goto jump_max_until_2;
1633
132M
        case JUMP_MAX_UNTIL_3:
1634
132M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
132M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
137M
        case JUMP_BRANCH:
1643
137M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
137M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
131M
        case JUMP_REPEAT:
1658
131M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
131M
            goto jump_repeat;
1660
8.13M
        case JUMP_REPEAT_ONE_1:
1661
8.13M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
8.13M
            goto jump_repeat_one_1;
1663
164M
        case JUMP_REPEAT_ONE_2:
1664
164M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
164M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
88.9M
        case JUMP_ASSERT:
1673
88.9M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
88.9M
            goto jump_assert;
1675
28.9M
        case JUMP_ASSERT_NOT:
1676
28.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
28.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
924M
    }
1683
1684
0
    return ret; /* should never get here */
1685
924M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
187M
{
601
187M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
187M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
187M
    Py_ssize_t ret = 0;
604
187M
    int jump;
605
187M
    unsigned int sigcount = state->sigcount;
606
607
187M
    SRE(match_context)* ctx;
608
187M
    SRE(match_context)* nextctx;
609
187M
    INIT_TRACE(state);
610
611
187M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
187M
    DATA_ALLOC(SRE(match_context), ctx);
614
187M
    ctx->last_ctx_pos = -1;
615
187M
    ctx->jump = JUMP_NONE;
616
187M
    ctx->toplevel = toplevel;
617
187M
    ctx_pos = alloc_pos;
618
619
187M
#if USE_COMPUTED_GOTOS
620
187M
#include "sre_targets.h"
621
187M
#endif
622
623
314M
entrance:
624
625
314M
    ;  // Empty statement: a label cannot be directly followed by a declaration.
626
314M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
314M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
25.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.97M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.97M
                   end - ptr, (size_t) pattern[3]));
634
4.97M
            RETURN_FAILURE;
635
4.97M
        }
636
20.9M
        pattern += pattern[1] + 1;
637
20.9M
    }
638
639
309M
#if USE_COMPUTED_GOTOS
640
309M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
309M
    {
647
648
309M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
161M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
161M
                   ptr, pattern[0]));
653
161M
            {
654
161M
                int i = pattern[0];
655
161M
                if (i & 1)
656
16.0M
                    state->lastindex = i/2 + 1;
657
161M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
158M
                    int j = state->lastmark + 1;
663
161M
                    while (j < i)
664
2.82M
                        state->mark[j++] = NULL;
665
158M
                    state->lastmark = i;
666
158M
                }
667
161M
                state->mark[i] = ptr;
668
161M
            }
669
161M
            pattern++;
670
161M
            DISPATCH;
671
672
161M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal character */
674
            /* <LITERAL> <code> */
675
49.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
49.1M
                   ptr, *pattern));
677
49.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.7M
                RETURN_FAILURE;
679
31.3M
            pattern++;
680
31.3M
            ptr++;
681
31.3M
            DISPATCH;
682
683
31.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
48.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
48.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
48.2M
            if (ctx->toplevel &&
698
48.2M
                ((state->match_all && ptr != state->end) ||
699
14.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
48.2M
            state->ptr = ptr;
704
48.2M
            RETURN_SUCCESS;
705
706
10.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
10.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
10.5M
            if (!SRE(at)(state, ptr, *pattern))
711
2.56M
                RETURN_FAILURE;
712
8.02M
            pattern++;
713
8.02M
            DISPATCH;
714
715
8.02M
        TARGET(SRE_OP_CATEGORY):
716
            /* match character in given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
37.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
37.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
37.1M
            if (ptr >= end ||
749
37.1M
                !SRE(charset)(state, pattern + 1, *ptr))
750
335k
                RETURN_FAILURE;
751
36.8M
            pattern += pattern[0];
752
36.8M
            ptr++;
753
36.8M
            DISPATCH;
754
755
36.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.49M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.49M
                   pattern, ptr, pattern[0]));
758
1.49M
            if (ptr >= end ||
759
1.49M
                sre_lower_ascii(*ptr) != *pattern)
760
258k
                RETURN_FAILURE;
761
1.23M
            pattern++;
762
1.23M
            ptr++;
763
1.23M
            DISPATCH;
764
765
1.23M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
20.4M
        TARGET(SRE_OP_JUMP):
845
20.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
20.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
20.4M
                   ptr, pattern[0]));
850
20.4M
            pattern += pattern[0];
851
20.4M
            DISPATCH;
852
853
39.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
39.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
39.6M
            LASTMARK_SAVE();
858
39.6M
            if (state->repeat)
859
6.37M
                MARK_PUSH(ctx->lastmark);
860
119M
            for (; pattern[0]; pattern += pattern[0]) {
861
98.2M
                if (pattern[1] == SRE_OP_LITERAL &&
862
98.2M
                    (ptr >= end ||
863
48.3M
                     (SRE_CODE) *ptr != pattern[2]))
864
21.6M
                    continue;
865
76.5M
                if (pattern[1] == SRE_OP_IN &&
866
76.5M
                    (ptr >= end ||
867
7.50M
                     !SRE(charset)(state, pattern + 3,
868
7.50M
                                   (SRE_CODE) *ptr)))
869
4.33M
                    continue;
870
72.2M
                state->ptr = ptr;
871
72.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
72.2M
                if (ret) {
873
18.5M
                    if (state->repeat)
874
6.04M
                        MARK_POP_DISCARD(ctx->lastmark);
875
18.5M
                    RETURN_ON_ERROR(ret);
876
18.5M
                    RETURN_SUCCESS;
877
18.5M
                }
878
53.7M
                if (state->repeat)
879
5.28k
                    MARK_POP_KEEP(ctx->lastmark);
880
53.7M
                LASTMARK_RESTORE();
881
53.7M
            }
882
21.0M
            if (state->repeat)
883
330k
                MARK_POP_DISCARD(ctx->lastmark);
884
21.0M
            RETURN_FAILURE;
885
886
176M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
176M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
176M
                   pattern[1], pattern[2]));
898
899
176M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
697k
                RETURN_FAILURE; /* cannot match */
901
902
175M
            state->ptr = ptr;
903
904
175M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
175M
            RETURN_ON_ERROR(ret);
906
175M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
175M
            ctx->count = ret;
908
175M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
175M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
154M
                RETURN_FAILURE;
917
918
21.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
21.5M
                ptr == state->end &&
920
21.5M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
57.7k
            {
922
                /* tail is empty.  we're finished */
923
57.7k
                state->ptr = ptr;
924
57.7k
                RETURN_SUCCESS;
925
57.7k
            }
926
927
21.4M
            LASTMARK_SAVE();
928
21.4M
            if (state->repeat)
929
10.8M
                MARK_PUSH(ctx->lastmark);
930
931
21.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
2.70M
                ctx->u.chr = pattern[pattern[0]+1];
935
2.70M
                for (;;) {
936
13.8M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.1M
                        ptr--;
939
11.1M
                        ctx->count--;
940
11.1M
                    }
941
2.70M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.28M
                        break;
943
1.41M
                    state->ptr = ptr;
944
1.41M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
1.41M
                            pattern+pattern[0]);
946
1.41M
                    if (ret) {
947
1.41M
                        if (state->repeat)
948
171k
                            MARK_POP_DISCARD(ctx->lastmark);
949
1.41M
                        RETURN_ON_ERROR(ret);
950
1.41M
                        RETURN_SUCCESS;
951
1.41M
                    }
952
280
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
280
                    LASTMARK_RESTORE();
955
956
280
                    ptr--;
957
280
                    ctx->count--;
958
280
                }
959
1.28M
                if (state->repeat)
960
125
                    MARK_POP_DISCARD(ctx->lastmark);
961
18.7M
            } else {
962
                /* general case */
963
20.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
19.8M
                    state->ptr = ptr;
965
19.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
19.8M
                            pattern+pattern[0]);
967
19.8M
                    if (ret) {
968
18.0M
                        if (state->repeat)
969
10.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
18.0M
                        RETURN_ON_ERROR(ret);
971
18.0M
                        RETURN_SUCCESS;
972
18.0M
                    }
973
1.80M
                    if (state->repeat)
974
1.13M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.80M
                    LASTMARK_RESTORE();
976
977
1.80M
                    ptr--;
978
1.80M
                    ctx->count--;
979
1.80M
                }
980
742k
                if (state->repeat)
981
601k
                    MARK_POP_DISCARD(ctx->lastmark);
982
742k
            }
983
2.03M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
6.14M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
6.14M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
6.14M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
6.14M
            ctx->u.rep = repeat_pool_malloc(state);
1127
6.14M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
6.14M
            ctx->u.rep->count = -1;
1131
6.14M
            ctx->u.rep->pattern = pattern;
1132
6.14M
            ctx->u.rep->prev = state->repeat;
1133
6.14M
            ctx->u.rep->last_ptr = NULL;
1134
6.14M
            state->repeat = ctx->u.rep;
1135
1136
6.14M
            state->ptr = ptr;
1137
6.14M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
6.14M
            state->repeat = ctx->u.rep->prev;
1139
6.14M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
6.14M
            if (ret) {
1142
6.06M
                RETURN_ON_ERROR(ret);
1143
6.06M
                RETURN_SUCCESS;
1144
6.06M
            }
1145
75.9k
            RETURN_FAILURE;
1146
1147
17.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
17.7M
            ctx->u.rep = state->repeat;
1155
17.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
17.7M
            state->ptr = ptr;
1159
1160
17.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
17.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
17.7M
                   ptr, ctx->count));
1164
1165
17.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
17.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
17.7M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
17.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
14.6M
                ctx->u.rep->count = ctx->count;
1185
14.6M
                LASTMARK_SAVE();
1186
14.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
14.6M
                LAST_PTR_PUSH();
1189
14.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
14.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
14.6M
                        ctx->u.rep->pattern+3);
1192
14.6M
                LAST_PTR_POP();
1193
14.6M
                if (ret) {
1194
11.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
11.0M
                    RETURN_ON_ERROR(ret);
1196
11.0M
                    RETURN_SUCCESS;
1197
11.0M
                }
1198
3.58M
                MARK_POP(ctx->lastmark);
1199
3.58M
                LASTMARK_RESTORE();
1200
3.58M
                ctx->u.rep->count = ctx->count-1;
1201
3.58M
                state->ptr = ptr;
1202
3.58M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.71M
            state->repeat = ctx->u.rep->prev;
1207
6.71M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.71M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.71M
            RETURN_ON_SUCCESS(ret);
1211
646k
            state->ptr = ptr;
1212
646k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.10M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.10M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.10M
                   ptr, pattern[1]));
1565
3.10M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.10M
            state->ptr = ptr - pattern[1];
1568
3.10M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.10M
            RETURN_ON_FAILURE(ret);
1570
3.02M
            pattern += pattern[0];
1571
3.02M
            DISPATCH;
1572
1573
3.02M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
2.84M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
2.84M
                   ptr, pattern[1]));
1578
2.84M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
2.84M
                state->ptr = ptr - pattern[1];
1580
2.84M
                LASTMARK_SAVE();
1581
2.84M
                if (state->repeat)
1582
2.84M
                    MARK_PUSH(ctx->lastmark);
1583
1584
5.69M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
5.69M
                if (ret) {
1586
848
                    if (state->repeat)
1587
848
                        MARK_POP_DISCARD(ctx->lastmark);
1588
848
                    RETURN_ON_ERROR(ret);
1589
848
                    RETURN_FAILURE;
1590
848
                }
1591
2.84M
                if (state->repeat)
1592
2.84M
                    MARK_POP(ctx->lastmark);
1593
2.84M
                LASTMARK_RESTORE();
1594
2.84M
            }
1595
2.84M
            pattern += pattern[0];
1596
2.84M
            DISPATCH;
1597
1598
2.84M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
314M
exit:
1620
314M
    ctx_pos = ctx->last_ctx_pos;
1621
314M
    jump = ctx->jump;
1622
314M
    DATA_POP_DISCARD(ctx);
1623
314M
    if (ctx_pos == -1) {
1624
187M
        state->sigcount = sigcount;
1625
187M
        return ret;
1626
187M
    }
1627
126M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
126M
    switch (jump) {
1630
14.6M
        case JUMP_MAX_UNTIL_2:
1631
14.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
14.6M
            goto jump_max_until_2;
1633
6.71M
        case JUMP_MAX_UNTIL_3:
1634
6.71M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.71M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
72.2M
        case JUMP_BRANCH:
1643
72.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
72.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
6.14M
        case JUMP_REPEAT:
1658
6.14M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
6.14M
            goto jump_repeat;
1660
1.41M
        case JUMP_REPEAT_ONE_1:
1661
1.41M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
1.41M
            goto jump_repeat_one_1;
1663
19.8M
        case JUMP_REPEAT_ONE_2:
1664
19.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
19.8M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.10M
        case JUMP_ASSERT:
1673
3.10M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.10M
            goto jump_assert;
1675
2.84M
        case JUMP_ASSERT_NOT:
1676
2.84M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
2.84M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
126M
    }
1683
1684
0
    return ret; /* should never get here */
1685
126M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
252M
{
601
252M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
252M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
252M
    Py_ssize_t ret = 0;
604
252M
    int jump;
605
252M
    unsigned int sigcount = state->sigcount;
606
607
252M
    SRE(match_context)* ctx;
608
252M
    SRE(match_context)* nextctx;
609
252M
    INIT_TRACE(state);
610
611
252M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
252M
    DATA_ALLOC(SRE(match_context), ctx);
614
252M
    ctx->last_ctx_pos = -1;
615
252M
    ctx->jump = JUMP_NONE;
616
252M
    ctx->toplevel = toplevel;
617
252M
    ctx_pos = alloc_pos;
618
619
252M
#if USE_COMPUTED_GOTOS
620
252M
#include "sre_targets.h"
621
252M
#endif
622
623
557M
entrance:
624
625
557M
    ;  // Fashion statement.
626
557M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
557M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
21.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
665
            TRACE(("reject (got %tu chars, need %zu)\n",
633
665
                   end - ptr, (size_t) pattern[3]));
634
665
            RETURN_FAILURE;
635
665
        }
636
21.8M
        pattern += pattern[1] + 1;
637
21.8M
    }
638
639
557M
#if USE_COMPUTED_GOTOS
640
557M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
557M
    {
647
648
557M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
243M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
243M
                   ptr, pattern[0]));
653
243M
            {
654
243M
                int i = pattern[0];
655
243M
                if (i & 1)
656
21.8M
                    state->lastindex = i/2 + 1;
657
243M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
241M
                    int j = state->lastmark + 1;
663
243M
                    while (j < i)
664
1.86M
                        state->mark[j++] = NULL;
665
241M
                    state->lastmark = i;
666
241M
                }
667
243M
                state->mark[i] = ptr;
668
243M
            }
669
243M
            pattern++;
670
243M
            DISPATCH;
671
672
243M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
69.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
69.4M
                   ptr, *pattern));
677
69.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
38.8M
                RETURN_FAILURE;
679
30.6M
            pattern++;
680
30.6M
            ptr++;
681
30.6M
            DISPATCH;
682
683
30.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
90.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
90.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
90.1M
            if (ctx->toplevel &&
698
90.1M
                ((state->match_all && ptr != state->end) ||
699
19.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
90.1M
            state->ptr = ptr;
704
90.1M
            RETURN_SUCCESS;
705
706
865k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
865k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
865k
            if (!SRE(at)(state, ptr, *pattern))
711
833k
                RETURN_FAILURE;
712
31.5k
            pattern++;
713
31.5k
            DISPATCH;
714
715
31.5k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
111M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
111M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
111M
            if (ptr >= end ||
749
111M
                !SRE(charset)(state, pattern + 1, *ptr))
750
3.95M
                RETURN_FAILURE;
751
107M
            pattern += pattern[0];
752
107M
            ptr++;
753
107M
            DISPATCH;
754
755
107M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.70M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.70M
                   pattern, ptr, pattern[0]));
758
2.70M
            if (ptr >= end ||
759
2.70M
                sre_lower_ascii(*ptr) != *pattern)
760
154k
                RETURN_FAILURE;
761
2.54M
            pattern++;
762
2.54M
            ptr++;
763
2.54M
            DISPATCH;
764
765
2.54M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.2M
        TARGET(SRE_OP_JUMP):
845
25.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.2M
                   ptr, pattern[0]));
850
25.2M
            pattern += pattern[0];
851
25.2M
            DISPATCH;
852
853
41.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
41.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
41.0M
            LASTMARK_SAVE();
858
41.0M
            if (state->repeat)
859
38.2M
                MARK_PUSH(ctx->lastmark);
860
91.5M
            for (; pattern[0]; pattern += pattern[0]) {
861
75.3M
                if (pattern[1] == SRE_OP_LITERAL &&
862
75.3M
                    (ptr >= end ||
863
37.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
18.8M
                    continue;
865
56.5M
                if (pattern[1] == SRE_OP_IN &&
866
56.5M
                    (ptr >= end ||
867
34.0M
                     !SRE(charset)(state, pattern + 3,
868
34.0M
                                   (SRE_CODE) *ptr)))
869
28.5M
                    continue;
870
27.9M
                state->ptr = ptr;
871
27.9M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
27.9M
                if (ret) {
873
24.7M
                    if (state->repeat)
874
23.4M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.7M
                    RETURN_ON_ERROR(ret);
876
24.7M
                    RETURN_SUCCESS;
877
24.7M
                }
878
3.19M
                if (state->repeat)
879
3.58k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.19M
                LASTMARK_RESTORE();
881
3.19M
            }
882
16.2M
            if (state->repeat)
883
14.7M
                MARK_POP_DISCARD(ctx->lastmark);
884
16.2M
            RETURN_FAILURE;
885
886
250M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
250M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
250M
                   pattern[1], pattern[2]));
898
899
250M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
175k
                RETURN_FAILURE; /* cannot match */
901
902
249M
            state->ptr = ptr;
903
904
249M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
249M
            RETURN_ON_ERROR(ret);
906
249M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
249M
            ctx->count = ret;
908
249M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
249M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
189M
                RETURN_FAILURE;
917
918
60.3M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
60.3M
                ptr == state->end &&
920
60.3M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
16.6k
            {
922
                /* tail is empty.  we're finished */
923
16.6k
                state->ptr = ptr;
924
16.6k
                RETURN_SUCCESS;
925
16.6k
            }
926
927
60.3M
            LASTMARK_SAVE();
928
60.3M
            if (state->repeat)
929
36.1M
                MARK_PUSH(ctx->lastmark);
930
931
60.3M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.22M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.23M
                for (;;) {
936
7.63M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
7.63M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
3.40M
                        ptr--;
939
3.40M
                        ctx->count--;
940
3.40M
                    }
941
4.23M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
22.9k
                        break;
943
4.20M
                    state->ptr = ptr;
944
4.20M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.20M
                            pattern+pattern[0]);
946
4.20M
                    if (ret) {
947
4.20M
                        if (state->repeat)
948
4.18M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.20M
                        RETURN_ON_ERROR(ret);
950
4.20M
                        RETURN_SUCCESS;
951
4.20M
                    }
952
4.99k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
4.99k
                    LASTMARK_RESTORE();
955
956
4.99k
                    ptr--;
957
4.99k
                    ctx->count--;
958
4.99k
                }
959
22.9k
                if (state->repeat)
960
171
                    MARK_POP_DISCARD(ctx->lastmark);
961
56.1M
            } else {
962
                /* general case */
963
57.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
56.6M
                    state->ptr = ptr;
965
56.6M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
56.6M
                            pattern+pattern[0]);
967
56.6M
                    if (ret) {
968
55.7M
                        if (state->repeat)
969
31.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
55.7M
                        RETURN_ON_ERROR(ret);
971
55.7M
                        RETURN_SUCCESS;
972
55.7M
                    }
973
877k
                    if (state->repeat)
974
729k
                        MARK_POP_KEEP(ctx->lastmark);
975
877k
                    LASTMARK_RESTORE();
976
977
877k
                    ptr--;
978
877k
                    ctx->count--;
979
877k
                }
980
367k
                if (state->repeat)
981
364k
                    MARK_POP_DISCARD(ctx->lastmark);
982
367k
            }
983
390k
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
45.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
45.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
45.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
45.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
45.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
45.1M
            ctx->u.rep->count = -1;
1131
45.1M
            ctx->u.rep->pattern = pattern;
1132
45.1M
            ctx->u.rep->prev = state->repeat;
1133
45.1M
            ctx->u.rep->last_ptr = NULL;
1134
45.1M
            state->repeat = ctx->u.rep;
1135
1136
45.1M
            state->ptr = ptr;
1137
45.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
45.1M
            state->repeat = ctx->u.rep->prev;
1139
45.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
45.1M
            if (ret) {
1142
45.1M
                RETURN_ON_ERROR(ret);
1143
45.1M
                RETURN_SUCCESS;
1144
45.1M
            }
1145
938
            RETURN_FAILURE;
1146
1147
84.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
84.7M
            ctx->u.rep = state->repeat;
1155
84.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
84.7M
            state->ptr = ptr;
1159
1160
84.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
84.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
84.7M
                   ptr, ctx->count));
1164
1165
84.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
84.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
84.7M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
84.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
79.9M
                ctx->u.rep->count = ctx->count;
1185
79.9M
                LASTMARK_SAVE();
1186
79.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
79.9M
                LAST_PTR_PUSH();
1189
79.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
79.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
79.9M
                        ctx->u.rep->pattern+3);
1192
79.9M
                LAST_PTR_POP();
1193
79.9M
                if (ret) {
1194
39.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
39.1M
                    RETURN_ON_ERROR(ret);
1196
39.1M
                    RETURN_SUCCESS;
1197
39.1M
                }
1198
40.8M
                MARK_POP(ctx->lastmark);
1199
40.8M
                LASTMARK_RESTORE();
1200
40.8M
                ctx->u.rep->count = ctx->count-1;
1201
40.8M
                state->ptr = ptr;
1202
40.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
45.5M
            state->repeat = ctx->u.rep->prev;
1207
45.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
45.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
45.5M
            RETURN_ON_SUCCESS(ret);
1211
365k
            state->ptr = ptr;
1212
365k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
31.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
31.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
31.8M
                   ptr, pattern[1]));
1565
31.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
31.8M
            state->ptr = ptr - pattern[1];
1568
31.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
31.8M
            RETURN_ON_FAILURE(ret);
1570
29.3M
            pattern += pattern[0];
1571
29.3M
            DISPATCH;
1572
1573
29.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
13.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
13.8M
                   ptr, pattern[1]));
1578
13.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
13.8M
                state->ptr = ptr - pattern[1];
1580
13.8M
                LASTMARK_SAVE();
1581
13.8M
                if (state->repeat)
1582
13.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
27.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
27.6M
                if (ret) {
1586
3.40k
                    if (state->repeat)
1587
3.40k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.40k
                    RETURN_ON_ERROR(ret);
1589
3.40k
                    RETURN_FAILURE;
1590
3.40k
                }
1591
13.8M
                if (state->repeat)
1592
13.8M
                    MARK_POP(ctx->lastmark);
1593
13.8M
                LASTMARK_RESTORE();
1594
13.8M
            }
1595
13.8M
            pattern += pattern[0];
1596
13.8M
            DISPATCH;
1597
1598
13.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
557M
exit:
1620
557M
    ctx_pos = ctx->last_ctx_pos;
1621
557M
    jump = ctx->jump;
1622
557M
    DATA_POP_DISCARD(ctx);
1623
557M
    if (ctx_pos == -1) {
1624
252M
        state->sigcount = sigcount;
1625
252M
        return ret;
1626
252M
    }
1627
305M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
305M
    switch (jump) {
1630
79.9M
        case JUMP_MAX_UNTIL_2:
1631
79.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
79.9M
            goto jump_max_until_2;
1633
45.5M
        case JUMP_MAX_UNTIL_3:
1634
45.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
45.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
27.9M
        case JUMP_BRANCH:
1643
27.9M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
27.9M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
45.1M
        case JUMP_REPEAT:
1658
45.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
45.1M
            goto jump_repeat;
1660
4.20M
        case JUMP_REPEAT_ONE_1:
1661
4.20M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.20M
            goto jump_repeat_one_1;
1663
56.6M
        case JUMP_REPEAT_ONE_2:
1664
56.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
56.6M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
31.8M
        case JUMP_ASSERT:
1673
31.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
31.8M
            goto jump_assert;
1675
13.8M
        case JUMP_ASSERT_NOT:
1676
13.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
13.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
305M
    }
1683
1684
0
    return ret; /* should never get here */
1685
305M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
108M
{
601
108M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
108M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
108M
    Py_ssize_t ret = 0;
604
108M
    int jump;
605
108M
    unsigned int sigcount = state->sigcount;
606
607
108M
    SRE(match_context)* ctx;
608
108M
    SRE(match_context)* nextctx;
609
108M
    INIT_TRACE(state);
610
611
108M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
108M
    DATA_ALLOC(SRE(match_context), ctx);
614
108M
    ctx->last_ctx_pos = -1;
615
108M
    ctx->jump = JUMP_NONE;
616
108M
    ctx->toplevel = toplevel;
617
108M
    ctx_pos = alloc_pos;
618
619
108M
#if USE_COMPUTED_GOTOS
620
108M
#include "sre_targets.h"
621
108M
#endif
622
623
600M
entrance:
624
625
600M
    ;  // Fashion statement.
626
600M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
600M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
27.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
395
            TRACE(("reject (got %tu chars, need %zu)\n",
633
395
                   end - ptr, (size_t) pattern[3]));
634
395
            RETURN_FAILURE;
635
395
        }
636
27.2M
        pattern += pattern[1] + 1;
637
27.2M
    }
638
639
600M
#if USE_COMPUTED_GOTOS
640
600M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
600M
    {
647
648
600M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
162M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
162M
                   ptr, pattern[0]));
653
162M
            {
654
162M
                int i = pattern[0];
655
162M
                if (i & 1)
656
32.5M
                    state->lastindex = i/2 + 1;
657
162M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
161M
                    int j = state->lastmark + 1;
663
162M
                    while (j < i)
664
1.20M
                        state->mark[j++] = NULL;
665
161M
                    state->lastmark = i;
666
161M
                }
667
162M
                state->mark[i] = ptr;
668
162M
            }
669
162M
            pattern++;
670
162M
            DISPATCH;
671
672
162M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
84.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
84.7M
                   ptr, *pattern));
677
84.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
63.4M
                RETURN_FAILURE;
679
21.2M
            pattern++;
680
21.2M
            ptr++;
681
21.2M
            DISPATCH;
682
683
21.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
86.6M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
86.6M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
86.6M
            if (ctx->toplevel &&
698
86.6M
                ((state->match_all && ptr != state->end) ||
699
26.8M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
86.6M
            state->ptr = ptr;
704
86.6M
            RETURN_SUCCESS;
705
706
582k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
582k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
582k
            if (!SRE(at)(state, ptr, *pattern))
711
576k
                RETURN_FAILURE;
712
5.52k
            pattern++;
713
5.52k
            DISPATCH;
714
715
5.52k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
132M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
132M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
132M
            if (ptr >= end ||
749
132M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.09M
                RETURN_FAILURE;
751
131M
            pattern += pattern[0];
752
131M
            ptr++;
753
131M
            DISPATCH;
754
755
131M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
532k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
532k
                   pattern, ptr, pattern[0]));
758
532k
            if (ptr >= end ||
759
532k
                sre_lower_ascii(*ptr) != *pattern)
760
15.5k
                RETURN_FAILURE;
761
517k
            pattern++;
762
517k
            ptr++;
763
517k
            DISPATCH;
764
765
517k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
36.8M
        TARGET(SRE_OP_JUMP):
845
36.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
36.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
36.8M
                   ptr, pattern[0]));
850
36.8M
            pattern += pattern[0];
851
36.8M
            DISPATCH;
852
853
63.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
63.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
63.2M
            LASTMARK_SAVE();
858
63.2M
            if (state->repeat)
859
60.2M
                MARK_PUSH(ctx->lastmark);
860
136M
            for (; pattern[0]; pattern += pattern[0]) {
861
109M
                if (pattern[1] == SRE_OP_LITERAL &&
862
109M
                    (ptr >= end ||
863
49.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
34.1M
                    continue;
865
75.3M
                if (pattern[1] == SRE_OP_IN &&
866
75.3M
                    (ptr >= end ||
867
57.4M
                     !SRE(charset)(state, pattern + 3,
868
57.4M
                                   (SRE_CODE) *ptr)))
869
38.0M
                    continue;
870
37.3M
                state->ptr = ptr;
871
37.3M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
37.3M
                if (ret) {
873
36.3M
                    if (state->repeat)
874
34.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
36.3M
                    RETURN_ON_ERROR(ret);
876
36.3M
                    RETURN_SUCCESS;
877
36.3M
                }
878
995k
                if (state->repeat)
879
6.91k
                    MARK_POP_KEEP(ctx->lastmark);
880
995k
                LASTMARK_RESTORE();
881
995k
            }
882
26.8M
            if (state->repeat)
883
26.0M
                MARK_POP_DISCARD(ctx->lastmark);
884
26.8M
            RETURN_FAILURE;
885
886
164M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
164M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
164M
                   pattern[1], pattern[2]));
898
899
164M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
25.9k
                RETURN_FAILURE; /* cannot match */
901
902
164M
            state->ptr = ptr;
903
904
164M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
164M
            RETURN_ON_ERROR(ret);
906
164M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
164M
            ctx->count = ret;
908
164M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
164M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
74.2M
                RETURN_FAILURE;
917
918
89.8M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
89.8M
                ptr == state->end &&
920
89.8M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.86k
            {
922
                /* tail is empty.  we're finished */
923
3.86k
                state->ptr = ptr;
924
3.86k
                RETURN_SUCCESS;
925
3.86k
            }
926
927
89.8M
            LASTMARK_SAVE();
928
89.8M
            if (state->repeat)
929
58.1M
                MARK_PUSH(ctx->lastmark);
930
931
89.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
2.54M
                ctx->u.chr = pattern[pattern[0]+1];
935
2.54M
                for (;;) {
936
5.40M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
5.40M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
2.86M
                        ptr--;
939
2.86M
                        ctx->count--;
940
2.86M
                    }
941
2.54M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
31.7k
                        break;
943
2.51M
                    state->ptr = ptr;
944
2.51M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.51M
                            pattern+pattern[0]);
946
2.51M
                    if (ret) {
947
2.50M
                        if (state->repeat)
948
2.49M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.50M
                        RETURN_ON_ERROR(ret);
950
2.50M
                        RETURN_SUCCESS;
951
2.50M
                    }
952
5.34k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
5.34k
                    LASTMARK_RESTORE();
955
956
5.34k
                    ptr--;
957
5.34k
                    ctx->count--;
958
5.34k
                }
959
31.7k
                if (state->repeat)
960
206
                    MARK_POP_DISCARD(ctx->lastmark);
961
87.3M
            } else {
962
                /* general case */
963
88.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
88.3M
                    state->ptr = ptr;
965
88.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
88.3M
                            pattern+pattern[0]);
967
88.3M
                    if (ret) {
968
87.2M
                        if (state->repeat)
969
55.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
87.2M
                        RETURN_ON_ERROR(ret);
971
87.2M
                        RETURN_SUCCESS;
972
87.2M
                    }
973
1.08M
                    if (state->repeat)
974
160k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.08M
                    LASTMARK_RESTORE();
976
977
1.08M
                    ptr--;
978
1.08M
                    ctx->count--;
979
1.08M
                }
980
80.7k
                if (state->repeat)
981
80.1k
                    MARK_POP_DISCARD(ctx->lastmark);
982
80.7k
            }
983
112k
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
79.7M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
79.7M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
79.7M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
79.7M
            ctx->u.rep = repeat_pool_malloc(state);
1127
79.7M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
79.7M
            ctx->u.rep->count = -1;
1131
79.7M
            ctx->u.rep->pattern = pattern;
1132
79.7M
            ctx->u.rep->prev = state->repeat;
1133
79.7M
            ctx->u.rep->last_ptr = NULL;
1134
79.7M
            state->repeat = ctx->u.rep;
1135
1136
79.7M
            state->ptr = ptr;
1137
79.7M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
79.7M
            state->repeat = ctx->u.rep->prev;
1139
79.7M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
79.7M
            if (ret) {
1142
79.7M
                RETURN_ON_ERROR(ret);
1143
79.7M
                RETURN_SUCCESS;
1144
79.7M
            }
1145
609
            RETURN_FAILURE;
1146
1147
141M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
141M
            ctx->u.rep = state->repeat;
1155
141M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
141M
            state->ptr = ptr;
1159
1160
141M
            ctx->count = ctx->u.rep->count+1;
1161
1162
141M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
141M
                   ptr, ctx->count));
1164
1165
141M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
141M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
141M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
141M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
138M
                ctx->u.rep->count = ctx->count;
1185
138M
                LASTMARK_SAVE();
1186
138M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
138M
                LAST_PTR_PUSH();
1189
138M
                ctx->u.rep->last_ptr = state->ptr;
1190
138M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
138M
                        ctx->u.rep->pattern+3);
1192
138M
                LAST_PTR_POP();
1193
138M
                if (ret) {
1194
61.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
61.7M
                    RETURN_ON_ERROR(ret);
1196
61.7M
                    RETURN_SUCCESS;
1197
61.7M
                }
1198
76.8M
                MARK_POP(ctx->lastmark);
1199
76.8M
                LASTMARK_RESTORE();
1200
76.8M
                ctx->u.rep->count = ctx->count-1;
1201
76.8M
                state->ptr = ptr;
1202
76.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
79.8M
            state->repeat = ctx->u.rep->prev;
1207
79.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
79.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
79.8M
            RETURN_ON_SUCCESS(ret);
1211
80.7k
            state->ptr = ptr;
1212
80.7k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference, comparing via sre_lower_ascii() */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference, comparing via sre_lower_unicode() */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference, comparing via sre_lower_locale() */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
53.9M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
53.9M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
53.9M
                   ptr, pattern[1]));
1565
53.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
53.9M
            state->ptr = ptr - pattern[1];
1568
53.9M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
53.9M
            RETURN_ON_FAILURE(ret);
1570
53.5M
            pattern += pattern[0];
1571
53.5M
            DISPATCH;
1572
1573
53.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
12.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
12.2M
                   ptr, pattern[1]));
1578
12.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
12.2M
                state->ptr = ptr - pattern[1];
1580
12.2M
                LASTMARK_SAVE();
1581
12.2M
                if (state->repeat)
1582
12.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
24.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
24.4M
                if (ret) {
1586
6.71k
                    if (state->repeat)
1587
6.71k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
6.71k
                    RETURN_ON_ERROR(ret);
1589
6.71k
                    RETURN_FAILURE;
1590
6.71k
                }
1591
12.2M
                if (state->repeat)
1592
12.2M
                    MARK_POP(ctx->lastmark);
1593
12.2M
                LASTMARK_RESTORE();
1594
12.2M
            }
1595
12.2M
            pattern += pattern[0];
1596
12.2M
            DISPATCH;
1597
1598
12.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
600M
exit:
1620
600M
    ctx_pos = ctx->last_ctx_pos;
1621
600M
    jump = ctx->jump;
1622
600M
    DATA_POP_DISCARD(ctx);
1623
600M
    if (ctx_pos == -1) {
1624
108M
        state->sigcount = sigcount;
1625
108M
        return ret;
1626
108M
    }
1627
492M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
492M
    switch (jump) {
1630
138M
        case JUMP_MAX_UNTIL_2:
1631
138M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
138M
            goto jump_max_until_2;
1633
79.8M
        case JUMP_MAX_UNTIL_3:
1634
79.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
79.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
37.3M
        case JUMP_BRANCH:
1643
37.3M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
37.3M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
79.7M
        case JUMP_REPEAT:
1658
79.7M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
79.7M
            goto jump_repeat;
1660
2.51M
        case JUMP_REPEAT_ONE_1:
1661
2.51M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.51M
            goto jump_repeat_one_1;
1663
88.3M
        case JUMP_REPEAT_ONE_2:
1664
88.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
88.3M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
53.9M
        case JUMP_ASSERT:
1673
53.9M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
53.9M
            goto jump_assert;
1675
12.2M
        case JUMP_ASSERT_NOT:
1676
12.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
12.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
492M
    }
1683
1684
0
    return ret; /* should never get here */
1685
492M
}
1686
1687
/* capturing groups must be reset between successive SRE(match) calls in a loop */
1688
#define RESET_CAPTURE_GROUP() \
1689
394M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
83.8M
{
1694
83.8M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
83.8M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
83.8M
    Py_ssize_t status = 0;
1697
83.8M
    Py_ssize_t prefix_len = 0;
1698
83.8M
    Py_ssize_t prefix_skip = 0;
1699
83.8M
    SRE_CODE* prefix = NULL;
1700
83.8M
    SRE_CODE* charset = NULL;
1701
83.8M
    SRE_CODE* overlap = NULL;
1702
83.8M
    int flags = 0;
1703
83.8M
    INIT_TRACE(state);
1704
1705
83.8M
    if (ptr > end)
1706
0
        return 0;
1707
1708
83.8M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
83.8M
        flags = pattern[2];
1713
1714
83.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.34M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.34M
                   end - ptr, (size_t) pattern[3]));
1717
1.34M
            return 0;
1718
1.34M
        }
1719
82.5M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
5.48M
            end -= pattern[3] - 1;
1723
5.48M
            if (end <= ptr)
1724
0
                end = ptr;
1725
5.48M
        }
1726
1727
82.5M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
5.49M
            prefix_len = pattern[5];
1731
5.49M
            prefix_skip = pattern[6];
1732
5.49M
            prefix = pattern + 7;
1733
5.49M
            overlap = prefix + prefix_len - 1;
1734
77.0M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
73.7M
            charset = pattern + 5;
1738
1739
82.5M
        pattern += 1 + pattern[1];
1740
82.5M
    }
1741
1742
82.5M
    TRACE(("prefix = %p %zd %zd\n",
1743
82.5M
           prefix, prefix_len, prefix_skip));
1744
82.5M
    TRACE(("charset = %p\n", charset));
1745
1746
82.5M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
5.11M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
2.86M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.86M
#endif
1753
2.86M
        end = (SRE_CHAR *)state->end;
1754
2.86M
        state->must_advance = 0;
1755
5.85M
        while (ptr < end) {
1756
94.9M
            while (*ptr != c) {
1757
89.4M
                if (++ptr >= end)
1758
342k
                    return 0;
1759
89.4M
            }
1760
5.51M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
5.51M
            state->start = ptr;
1762
5.51M
            state->ptr = ptr + prefix_skip;
1763
5.51M
            if (flags & SRE_INFO_LITERAL)
1764
9.64k
                return 1; /* we got all of it */
1765
5.50M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
5.50M
            if (status != 0)
1767
4.75M
                return status;
1768
746k
            ++ptr;
1769
746k
            RESET_CAPTURE_GROUP();
1770
746k
        }
1771
6.43k
        return 0;
1772
2.86M
    }
1773
1774
77.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
385k
        Py_ssize_t i = 0;
1778
1779
385k
        end = (SRE_CHAR *)state->end;
1780
385k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.07M
        for (i = 0; i < prefix_len; i++)
1784
713k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
356k
#endif
1787
1.19M
        while (ptr < end) {
1788
1.19M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
7.86M
            while (*ptr++ != c) {
1790
6.66M
                if (ptr >= end)
1791
276
                    return 0;
1792
6.66M
            }
1793
1.19M
            if (ptr >= end)
1794
42
                return 0;
1795
1796
1.19M
            i = 1;
1797
1.19M
            state->must_advance = 0;
1798
1.20M
            do {
1799
1.20M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
991k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
991k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
991k
                    state->start = ptr - (prefix_len - 1);
1808
991k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
991k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
991k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
991k
                    if (status != 0)
1813
385k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
606k
                    if (++ptr >= end)
1816
24
                        return 0;
1817
606k
                    RESET_CAPTURE_GROUP();
1818
606k
                }
1819
823k
                i = overlap[i];
1820
823k
            } while (i != 0);
1821
1.19M
        }
1822
0
        return 0;
1823
385k
    }
1824
1825
77.0M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
73.7M
        end = (SRE_CHAR *)state->end;
1828
73.7M
        state->must_advance = 0;
1829
76.6M
        for (;;) {
1830
350M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
273M
                ptr++;
1832
76.6M
            if (ptr >= end)
1833
3.64M
                return 0;
1834
73.0M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
73.0M
            state->start = ptr;
1836
73.0M
            state->ptr = ptr;
1837
73.0M
            status = SRE(match)(state, pattern, 0);
1838
73.0M
            if (status != 0)
1839
70.1M
                break;
1840
2.88M
            ptr++;
1841
2.88M
            RESET_CAPTURE_GROUP();
1842
2.88M
        }
1843
73.7M
    } else {
1844
        /* general case */
1845
3.24M
        assert(ptr <= end);
1846
3.24M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.24M
        state->start = state->ptr = ptr;
1848
3.24M
        status = SRE(match)(state, pattern, 1);
1849
3.24M
        state->must_advance = 0;
1850
3.24M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.24M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
393M
        while (status == 0 && ptr < end) {
1858
390M
            ptr++;
1859
390M
            RESET_CAPTURE_GROUP();
1860
390M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
390M
            state->start = state->ptr = ptr;
1862
390M
            status = SRE(match)(state, pattern, 0);
1863
390M
        }
1864
3.24M
    }
1865
1866
73.3M
    return status;
1867
77.0M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
34.7M
{
1694
34.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
34.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
34.7M
    Py_ssize_t status = 0;
1697
34.7M
    Py_ssize_t prefix_len = 0;
1698
34.7M
    Py_ssize_t prefix_skip = 0;
1699
34.7M
    SRE_CODE* prefix = NULL;
1700
34.7M
    SRE_CODE* charset = NULL;
1701
34.7M
    SRE_CODE* overlap = NULL;
1702
34.7M
    int flags = 0;
1703
34.7M
    INIT_TRACE(state);
1704
1705
34.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
34.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
34.7M
        flags = pattern[2];
1713
1714
34.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.23M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.23M
                   end - ptr, (size_t) pattern[3]));
1717
1.23M
            return 0;
1718
1.23M
        }
1719
33.5M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.52M
            end -= pattern[3] - 1;
1723
1.52M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.52M
        }
1726
1727
33.5M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.52M
            prefix_len = pattern[5];
1731
1.52M
            prefix_skip = pattern[6];
1732
1.52M
            prefix = pattern + 7;
1733
1.52M
            overlap = prefix + prefix_len - 1;
1734
31.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
29.8M
            charset = pattern + 5;
1738
1739
33.5M
        pattern += 1 + pattern[1];
1740
33.5M
    }
1741
1742
33.5M
    TRACE(("prefix = %p %zd %zd\n",
1743
33.5M
           prefix, prefix_len, prefix_skip));
1744
33.5M
    TRACE(("charset = %p\n", charset));
1745
1746
33.5M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.50M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.50M
#if SIZEOF_SRE_CHAR < 4
1750
1.50M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.50M
#endif
1753
1.50M
        end = (SRE_CHAR *)state->end;
1754
1.50M
        state->must_advance = 0;
1755
1.71M
        while (ptr < end) {
1756
23.4M
            while (*ptr != c) {
1757
22.0M
                if (++ptr >= end)
1758
261k
                    return 0;
1759
22.0M
            }
1760
1.44M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.44M
            state->start = ptr;
1762
1.44M
            state->ptr = ptr + prefix_skip;
1763
1.44M
            if (flags & SRE_INFO_LITERAL)
1764
417
                return 1; /* we got all of it */
1765
1.44M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.44M
            if (status != 0)
1767
1.24M
                return status;
1768
200k
            ++ptr;
1769
200k
            RESET_CAPTURE_GROUP();
1770
200k
        }
1771
3.76k
        return 0;
1772
1.50M
    }
1773
1774
32.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
13.0k
        Py_ssize_t i = 0;
1778
1779
13.0k
        end = (SRE_CHAR *)state->end;
1780
13.0k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
13.0k
#if SIZEOF_SRE_CHAR < 4
1783
39.1k
        for (i = 0; i < prefix_len; i++)
1784
26.0k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
13.0k
#endif
1787
414k
        while (ptr < end) {
1788
414k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.24M
            while (*ptr++ != c) {
1790
2.82M
                if (ptr >= end)
1791
60
                    return 0;
1792
2.82M
            }
1793
414k
            if (ptr >= end)
1794
18
                return 0;
1795
1796
414k
            i = 1;
1797
414k
            state->must_advance = 0;
1798
414k
            do {
1799
414k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
307k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
307k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
307k
                    state->start = ptr - (prefix_len - 1);
1808
307k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
307k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
307k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
307k
                    if (status != 0)
1813
12.9k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
294k
                    if (++ptr >= end)
1816
11
                        return 0;
1817
294k
                    RESET_CAPTURE_GROUP();
1818
294k
                }
1819
401k
                i = overlap[i];
1820
401k
            } while (i != 0);
1821
414k
        }
1822
0
        return 0;
1823
13.0k
    }
1824
1825
31.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
29.8M
        end = (SRE_CHAR *)state->end;
1828
29.8M
        state->must_advance = 0;
1829
31.7M
        for (;;) {
1830
79.7M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
47.9M
                ptr++;
1832
31.7M
            if (ptr >= end)
1833
2.53M
                return 0;
1834
29.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
29.2M
            state->start = ptr;
1836
29.2M
            state->ptr = ptr;
1837
29.2M
            status = SRE(match)(state, pattern, 0);
1838
29.2M
            if (status != 0)
1839
27.3M
                break;
1840
1.92M
            ptr++;
1841
1.92M
            RESET_CAPTURE_GROUP();
1842
1.92M
        }
1843
29.8M
    } else {
1844
        /* general case */
1845
2.12M
        assert(ptr <= end);
1846
2.12M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.12M
        state->start = state->ptr = ptr;
1848
2.12M
        status = SRE(match)(state, pattern, 1);
1849
2.12M
        state->must_advance = 0;
1850
2.12M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.12M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
130M
        while (status == 0 && ptr < end) {
1858
128M
            ptr++;
1859
128M
            RESET_CAPTURE_GROUP();
1860
128M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
128M
            state->start = state->ptr = ptr;
1862
128M
            status = SRE(match)(state, pattern, 0);
1863
128M
        }
1864
2.12M
    }
1865
1866
29.4M
    return status;
1867
31.9M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
42.7M
{
1694
42.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
42.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
42.7M
    Py_ssize_t status = 0;
1697
42.7M
    Py_ssize_t prefix_len = 0;
1698
42.7M
    Py_ssize_t prefix_skip = 0;
1699
42.7M
    SRE_CODE* prefix = NULL;
1700
42.7M
    SRE_CODE* charset = NULL;
1701
42.7M
    SRE_CODE* overlap = NULL;
1702
42.7M
    int flags = 0;
1703
42.7M
    INIT_TRACE(state);
1704
1705
42.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
42.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
42.7M
        flags = pattern[2];
1713
1714
42.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
109k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
109k
                   end - ptr, (size_t) pattern[3]));
1717
109k
            return 0;
1718
109k
        }
1719
42.6M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.69M
            end -= pattern[3] - 1;
1723
1.69M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.69M
        }
1726
1727
42.6M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.69M
            prefix_len = pattern[5];
1731
1.69M
            prefix_skip = pattern[6];
1732
1.69M
            prefix = pattern + 7;
1733
1.69M
            overlap = prefix + prefix_len - 1;
1734
40.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
39.9M
            charset = pattern + 5;
1738
1739
42.6M
        pattern += 1 + pattern[1];
1740
42.6M
    }
1741
1742
42.6M
    TRACE(("prefix = %p %zd %zd\n",
1743
42.6M
           prefix, prefix_len, prefix_skip));
1744
42.6M
    TRACE(("charset = %p\n", charset));
1745
1746
42.6M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.35M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.35M
#if SIZEOF_SRE_CHAR < 4
1750
1.35M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.35M
#endif
1753
1.35M
        end = (SRE_CHAR *)state->end;
1754
1.35M
        state->must_advance = 0;
1755
1.63M
        while (ptr < end) {
1756
47.8M
            while (*ptr != c) {
1757
46.2M
                if (++ptr >= end)
1758
75.7k
                    return 0;
1759
46.2M
            }
1760
1.55M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.55M
            state->start = ptr;
1762
1.55M
            state->ptr = ptr + prefix_skip;
1763
1.55M
            if (flags & SRE_INFO_LITERAL)
1764
6.45k
                return 1; /* we got all of it */
1765
1.55M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.55M
            if (status != 0)
1767
1.27M
                return status;
1768
280k
            ++ptr;
1769
280k
            RESET_CAPTURE_GROUP();
1770
280k
        }
1771
2.04k
        return 0;
1772
1.35M
    }
1773
1774
41.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
343k
        Py_ssize_t i = 0;
1778
1779
343k
        end = (SRE_CHAR *)state->end;
1780
343k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
343k
#if SIZEOF_SRE_CHAR < 4
1783
1.03M
        for (i = 0; i < prefix_len; i++)
1784
687k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
343k
#endif
1787
628k
        while (ptr < end) {
1788
628k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.55M
            while (*ptr++ != c) {
1790
1.92M
                if (ptr >= end)
1791
110
                    return 0;
1792
1.92M
            }
1793
628k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
628k
            i = 1;
1797
628k
            state->must_advance = 0;
1798
629k
            do {
1799
629k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
555k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
555k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
555k
                    state->start = ptr - (prefix_len - 1);
1808
555k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
555k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
555k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
555k
                    if (status != 0)
1813
343k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
212k
                    if (++ptr >= end)
1816
6
                        return 0;
1817
212k
                    RESET_CAPTURE_GROUP();
1818
212k
                }
1819
285k
                i = overlap[i];
1820
285k
            } while (i != 0);
1821
628k
        }
1822
0
        return 0;
1823
343k
    }
1824
1825
40.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
39.9M
        end = (SRE_CHAR *)state->end;
1828
39.9M
        state->must_advance = 0;
1829
40.4M
        for (;;) {
1830
193M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
153M
                ptr++;
1832
40.4M
            if (ptr >= end)
1833
1.05M
                return 0;
1834
39.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
39.3M
            state->start = ptr;
1836
39.3M
            state->ptr = ptr;
1837
39.3M
            status = SRE(match)(state, pattern, 0);
1838
39.3M
            if (status != 0)
1839
38.9M
                break;
1840
468k
            ptr++;
1841
468k
            RESET_CAPTURE_GROUP();
1842
468k
        }
1843
39.9M
    } else {
1844
        /* general case */
1845
915k
        assert(ptr <= end);
1846
915k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
915k
        state->start = state->ptr = ptr;
1848
915k
        status = SRE(match)(state, pattern, 1);
1849
915k
        state->must_advance = 0;
1850
915k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
915k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
188M
        while (status == 0 && ptr < end) {
1858
187M
            ptr++;
1859
187M
            RESET_CAPTURE_GROUP();
1860
187M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
187M
            state->start = state->ptr = ptr;
1862
187M
            status = SRE(match)(state, pattern, 0);
1863
187M
        }
1864
915k
    }
1865
1866
39.8M
    return status;
1867
40.9M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.41M
{
1694
6.41M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.41M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.41M
    Py_ssize_t status = 0;
1697
6.41M
    Py_ssize_t prefix_len = 0;
1698
6.41M
    Py_ssize_t prefix_skip = 0;
1699
6.41M
    SRE_CODE* prefix = NULL;
1700
6.41M
    SRE_CODE* charset = NULL;
1701
6.41M
    SRE_CODE* overlap = NULL;
1702
6.41M
    int flags = 0;
1703
6.41M
    INIT_TRACE(state);
1704
1705
6.41M
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.41M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.41M
        flags = pattern[2];
1713
1714
6.41M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.76k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.76k
                   end - ptr, (size_t) pattern[3]));
1717
6.76k
            return 0;
1718
6.76k
        }
1719
6.40M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.27M
            end -= pattern[3] - 1;
1723
2.27M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.27M
        }
1726
1727
6.40M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.27M
            prefix_len = pattern[5];
1731
2.27M
            prefix_skip = pattern[6];
1732
2.27M
            prefix = pattern + 7;
1733
2.27M
            overlap = prefix + prefix_len - 1;
1734
4.13M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.92M
            charset = pattern + 5;
1738
1739
6.40M
        pattern += 1 + pattern[1];
1740
6.40M
    }
1741
1742
6.40M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.40M
           prefix, prefix_len, prefix_skip));
1744
6.40M
    TRACE(("charset = %p\n", charset));
1745
1746
6.40M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.24M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.24M
        end = (SRE_CHAR *)state->end;
1754
2.24M
        state->must_advance = 0;
1755
2.51M
        while (ptr < end) {
1756
23.6M
            while (*ptr != c) {
1757
21.1M
                if (++ptr >= end)
1758
5.33k
                    return 0;
1759
21.1M
            }
1760
2.50M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.50M
            state->start = ptr;
1762
2.50M
            state->ptr = ptr + prefix_skip;
1763
2.50M
            if (flags & SRE_INFO_LITERAL)
1764
2.76k
                return 1; /* we got all of it */
1765
2.50M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.50M
            if (status != 0)
1767
2.23M
                return status;
1768
265k
            ++ptr;
1769
265k
            RESET_CAPTURE_GROUP();
1770
265k
        }
1771
633
        return 0;
1772
2.24M
    }
1773
1774
4.16M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
28.5k
        Py_ssize_t i = 0;
1778
1779
28.5k
        end = (SRE_CHAR *)state->end;
1780
28.5k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
156k
        while (ptr < end) {
1788
156k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.06M
            while (*ptr++ != c) {
1790
1.91M
                if (ptr >= end)
1791
106
                    return 0;
1792
1.91M
            }
1793
156k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
156k
            i = 1;
1797
156k
            state->must_advance = 0;
1798
164k
            do {
1799
164k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
127k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
127k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
127k
                    state->start = ptr - (prefix_len - 1);
1808
127k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
127k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
127k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
127k
                    if (status != 0)
1813
28.3k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
99.3k
                    if (++ptr >= end)
1816
7
                        return 0;
1817
99.3k
                    RESET_CAPTURE_GROUP();
1818
99.3k
                }
1819
136k
                i = overlap[i];
1820
136k
            } while (i != 0);
1821
156k
        }
1822
0
        return 0;
1823
28.5k
    }
1824
1825
4.13M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.92M
        end = (SRE_CHAR *)state->end;
1828
3.92M
        state->must_advance = 0;
1829
4.42M
        for (;;) {
1830
77.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
72.8M
                ptr++;
1832
4.42M
            if (ptr >= end)
1833
55.8k
                return 0;
1834
4.36M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.36M
            state->start = ptr;
1836
4.36M
            state->ptr = ptr;
1837
4.36M
            status = SRE(match)(state, pattern, 0);
1838
4.36M
            if (status != 0)
1839
3.87M
                break;
1840
495k
            ptr++;
1841
495k
            RESET_CAPTURE_GROUP();
1842
495k
        }
1843
3.92M
    } else {
1844
        /* general case */
1845
205k
        assert(ptr <= end);
1846
205k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
205k
        state->start = state->ptr = ptr;
1848
205k
        status = SRE(match)(state, pattern, 1);
1849
205k
        state->must_advance = 0;
1850
205k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
205k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
74.0M
        while (status == 0 && ptr < end) {
1858
73.8M
            ptr++;
1859
73.8M
            RESET_CAPTURE_GROUP();
1860
73.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
73.8M
            state->start = state->ptr = ptr;
1862
73.8M
            status = SRE(match)(state, pattern, 0);
1863
73.8M
        }
1864
205k
    }
1865
1866
4.07M
    return status;
1867
4.13M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/