Coverage Report

Created: 2025-12-14 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
73.9M
{
18
    /* check if pointer is at given position */
19
20
73.9M
    Py_ssize_t thisp, thatp;
21
22
73.9M
    switch (at) {
23
24
10.4M
    case SRE_AT_BEGINNING:
25
10.4M
    case SRE_AT_BEGINNING_STRING:
26
10.4M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
60.0M
    case SRE_AT_END:
33
60.0M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
882k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
60.0M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.39M
    case SRE_AT_END_STRING:
42
3.39M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
73.9M
    }
87
88
0
    return 0;
89
73.9M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
27.6M
{
18
    /* check if pointer is at given position */
19
20
27.6M
    Py_ssize_t thisp, thatp;
21
22
27.6M
    switch (at) {
23
24
9.66M
    case SRE_AT_BEGINNING:
25
9.66M
    case SRE_AT_BEGINNING_STRING:
26
9.66M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
16.5M
    case SRE_AT_END:
33
16.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
358k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
16.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.44M
    case SRE_AT_END_STRING:
42
1.44M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
27.6M
    }
87
88
0
    return 0;
89
27.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
28.4M
{
18
    /* check if pointer is at given position */
19
20
28.4M
    Py_ssize_t thisp, thatp;
21
22
28.4M
    switch (at) {
23
24
784k
    case SRE_AT_BEGINNING:
25
784k
    case SRE_AT_BEGINNING_STRING:
26
784k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
26.7M
    case SRE_AT_END:
33
26.7M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
518k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
26.7M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
924k
    case SRE_AT_END_STRING:
42
924k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
28.4M
    }
87
88
0
    return 0;
89
28.4M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
17.7M
{
18
    /* check if pointer is at given position */
19
20
17.7M
    Py_ssize_t thisp, thatp;
21
22
17.7M
    switch (at) {
23
24
14.6k
    case SRE_AT_BEGINNING:
25
14.6k
    case SRE_AT_BEGINNING_STRING:
26
14.6k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
16.7M
    case SRE_AT_END:
33
16.7M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
6.06k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
16.7M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.03M
    case SRE_AT_END_STRING:
42
1.03M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
17.7M
    }
87
88
0
    return 0;
89
17.7M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.42G
{
94
    /* check if character is a member of the given set */
95
96
1.42G
    int ok = 1;
97
98
3.22G
    for (;;) {
99
3.22G
        switch (*set++) {
100
101
937M
        case SRE_OP_FAILURE:
102
937M
            return !ok;
103
104
1.10G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.10G
            if (ch == set[0])
107
8.46M
                return ok;
108
1.09G
            set++;
109
1.09G
            break;
110
111
76.6M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
76.6M
            if (sre_category(set[0], (int) ch))
114
50.5M
                return ok;
115
26.1M
            set++;
116
26.1M
            break;
117
118
498M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
498M
            if (ch < 256 &&
121
471M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
200M
                return ok;
123
297M
            set += 256/SRE_CODE_BITS;
124
297M
            break;
125
126
356M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
356M
            if (set[0] <= ch && ch <= set[1])
129
226M
                return ok;
130
130M
            set += 2;
131
130M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
256M
        case SRE_OP_NEGATE:
148
256M
            ok = !ok;
149
256M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.22G
        }
175
3.22G
    }
176
1.42G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
398M
{
94
    /* check if character is a member of the given set */
95
96
398M
    int ok = 1;
97
98
802M
    for (;;) {
99
802M
        switch (*set++) {
100
101
211M
        case SRE_OP_FAILURE:
102
211M
            return !ok;
103
104
236M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
236M
            if (ch == set[0])
107
4.98M
                return ok;
108
231M
            set++;
109
231M
            break;
110
111
31.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
31.1M
            if (sre_category(set[0], (int) ch))
114
20.1M
                return ok;
115
10.9M
            set++;
116
10.9M
            break;
117
118
95.7M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
95.7M
            if (ch < 256 &&
121
95.7M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
48.6M
                return ok;
123
47.0M
            set += 256/SRE_CODE_BITS;
124
47.0M
            break;
125
126
186M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
186M
            if (set[0] <= ch && ch <= set[1])
129
112M
                return ok;
130
73.5M
            set += 2;
131
73.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
41.0M
        case SRE_OP_NEGATE:
148
41.0M
            ok = !ok;
149
41.0M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
802M
        }
175
802M
    }
176
398M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
622M
{
94
    /* check if character is a member of the given set */
95
96
622M
    int ok = 1;
97
98
1.50G
    for (;;) {
99
1.50G
        switch (*set++) {
100
101
451M
        case SRE_OP_FAILURE:
102
451M
            return !ok;
103
104
587M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
587M
            if (ch == set[0])
107
1.73M
                return ok;
108
585M
            set++;
109
585M
            break;
110
111
28.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
28.2M
            if (sre_category(set[0], (int) ch))
114
15.2M
                return ok;
115
13.0M
            set++;
116
13.0M
            break;
117
118
176M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
176M
            if (ch < 256 &&
121
165M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
53.8M
                return ok;
123
122M
            set += 256/SRE_CODE_BITS;
124
122M
            break;
125
126
146M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
146M
            if (set[0] <= ch && ch <= set[1])
129
99.7M
                return ok;
130
47.1M
            set += 2;
131
47.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
110M
        case SRE_OP_NEGATE:
148
110M
            ok = !ok;
149
110M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.50G
        }
175
1.50G
    }
176
622M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
402M
{
94
    /* check if character is a member of the given set */
95
96
402M
    int ok = 1;
97
98
925M
    for (;;) {
99
925M
        switch (*set++) {
100
101
274M
        case SRE_OP_FAILURE:
102
274M
            return !ok;
103
104
279M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
279M
            if (ch == set[0])
107
1.74M
                return ok;
108
278M
            set++;
109
278M
            break;
110
111
17.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
17.2M
            if (sre_category(set[0], (int) ch))
114
15.1M
                return ok;
115
2.12M
            set++;
116
2.12M
            break;
117
118
226M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
226M
            if (ch < 256 &&
121
209M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
97.7M
                return ok;
123
128M
            set += 256/SRE_CODE_BITS;
124
128M
            break;
125
126
23.2M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
23.2M
            if (set[0] <= ch && ch <= set[1])
129
13.7M
                return ok;
130
9.50M
            set += 2;
131
9.50M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
104M
        case SRE_OP_NEGATE:
148
104M
            ok = !ok;
149
104M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
925M
        }
175
925M
    }
176
402M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
545M
{
195
545M
    SRE_CODE chr;
196
545M
    SRE_CHAR c;
197
545M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
545M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
545M
    Py_ssize_t i;
200
545M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
545M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
57.1M
        end = ptr + maxcount;
205
206
545M
    switch (pattern[0]) {
207
208
428M
    case SRE_OP_IN:
209
        /* repeated set */
210
428M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
799M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
371M
            ptr++;
213
428M
        break;
214
215
20.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
20.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
63.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
42.7M
            ptr++;
220
20.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
93.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
93.7M
        chr = pattern[1];
232
93.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
93.7M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
86.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
86.9M
        else
238
86.9M
#endif
239
98.7M
        while (ptr < end && *ptr == c)
240
5.01M
            ptr++;
241
93.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.45M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.45M
        chr = pattern[1];
270
2.45M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.45M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.30M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.30M
        else
276
1.30M
#endif
277
37.7M
        while (ptr < end && *ptr != c)
278
35.2M
            ptr++;
279
2.45M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
545M
    }
319
320
545M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
545M
           ptr - (SRE_CHAR*) state->ptr));
322
545M
    return ptr - (SRE_CHAR*) state->ptr;
323
545M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
188M
{
195
188M
    SRE_CODE chr;
196
188M
    SRE_CHAR c;
197
188M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
188M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
188M
    Py_ssize_t i;
200
188M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
188M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
19.5M
        end = ptr + maxcount;
205
206
188M
    switch (pattern[0]) {
207
208
109M
    case SRE_OP_IN:
209
        /* repeated set */
210
109M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
234M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
124M
            ptr++;
213
109M
        break;
214
215
6.39M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
6.39M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
19.0M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
12.6M
            ptr++;
220
6.39M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
71.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
71.9M
        chr = pattern[1];
232
71.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
71.9M
        c = (SRE_CHAR) chr;
234
71.9M
#if SIZEOF_SRE_CHAR < 4
235
71.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
71.9M
        else
238
71.9M
#endif
239
74.2M
        while (ptr < end && *ptr == c)
240
2.31M
            ptr++;
241
71.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
521k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
521k
        chr = pattern[1];
270
521k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
521k
        c = (SRE_CHAR) chr;
272
521k
#if SIZEOF_SRE_CHAR < 4
273
521k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
521k
        else
276
521k
#endif
277
7.14M
        while (ptr < end && *ptr != c)
278
6.62M
            ptr++;
279
521k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
188M
    }
319
320
188M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
188M
           ptr - (SRE_CHAR*) state->ptr));
322
188M
    return ptr - (SRE_CHAR*) state->ptr;
323
188M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
226M
{
195
226M
    SRE_CODE chr;
196
226M
    SRE_CHAR c;
197
226M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
226M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
226M
    Py_ssize_t i;
200
226M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
226M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
25.9M
        end = ptr + maxcount;
205
206
226M
    switch (pattern[0]) {
207
208
197M
    case SRE_OP_IN:
209
        /* repeated set */
210
197M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
326M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
128M
            ptr++;
213
197M
        break;
214
215
12.5M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
12.5M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
35.0M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
22.5M
            ptr++;
220
12.5M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
15.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
15.0M
        chr = pattern[1];
232
15.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
15.0M
        c = (SRE_CHAR) chr;
234
15.0M
#if SIZEOF_SRE_CHAR < 4
235
15.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
15.0M
        else
238
15.0M
#endif
239
17.3M
        while (ptr < end && *ptr == c)
240
2.29M
            ptr++;
241
15.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
785k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
785k
        chr = pattern[1];
270
785k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
785k
        c = (SRE_CHAR) chr;
272
785k
#if SIZEOF_SRE_CHAR < 4
273
785k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
785k
        else
276
785k
#endif
277
10.2M
        while (ptr < end && *ptr != c)
278
9.45M
            ptr++;
279
785k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
226M
    }
319
320
226M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
226M
           ptr - (SRE_CHAR*) state->ptr));
322
226M
    return ptr - (SRE_CHAR*) state->ptr;
323
226M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
130M
{
195
130M
    SRE_CODE chr;
196
130M
    SRE_CHAR c;
197
130M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
130M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
130M
    Py_ssize_t i;
200
130M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
130M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
11.6M
        end = ptr + maxcount;
205
206
130M
    switch (pattern[0]) {
207
208
120M
    case SRE_OP_IN:
209
        /* repeated set */
210
120M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
238M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
117M
            ptr++;
213
120M
        break;
214
215
1.98M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
1.98M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
9.55M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
7.57M
            ptr++;
220
1.98M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.73M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.73M
        chr = pattern[1];
232
6.73M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.73M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
7.13M
        while (ptr < end && *ptr == c)
240
408k
            ptr++;
241
6.73M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.14M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.14M
        chr = pattern[1];
270
1.14M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.14M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
20.3M
        while (ptr < end && *ptr != c)
278
19.1M
            ptr++;
279
1.14M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
130M
    }
319
320
130M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
130M
           ptr - (SRE_CHAR*) state->ptr));
322
130M
    return ptr - (SRE_CHAR*) state->ptr;
323
130M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
500M
    do { \
355
500M
        ctx->lastmark = state->lastmark; \
356
500M
        ctx->lastindex = state->lastindex; \
357
500M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
285M
    do { \
360
285M
        state->lastmark = ctx->lastmark; \
361
285M
        state->lastindex = ctx->lastindex; \
362
285M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
177M
    do { \
366
177M
        TRACE(("push last_ptr: %zd", \
367
177M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
177M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
177M
    } while (0)
370
#define LAST_PTR_POP()  \
371
177M
    do { \
372
177M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
177M
        TRACE(("pop last_ptr: %zd", \
374
177M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
177M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
731M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
562M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.03G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
102M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
31.4M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.29G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.29G
do { \
390
1.29G
    alloc_pos = state->data_stack_base; \
391
1.29G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.29G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.29G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
154M
        int j = data_stack_grow(state, sizeof(type)); \
395
154M
        if (j < 0) return j; \
396
154M
        if (ctx_pos != -1) \
397
154M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
154M
    } \
399
1.29G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.29G
    state->data_stack_base += sizeof(type); \
401
1.29G
} while (0)
402
403
1.37G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.37G
do { \
405
1.37G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.37G
    ptr = (type*)(state->data_stack+pos); \
407
1.37G
} while (0)
408
409
435M
#define DATA_STACK_PUSH(state, data, size) \
410
435M
do { \
411
435M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
435M
           data, state->data_stack_base, size)); \
413
435M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
91.5k
        int j = data_stack_grow(state, size); \
415
91.5k
        if (j < 0) return j; \
416
91.5k
        if (ctx_pos != -1) \
417
91.5k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
91.5k
    } \
419
435M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
435M
    state->data_stack_base += size; \
421
435M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
272M
#define DATA_STACK_POP(state, data, size, discard) \
427
272M
do { \
428
272M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
272M
           data, state->data_stack_base-size, size)); \
430
272M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
272M
    if (discard) \
432
272M
        state->data_stack_base -= size; \
433
272M
} while (0)
434
435
1.45G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.45G
do { \
437
1.45G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.45G
           state->data_stack_base-size, size)); \
439
1.45G
    state->data_stack_base -= size; \
440
1.45G
} while(0)
441
442
#define DATA_PUSH(x) \
443
177M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
177M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.29G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.29G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.37G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
367M
    do if (lastmark >= 0) { \
473
258M
        MARK_TRACE("push", (lastmark)); \
474
258M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
258M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
367M
    } while (0)
477
#define MARK_POP(lastmark) \
478
110M
    do if (lastmark >= 0) { \
479
93.6M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
93.6M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
93.6M
        MARK_TRACE("pop", (lastmark)); \
482
110M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.18M
    do if (lastmark >= 0) { \
485
1.97M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.97M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.97M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.18M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
257M
    do if (lastmark >= 0) { \
491
164M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
164M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
164M
        MARK_TRACE("pop discard", (lastmark)); \
494
257M
    } while (0)
495
496
467M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
177M
#define JUMP_MAX_UNTIL_2     2
499
102M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
101M
#define JUMP_REPEAT          7
504
12.2M
#define JUMP_REPEAT_ONE_1    8
505
199M
#define JUMP_REPEAT_ONE_2    9
506
21.4M
#define JUMP_MIN_REPEAT_ONE  10
507
159M
#define JUMP_BRANCH          11
508
31.4M
#define JUMP_ASSERT          12
509
22.8M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
827M
    ctx->pattern = pattern; \
516
827M
    ctx->ptr = ptr; \
517
827M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
827M
    nextctx->pattern = nextpattern; \
519
827M
    nextctx->toplevel = toplevel_; \
520
827M
    nextctx->jump = jumpvalue; \
521
827M
    nextctx->last_ctx_pos = ctx_pos; \
522
827M
    pattern = nextpattern; \
523
827M
    ctx_pos = alloc_pos; \
524
827M
    ctx = nextctx; \
525
827M
    goto entrance; \
526
827M
    jumplabel: \
527
827M
    pattern = ctx->pattern; \
528
827M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
773M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
54.2M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.22G
    do {                                                           \
553
2.22G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.22G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.22G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.29G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.22G
        do {                               \
588
2.22G
            MAYBE_CHECK_SIGNALS;           \
589
2.22G
            goto *sre_targets[*pattern++]; \
590
2.22G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
467M
{
601
467M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
467M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
467M
    Py_ssize_t ret = 0;
604
467M
    int jump;
605
467M
    unsigned int sigcount = state->sigcount;
606
607
467M
    SRE(match_context)* ctx;
608
467M
    SRE(match_context)* nextctx;
609
467M
    INIT_TRACE(state);
610
611
467M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
467M
    DATA_ALLOC(SRE(match_context), ctx);
614
467M
    ctx->last_ctx_pos = -1;
615
467M
    ctx->jump = JUMP_NONE;
616
467M
    ctx->toplevel = toplevel;
617
467M
    ctx_pos = alloc_pos;
618
619
467M
#if USE_COMPUTED_GOTOS
620
467M
#include "sre_targets.h"
621
467M
#endif
622
623
1.29G
entrance:
624
625
1.29G
    ;  // Fashion statement.
626
1.29G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.29G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
62.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.93M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.93M
                   end - ptr, (size_t) pattern[3]));
634
4.93M
            RETURN_FAILURE;
635
4.93M
        }
636
57.6M
        pattern += pattern[1] + 1;
637
57.6M
    }
638
639
1.28G
#if USE_COMPUTED_GOTOS
640
1.28G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.28G
    {
647
648
1.28G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
527M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
527M
                   ptr, pattern[0]));
653
527M
            {
654
527M
                int i = pattern[0];
655
527M
                if (i & 1)
656
89.6M
                    state->lastindex = i/2 + 1;
657
527M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
521M
                    int j = state->lastmark + 1;
663
536M
                    while (j < i)
664
14.8M
                        state->mark[j++] = NULL;
665
521M
                    state->lastmark = i;
666
521M
                }
667
527M
                state->mark[i] = ptr;
668
527M
            }
669
527M
            pattern++;
670
527M
            DISPATCH;
671
672
527M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
136M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
136M
                   ptr, *pattern));
677
136M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
59.0M
                RETURN_FAILURE;
679
77.2M
            pattern++;
680
77.2M
            ptr++;
681
77.2M
            DISPATCH;
682
683
77.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
157M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
157M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
157M
            if (ctx->toplevel &&
698
45.5M
                ((state->match_all && ptr != state->end) ||
699
45.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
157M
            state->ptr = ptr;
704
157M
            RETURN_SUCCESS;
705
706
73.9M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
73.9M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
73.9M
            if (!SRE(at)(state, ptr, *pattern))
711
56.5M
                RETURN_FAILURE;
712
17.3M
            pattern++;
713
17.3M
            DISPATCH;
714
715
17.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
248M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
248M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
248M
            if (ptr >= end ||
749
247M
                !SRE(charset)(state, pattern + 1, *ptr))
750
67.2M
                RETURN_FAILURE;
751
180M
            pattern += pattern[0];
752
180M
            ptr++;
753
180M
            DISPATCH;
754
755
180M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.77M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.77M
                   pattern, ptr, pattern[0]));
758
5.77M
            if (ptr >= end ||
759
5.77M
                sre_lower_ascii(*ptr) != *pattern)
760
70.5k
                RETURN_FAILURE;
761
5.70M
            pattern++;
762
5.70M
            ptr++;
763
5.70M
            DISPATCH;
764
765
5.70M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
74.2M
        TARGET(SRE_OP_JUMP):
845
74.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
74.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
74.2M
                   ptr, pattern[0]));
850
74.2M
            pattern += pattern[0];
851
74.2M
            DISPATCH;
852
853
117M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
117M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
117M
            LASTMARK_SAVE();
858
117M
            if (state->repeat)
859
59.9M
                MARK_PUSH(ctx->lastmark);
860
297M
            for (; pattern[0]; pattern += pattern[0]) {
861
252M
                if (pattern[1] == SRE_OP_LITERAL &&
862
116M
                    (ptr >= end ||
863
116M
                     (SRE_CODE) *ptr != pattern[2]))
864
61.3M
                    continue;
865
190M
                if (pattern[1] == SRE_OP_IN &&
866
53.3M
                    (ptr >= end ||
867
53.2M
                     !SRE(charset)(state, pattern + 3,
868
53.2M
                                   (SRE_CODE) *ptr)))
869
31.3M
                    continue;
870
159M
                state->ptr = ptr;
871
159M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
159M
                if (ret) {
873
71.7M
                    if (state->repeat)
874
46.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
71.7M
                    RETURN_ON_ERROR(ret);
876
71.7M
                    RETURN_SUCCESS;
877
71.7M
                }
878
87.6M
                if (state->repeat)
879
16.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
87.6M
                LASTMARK_RESTORE();
881
87.6M
            }
882
45.6M
            if (state->repeat)
883
13.1M
                MARK_POP_DISCARD(ctx->lastmark);
884
45.6M
            RETURN_FAILURE;
885
886
529M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
529M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
529M
                   pattern[1], pattern[2]));
898
899
529M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.19M
                RETURN_FAILURE; /* cannot match */
901
902
528M
            state->ptr = ptr;
903
904
528M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
528M
            RETURN_ON_ERROR(ret);
906
528M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
528M
            ctx->count = ret;
908
528M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
528M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
348M
                RETURN_FAILURE;
917
918
179M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.65M
                ptr == state->end &&
920
71.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
71.5k
            {
922
                /* tail is empty.  we're finished */
923
71.5k
                state->ptr = ptr;
924
71.5k
                RETURN_SUCCESS;
925
71.5k
            }
926
927
179M
            LASTMARK_SAVE();
928
179M
            if (state->repeat)
929
107M
                MARK_PUSH(ctx->lastmark);
930
931
179M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
30.8M
                ctx->u.chr = pattern[pattern[0]+1];
935
30.8M
                for (;;) {
936
73.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
54.5M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
42.2M
                        ptr--;
939
42.2M
                        ctx->count--;
940
42.2M
                    }
941
30.8M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
18.5M
                        break;
943
12.2M
                    state->ptr = ptr;
944
12.2M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.2M
                            pattern+pattern[0]);
946
12.2M
                    if (ret) {
947
12.2M
                        if (state->repeat)
948
10.9M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.2M
                        RETURN_ON_ERROR(ret);
950
12.2M
                        RETURN_SUCCESS;
951
12.2M
                    }
952
588
                    if (state->repeat)
953
588
                        MARK_POP_KEEP(ctx->lastmark);
954
588
                    LASTMARK_RESTORE();
955
956
588
                    ptr--;
957
588
                    ctx->count--;
958
588
                }
959
18.5M
                if (state->repeat)
960
17.3M
                    MARK_POP_DISCARD(ctx->lastmark);
961
148M
            } else {
962
                /* general case */
963
218M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
199M
                    state->ptr = ptr;
965
199M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
199M
                            pattern+pattern[0]);
967
199M
                    if (ret) {
968
128M
                        if (state->repeat)
969
78.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
128M
                        RETURN_ON_ERROR(ret);
971
128M
                        RETURN_SUCCESS;
972
128M
                    }
973
70.1M
                    if (state->repeat)
974
2.16M
                        MARK_POP_KEEP(ctx->lastmark);
975
70.1M
                    LASTMARK_RESTORE();
976
977
70.1M
                    ptr--;
978
70.1M
                    ctx->count--;
979
70.1M
                }
980
19.2M
                if (state->repeat)
981
1.30M
                    MARK_POP_DISCARD(ctx->lastmark);
982
19.2M
            }
983
37.7M
            RETURN_FAILURE;
984
985
3.89M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.89M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.89M
                   pattern[1], pattern[2]));
997
998
3.89M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.89M
            state->ptr = ptr;
1002
1003
3.89M
            if (pattern[1] == 0)
1004
3.89M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.89M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.89M
            } else {
1028
                /* general case */
1029
3.89M
                LASTMARK_SAVE();
1030
3.89M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
21.4M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
21.4M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
21.4M
                    state->ptr = ptr;
1036
21.4M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
21.4M
                            pattern+pattern[0]);
1038
21.4M
                    if (ret) {
1039
3.89M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.89M
                        RETURN_ON_ERROR(ret);
1042
3.89M
                        RETURN_SUCCESS;
1043
3.89M
                    }
1044
17.5M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
17.5M
                    LASTMARK_RESTORE();
1047
1048
17.5M
                    state->ptr = ptr;
1049
17.5M
                    ret = SRE(count)(state, pattern+3, 1);
1050
17.5M
                    RETURN_ON_ERROR(ret);
1051
17.5M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
17.5M
                    if (ret == 0)
1053
0
                        break;
1054
17.5M
                    assert(ret == 1);
1055
17.5M
                    ptr++;
1056
17.5M
                    ctx->count++;
1057
17.5M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
101M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
101M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
101M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
101M
            ctx->u.rep = repeat_pool_malloc(state);
1127
101M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
101M
            ctx->u.rep->count = -1;
1131
101M
            ctx->u.rep->pattern = pattern;
1132
101M
            ctx->u.rep->prev = state->repeat;
1133
101M
            ctx->u.rep->last_ptr = NULL;
1134
101M
            state->repeat = ctx->u.rep;
1135
1136
101M
            state->ptr = ptr;
1137
101M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
101M
            state->repeat = ctx->u.rep->prev;
1139
101M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
101M
            if (ret) {
1142
49.2M
                RETURN_ON_ERROR(ret);
1143
49.2M
                RETURN_SUCCESS;
1144
49.2M
            }
1145
52.2M
            RETURN_FAILURE;
1146
1147
192M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
192M
            ctx->u.rep = state->repeat;
1155
192M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
192M
            state->ptr = ptr;
1159
1160
192M
            ctx->count = ctx->u.rep->count+1;
1161
1162
192M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
192M
                   ptr, ctx->count));
1164
1165
192M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
192M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.0M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
177M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
177M
                ctx->u.rep->count = ctx->count;
1185
177M
                LASTMARK_SAVE();
1186
177M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
177M
                LAST_PTR_PUSH();
1189
177M
                ctx->u.rep->last_ptr = state->ptr;
1190
177M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
177M
                        ctx->u.rep->pattern+3);
1192
177M
                LAST_PTR_POP();
1193
177M
                if (ret) {
1194
89.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
89.7M
                    RETURN_ON_ERROR(ret);
1196
89.7M
                    RETURN_SUCCESS;
1197
89.7M
                }
1198
87.3M
                MARK_POP(ctx->lastmark);
1199
87.3M
                LASTMARK_RESTORE();
1200
87.3M
                ctx->u.rep->count = ctx->count-1;
1201
87.3M
                state->ptr = ptr;
1202
87.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
102M
            state->repeat = ctx->u.rep->prev;
1207
102M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
102M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
102M
            RETURN_ON_SUCCESS(ret);
1211
53.1M
            state->ptr = ptr;
1212
53.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
31.4M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
31.4M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
31.4M
                   ptr, pattern[1]));
1565
31.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
31.4M
            state->ptr = ptr - pattern[1];
1568
31.4M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
31.4M
            RETURN_ON_FAILURE(ret);
1570
26.4M
            pattern += pattern[0];
1571
26.4M
            DISPATCH;
1572
1573
26.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
22.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
22.8M
                   ptr, pattern[1]));
1578
22.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
22.8M
                state->ptr = ptr - pattern[1];
1580
22.8M
                LASTMARK_SAVE();
1581
22.8M
                if (state->repeat)
1582
22.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
45.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
45.6M
                if (ret) {
1586
10.6k
                    if (state->repeat)
1587
10.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
10.6k
                    RETURN_ON_ERROR(ret);
1589
10.6k
                    RETURN_FAILURE;
1590
10.6k
                }
1591
22.8M
                if (state->repeat)
1592
22.8M
                    MARK_POP(ctx->lastmark);
1593
22.8M
                LASTMARK_RESTORE();
1594
22.8M
            }
1595
22.8M
            pattern += pattern[0];
1596
22.8M
            DISPATCH;
1597
1598
22.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.29G
exit:
1620
1.29G
    ctx_pos = ctx->last_ctx_pos;
1621
1.29G
    jump = ctx->jump;
1622
1.29G
    DATA_POP_DISCARD(ctx);
1623
1.29G
    if (ctx_pos == -1) {
1624
467M
        state->sigcount = sigcount;
1625
467M
        return ret;
1626
467M
    }
1627
827M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
827M
    switch (jump) {
1630
177M
        case JUMP_MAX_UNTIL_2:
1631
177M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
177M
            goto jump_max_until_2;
1633
102M
        case JUMP_MAX_UNTIL_3:
1634
102M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
102M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
159M
        case JUMP_BRANCH:
1643
159M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
159M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
101M
        case JUMP_REPEAT:
1658
101M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
101M
            goto jump_repeat;
1660
12.2M
        case JUMP_REPEAT_ONE_1:
1661
12.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.2M
            goto jump_repeat_one_1;
1663
199M
        case JUMP_REPEAT_ONE_2:
1664
199M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
199M
            goto jump_repeat_one_2;
1666
21.4M
        case JUMP_MIN_REPEAT_ONE:
1667
21.4M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
21.4M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
31.4M
        case JUMP_ASSERT:
1673
31.4M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
31.4M
            goto jump_assert;
1675
22.8M
        case JUMP_ASSERT_NOT:
1676
22.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
22.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
827M
    }
1683
1684
0
    return ret; /* should never get here */
1685
827M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
170M
{
601
170M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
170M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
170M
    Py_ssize_t ret = 0;
604
170M
    int jump;
605
170M
    unsigned int sigcount = state->sigcount;
606
607
170M
    SRE(match_context)* ctx;
608
170M
    SRE(match_context)* nextctx;
609
170M
    INIT_TRACE(state);
610
611
170M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
170M
    DATA_ALLOC(SRE(match_context), ctx);
614
170M
    ctx->last_ctx_pos = -1;
615
170M
    ctx->jump = JUMP_NONE;
616
170M
    ctx->toplevel = toplevel;
617
170M
    ctx_pos = alloc_pos;
618
619
170M
#if USE_COMPUTED_GOTOS
620
170M
#include "sre_targets.h"
621
170M
#endif
622
623
444M
entrance:
624
625
444M
    ;  // Fashion statement.
626
444M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
444M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
36.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.78M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.78M
                   end - ptr, (size_t) pattern[3]));
634
4.78M
            RETURN_FAILURE;
635
4.78M
        }
636
31.7M
        pattern += pattern[1] + 1;
637
31.7M
    }
638
639
439M
#if USE_COMPUTED_GOTOS
640
439M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
439M
    {
647
648
439M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
177M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
177M
                   ptr, pattern[0]));
653
177M
            {
654
177M
                int i = pattern[0];
655
177M
                if (i & 1)
656
36.6M
                    state->lastindex = i/2 + 1;
657
177M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
174M
                    int j = state->lastmark + 1;
663
184M
                    while (j < i)
664
10.1M
                        state->mark[j++] = NULL;
665
174M
                    state->lastmark = i;
666
174M
                }
667
177M
                state->mark[i] = ptr;
668
177M
            }
669
177M
            pattern++;
670
177M
            DISPATCH;
671
672
177M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
80.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
80.3M
                   ptr, *pattern));
677
80.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
34.4M
                RETURN_FAILURE;
679
45.8M
            pattern++;
680
45.8M
            ptr++;
681
45.8M
            DISPATCH;
682
683
45.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
61.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
61.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
61.7M
            if (ctx->toplevel &&
698
24.6M
                ((state->match_all && ptr != state->end) ||
699
24.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
61.7M
            state->ptr = ptr;
704
61.7M
            RETURN_SUCCESS;
705
706
27.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
27.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
27.6M
            if (!SRE(at)(state, ptr, *pattern))
711
11.7M
                RETURN_FAILURE;
712
15.9M
            pattern++;
713
15.9M
            DISPATCH;
714
715
15.9M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
66.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
66.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
66.1M
            if (ptr >= end ||
749
65.7M
                !SRE(charset)(state, pattern + 1, *ptr))
750
11.1M
                RETURN_FAILURE;
751
54.9M
            pattern += pattern[0];
752
54.9M
            ptr++;
753
54.9M
            DISPATCH;
754
755
54.9M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
442k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
442k
                   pattern, ptr, pattern[0]));
758
442k
            if (ptr >= end ||
759
442k
                sre_lower_ascii(*ptr) != *pattern)
760
30.6k
                RETURN_FAILURE;
761
411k
            pattern++;
762
411k
            ptr++;
763
411k
            DISPATCH;
764
765
411k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
32.5M
        TARGET(SRE_OP_JUMP):
845
32.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
32.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
32.5M
                   ptr, pattern[0]));
850
32.5M
            pattern += pattern[0];
851
32.5M
            DISPATCH;
852
853
60.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
60.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
60.9M
            LASTMARK_SAVE();
858
60.9M
            if (state->repeat)
859
12.1M
                MARK_PUSH(ctx->lastmark);
860
174M
            for (; pattern[0]; pattern += pattern[0]) {
861
145M
                if (pattern[1] == SRE_OP_LITERAL &&
862
64.8M
                    (ptr >= end ||
863
64.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
25.7M
                    continue;
865
119M
                if (pattern[1] == SRE_OP_IN &&
866
12.7M
                    (ptr >= end ||
867
12.7M
                     !SRE(charset)(state, pattern + 3,
868
12.7M
                                   (SRE_CODE) *ptr)))
869
6.88M
                    continue;
870
112M
                state->ptr = ptr;
871
112M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
112M
                if (ret) {
873
31.1M
                    if (state->repeat)
874
11.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
31.1M
                    RETURN_ON_ERROR(ret);
876
31.1M
                    RETURN_SUCCESS;
877
31.1M
                }
878
81.3M
                if (state->repeat)
879
6.41k
                    MARK_POP_KEEP(ctx->lastmark);
880
81.3M
                LASTMARK_RESTORE();
881
81.3M
            }
882
29.8M
            if (state->repeat)
883
413k
                MARK_POP_DISCARD(ctx->lastmark);
884
29.8M
            RETURN_FAILURE;
885
886
186M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
186M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
186M
                   pattern[1], pattern[2]));
898
899
186M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.00M
                RETURN_FAILURE; /* cannot match */
901
902
185M
            state->ptr = ptr;
903
904
185M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
185M
            RETURN_ON_ERROR(ret);
906
185M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
185M
            ctx->count = ret;
908
185M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
185M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
131M
                RETURN_FAILURE;
917
918
53.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
640k
                ptr == state->end &&
920
49.9k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
49.9k
            {
922
                /* tail is empty.  we're finished */
923
49.9k
                state->ptr = ptr;
924
49.9k
                RETURN_SUCCESS;
925
49.9k
            }
926
927
53.8M
            LASTMARK_SAVE();
928
53.8M
            if (state->repeat)
929
31.8M
                MARK_PUSH(ctx->lastmark);
930
931
53.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.69M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.69M
                for (;;) {
936
16.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.3M
                        ptr--;
939
10.3M
                        ctx->count--;
940
10.3M
                    }
941
5.69M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.87M
                        break;
943
2.82M
                    state->ptr = ptr;
944
2.82M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.82M
                            pattern+pattern[0]);
946
2.82M
                    if (ret) {
947
2.82M
                        if (state->repeat)
948
1.48M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.82M
                        RETURN_ON_ERROR(ret);
950
2.82M
                        RETURN_SUCCESS;
951
2.82M
                    }
952
126
                    if (state->repeat)
953
126
                        MARK_POP_KEEP(ctx->lastmark);
954
126
                    LASTMARK_RESTORE();
955
956
126
                    ptr--;
957
126
                    ctx->count--;
958
126
                }
959
2.87M
                if (state->repeat)
960
1.62M
                    MARK_POP_DISCARD(ctx->lastmark);
961
48.1M
            } else {
962
                /* general case */
963
64.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
60.2M
                    state->ptr = ptr;
965
60.2M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
60.2M
                            pattern+pattern[0]);
967
60.2M
                    if (ret) {
968
43.8M
                        if (state->repeat)
969
27.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
43.8M
                        RETURN_ON_ERROR(ret);
971
43.8M
                        RETURN_SUCCESS;
972
43.8M
                    }
973
16.3M
                    if (state->repeat)
974
1.31M
                        MARK_POP_KEEP(ctx->lastmark);
975
16.3M
                    LASTMARK_RESTORE();
976
977
16.3M
                    ptr--;
978
16.3M
                    ctx->count--;
979
16.3M
                }
980
4.35M
                if (state->repeat)
981
786k
                    MARK_POP_DISCARD(ctx->lastmark);
982
4.35M
            }
983
7.22M
            RETURN_FAILURE;
984
985
3.25M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.25M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.25M
                   pattern[1], pattern[2]));
997
998
3.25M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.25M
            state->ptr = ptr;
1002
1003
3.25M
            if (pattern[1] == 0)
1004
3.25M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.25M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.25M
            } else {
1028
                /* general case */
1029
3.25M
                LASTMARK_SAVE();
1030
3.25M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
6.55M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
6.55M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
6.55M
                    state->ptr = ptr;
1036
6.55M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
6.55M
                            pattern+pattern[0]);
1038
6.55M
                    if (ret) {
1039
3.25M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.25M
                        RETURN_ON_ERROR(ret);
1042
3.25M
                        RETURN_SUCCESS;
1043
3.25M
                    }
1044
3.30M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
3.30M
                    LASTMARK_RESTORE();
1047
1048
3.30M
                    state->ptr = ptr;
1049
3.30M
                    ret = SRE(count)(state, pattern+3, 1);
1050
3.30M
                    RETURN_ON_ERROR(ret);
1051
3.30M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
3.30M
                    if (ret == 0)
1053
0
                        break;
1054
3.30M
                    assert(ret == 1);
1055
3.30M
                    ptr++;
1056
3.30M
                    ctx->count++;
1057
3.30M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
19.4M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
19.4M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
19.4M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
19.4M
            ctx->u.rep = repeat_pool_malloc(state);
1127
19.4M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
19.4M
            ctx->u.rep->count = -1;
1131
19.4M
            ctx->u.rep->pattern = pattern;
1132
19.4M
            ctx->u.rep->prev = state->repeat;
1133
19.4M
            ctx->u.rep->last_ptr = NULL;
1134
19.4M
            state->repeat = ctx->u.rep;
1135
1136
19.4M
            state->ptr = ptr;
1137
19.4M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
19.4M
            state->repeat = ctx->u.rep->prev;
1139
19.4M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
19.4M
            if (ret) {
1142
9.75M
                RETURN_ON_ERROR(ret);
1143
9.75M
                RETURN_SUCCESS;
1144
9.75M
            }
1145
9.72M
            RETURN_FAILURE;
1146
1147
50.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
50.7M
            ctx->u.rep = state->repeat;
1155
50.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
50.7M
            state->ptr = ptr;
1159
1160
50.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
50.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
50.7M
                   ptr, ctx->count));
1164
1165
50.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
50.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
6.97M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
43.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
43.7M
                ctx->u.rep->count = ctx->count;
1185
43.7M
                LASTMARK_SAVE();
1186
43.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
43.7M
                LAST_PTR_PUSH();
1189
43.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
43.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
43.7M
                        ctx->u.rep->pattern+3);
1192
43.7M
                LAST_PTR_POP();
1193
43.7M
                if (ret) {
1194
30.6M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
30.6M
                    RETURN_ON_ERROR(ret);
1196
30.6M
                    RETURN_SUCCESS;
1197
30.6M
                }
1198
13.0M
                MARK_POP(ctx->lastmark);
1199
13.0M
                LASTMARK_RESTORE();
1200
13.0M
                ctx->u.rep->count = ctx->count-1;
1201
13.0M
                state->ptr = ptr;
1202
13.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
20.0M
            state->repeat = ctx->u.rep->prev;
1207
20.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
20.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
20.0M
            RETURN_ON_SUCCESS(ret);
1211
10.3M
            state->ptr = ptr;
1212
10.3M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.79M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.79M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.79M
                   ptr, pattern[1]));
1565
2.79M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.79M
            state->ptr = ptr - pattern[1];
1568
2.79M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.79M
            RETURN_ON_FAILURE(ret);
1570
2.63M
            pattern += pattern[0];
1571
2.63M
            DISPATCH;
1572
1573
5.60M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.60M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.60M
                   ptr, pattern[1]));
1578
5.60M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.60M
                state->ptr = ptr - pattern[1];
1580
5.60M
                LASTMARK_SAVE();
1581
5.60M
                if (state->repeat)
1582
5.60M
                    MARK_PUSH(ctx->lastmark);
1583
1584
11.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
11.2M
                if (ret) {
1586
1.15k
                    if (state->repeat)
1587
1.15k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.15k
                    RETURN_ON_ERROR(ret);
1589
1.15k
                    RETURN_FAILURE;
1590
1.15k
                }
1591
5.60M
                if (state->repeat)
1592
5.60M
                    MARK_POP(ctx->lastmark);
1593
5.60M
                LASTMARK_RESTORE();
1594
5.60M
            }
1595
5.60M
            pattern += pattern[0];
1596
5.60M
            DISPATCH;
1597
1598
5.60M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
444M
exit:
1620
444M
    ctx_pos = ctx->last_ctx_pos;
1621
444M
    jump = ctx->jump;
1622
444M
    DATA_POP_DISCARD(ctx);
1623
444M
    if (ctx_pos == -1) {
1624
170M
        state->sigcount = sigcount;
1625
170M
        return ret;
1626
170M
    }
1627
273M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
273M
    switch (jump) {
1630
43.7M
        case JUMP_MAX_UNTIL_2:
1631
43.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
43.7M
            goto jump_max_until_2;
1633
20.0M
        case JUMP_MAX_UNTIL_3:
1634
20.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
20.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
112M
        case JUMP_BRANCH:
1643
112M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
112M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
19.4M
        case JUMP_REPEAT:
1658
19.4M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
19.4M
            goto jump_repeat;
1660
2.82M
        case JUMP_REPEAT_ONE_1:
1661
2.82M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.82M
            goto jump_repeat_one_1;
1663
60.2M
        case JUMP_REPEAT_ONE_2:
1664
60.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
60.2M
            goto jump_repeat_one_2;
1666
6.55M
        case JUMP_MIN_REPEAT_ONE:
1667
6.55M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
6.55M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.79M
        case JUMP_ASSERT:
1673
2.79M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.79M
            goto jump_assert;
1675
5.60M
        case JUMP_ASSERT_NOT:
1676
5.60M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.60M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
273M
    }
1683
1684
0
    return ret; /* should never get here */
1685
273M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
215M
{
601
215M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
215M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
215M
    Py_ssize_t ret = 0;
604
215M
    int jump;
605
215M
    unsigned int sigcount = state->sigcount;
606
607
215M
    SRE(match_context)* ctx;
608
215M
    SRE(match_context)* nextctx;
609
215M
    INIT_TRACE(state);
610
611
215M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
215M
    DATA_ALLOC(SRE(match_context), ctx);
614
215M
    ctx->last_ctx_pos = -1;
615
215M
    ctx->jump = JUMP_NONE;
616
215M
    ctx->toplevel = toplevel;
617
215M
    ctx_pos = alloc_pos;
618
619
215M
#if USE_COMPUTED_GOTOS
620
215M
#include "sre_targets.h"
621
215M
#endif
622
623
483M
entrance:
624
625
483M
    ;  // Fashion statement.
626
483M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
483M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
15.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
145k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
145k
                   end - ptr, (size_t) pattern[3]));
634
145k
            RETURN_FAILURE;
635
145k
        }
636
15.6M
        pattern += pattern[1] + 1;
637
15.6M
    }
638
639
483M
#if USE_COMPUTED_GOTOS
640
483M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
483M
    {
647
648
483M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
227M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
227M
                   ptr, pattern[0]));
653
227M
            {
654
227M
                int i = pattern[0];
655
227M
                if (i & 1)
656
31.3M
                    state->lastindex = i/2 + 1;
657
227M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
225M
                    int j = state->lastmark + 1;
663
228M
                    while (j < i)
664
2.89M
                        state->mark[j++] = NULL;
665
225M
                    state->lastmark = i;
666
225M
                }
667
227M
                state->mark[i] = ptr;
668
227M
            }
669
227M
            pattern++;
670
227M
            DISPATCH;
671
672
227M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
29.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
29.0M
                   ptr, *pattern));
677
29.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
13.4M
                RETURN_FAILURE;
679
15.6M
            pattern++;
680
15.6M
            ptr++;
681
15.6M
            DISPATCH;
682
683
15.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
62.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
62.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
62.7M
            if (ctx->toplevel &&
698
11.5M
                ((state->match_all && ptr != state->end) ||
699
11.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
62.7M
            state->ptr = ptr;
704
62.7M
            RETURN_SUCCESS;
705
706
28.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
28.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
28.4M
            if (!SRE(at)(state, ptr, *pattern))
711
27.0M
                RETURN_FAILURE;
712
1.42M
            pattern++;
713
1.42M
            DISPATCH;
714
715
1.42M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
107M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
107M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
107M
            if (ptr >= end ||
749
107M
                !SRE(charset)(state, pattern + 1, *ptr))
750
34.5M
                RETURN_FAILURE;
751
72.9M
            pattern += pattern[0];
752
72.9M
            ptr++;
753
72.9M
            DISPATCH;
754
755
72.9M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.57M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.57M
                   pattern, ptr, pattern[0]));
758
3.57M
            if (ptr >= end ||
759
3.57M
                sre_lower_ascii(*ptr) != *pattern)
760
20.1k
                RETURN_FAILURE;
761
3.55M
            pattern++;
762
3.55M
            ptr++;
763
3.55M
            DISPATCH;
764
765
3.55M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
16.9M
        TARGET(SRE_OP_JUMP):
845
16.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
16.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
16.9M
                   ptr, pattern[0]));
850
16.9M
            pattern += pattern[0];
851
16.9M
            DISPATCH;
852
853
23.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
23.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
23.3M
            LASTMARK_SAVE();
858
23.3M
            if (state->repeat)
859
18.3M
                MARK_PUSH(ctx->lastmark);
860
51.8M
            for (; pattern[0]; pattern += pattern[0]) {
861
45.0M
                if (pattern[1] == SRE_OP_LITERAL &&
862
20.9M
                    (ptr >= end ||
863
20.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
13.8M
                    continue;
865
31.1M
                if (pattern[1] == SRE_OP_IN &&
866
15.4M
                    (ptr >= end ||
867
15.4M
                     !SRE(charset)(state, pattern + 3,
868
15.4M
                                   (SRE_CODE) *ptr)))
869
9.30M
                    continue;
870
21.8M
                state->ptr = ptr;
871
21.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
21.8M
                if (ret) {
873
16.5M
                    if (state->repeat)
874
13.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
16.5M
                    RETURN_ON_ERROR(ret);
876
16.5M
                    RETURN_SUCCESS;
877
16.5M
                }
878
5.31M
                if (state->repeat)
879
2.59k
                    MARK_POP_KEEP(ctx->lastmark);
880
5.31M
                LASTMARK_RESTORE();
881
5.31M
            }
882
6.85M
            if (state->repeat)
883
4.52M
                MARK_POP_DISCARD(ctx->lastmark);
884
6.85M
            RETURN_FAILURE;
885
886
214M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
214M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
214M
                   pattern[1], pattern[2]));
898
899
214M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
171k
                RETURN_FAILURE; /* cannot match */
901
902
213M
            state->ptr = ptr;
903
904
213M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
213M
            RETURN_ON_ERROR(ret);
906
213M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
213M
            ctx->count = ret;
908
213M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
213M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
154M
                RETURN_FAILURE;
917
918
59.0M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.16M
                ptr == state->end &&
920
18.3k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.3k
            {
922
                /* tail is empty.  we're finished */
923
18.3k
                state->ptr = ptr;
924
18.3k
                RETURN_SUCCESS;
925
18.3k
            }
926
927
58.9M
            LASTMARK_SAVE();
928
58.9M
            if (state->repeat)
929
27.0M
                MARK_PUSH(ctx->lastmark);
930
931
58.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.76M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.76M
                for (;;) {
936
12.3M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
9.49M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
5.54M
                        ptr--;
939
5.54M
                        ctx->count--;
940
5.54M
                    }
941
6.76M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.81M
                        break;
943
3.94M
                    state->ptr = ptr;
944
3.94M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.94M
                            pattern+pattern[0]);
946
3.94M
                    if (ret) {
947
3.94M
                        if (state->repeat)
948
3.91M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.94M
                        RETURN_ON_ERROR(ret);
950
3.94M
                        RETURN_SUCCESS;
951
3.94M
                    }
952
206
                    if (state->repeat)
953
206
                        MARK_POP_KEEP(ctx->lastmark);
954
206
                    LASTMARK_RESTORE();
955
956
206
                    ptr--;
957
206
                    ctx->count--;
958
206
                }
959
2.81M
                if (state->repeat)
960
2.80M
                    MARK_POP_DISCARD(ctx->lastmark);
961
52.2M
            } else {
962
                /* general case */
963
85.1M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
72.3M
                    state->ptr = ptr;
965
72.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
72.3M
                            pattern+pattern[0]);
967
72.3M
                    if (ret) {
968
39.4M
                        if (state->repeat)
969
19.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
39.4M
                        RETURN_ON_ERROR(ret);
971
39.4M
                        RETURN_SUCCESS;
972
39.4M
                    }
973
32.8M
                    if (state->repeat)
974
630k
                        MARK_POP_KEEP(ctx->lastmark);
975
32.8M
                    LASTMARK_RESTORE();
976
977
32.8M
                    ptr--;
978
32.8M
                    ctx->count--;
979
32.8M
                }
980
12.7M
                if (state->repeat)
981
375k
                    MARK_POP_DISCARD(ctx->lastmark);
982
12.7M
            }
983
15.5M
            RETURN_FAILURE;
984
985
635k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
635k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
635k
                   pattern[1], pattern[2]));
997
998
635k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
635k
            state->ptr = ptr;
1002
1003
635k
            if (pattern[1] == 0)
1004
635k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
635k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
635k
            } else {
1028
                /* general case */
1029
635k
                LASTMARK_SAVE();
1030
635k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
12.9M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
12.9M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
12.9M
                    state->ptr = ptr;
1036
12.9M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
12.9M
                            pattern+pattern[0]);
1038
12.9M
                    if (ret) {
1039
635k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
635k
                        RETURN_ON_ERROR(ret);
1042
635k
                        RETURN_SUCCESS;
1043
635k
                    }
1044
12.2M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
12.2M
                    LASTMARK_RESTORE();
1047
1048
12.2M
                    state->ptr = ptr;
1049
12.2M
                    ret = SRE(count)(state, pattern+3, 1);
1050
12.2M
                    RETURN_ON_ERROR(ret);
1051
12.2M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
12.2M
                    if (ret == 0)
1053
0
                        break;
1054
12.2M
                    assert(ret == 1);
1055
12.2M
                    ptr++;
1056
12.2M
                    ctx->count++;
1057
12.2M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
39.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
39.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
39.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
39.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
39.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
39.3M
            ctx->u.rep->count = -1;
1131
39.3M
            ctx->u.rep->pattern = pattern;
1132
39.3M
            ctx->u.rep->prev = state->repeat;
1133
39.3M
            ctx->u.rep->last_ptr = NULL;
1134
39.3M
            state->repeat = ctx->u.rep;
1135
1136
39.3M
            state->ptr = ptr;
1137
39.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
39.3M
            state->repeat = ctx->u.rep->prev;
1139
39.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
39.3M
            if (ret) {
1142
13.4M
                RETURN_ON_ERROR(ret);
1143
13.4M
                RETURN_SUCCESS;
1144
13.4M
            }
1145
25.8M
            RETURN_FAILURE;
1146
1147
64.3M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
64.3M
            ctx->u.rep = state->repeat;
1155
64.3M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
64.3M
            state->ptr = ptr;
1159
1160
64.3M
            ctx->count = ctx->u.rep->count+1;
1161
1162
64.3M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
64.3M
                   ptr, ctx->count));
1164
1165
64.3M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
64.3M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.55M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
60.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
60.7M
                ctx->u.rep->count = ctx->count;
1185
60.7M
                LASTMARK_SAVE();
1186
60.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
60.7M
                LAST_PTR_PUSH();
1189
60.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
60.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
60.7M
                        ctx->u.rep->pattern+3);
1192
60.7M
                LAST_PTR_POP();
1193
60.7M
                if (ret) {
1194
24.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
24.7M
                    RETURN_ON_ERROR(ret);
1196
24.7M
                    RETURN_SUCCESS;
1197
24.7M
                }
1198
36.0M
                MARK_POP(ctx->lastmark);
1199
36.0M
                LASTMARK_RESTORE();
1200
36.0M
                ctx->u.rep->count = ctx->count-1;
1201
36.0M
                state->ptr = ptr;
1202
36.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
39.5M
            state->repeat = ctx->u.rep->prev;
1207
39.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
39.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
39.5M
            RETURN_ON_SUCCESS(ret);
1211
26.1M
            state->ptr = ptr;
1212
26.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
10.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
10.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
10.3M
                   ptr, pattern[1]));
1565
10.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
10.3M
            state->ptr = ptr - pattern[1];
1568
10.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
10.3M
            RETURN_ON_FAILURE(ret);
1570
6.42M
            pattern += pattern[0];
1571
6.42M
            DISPATCH;
1572
1573
7.05M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.05M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.05M
                   ptr, pattern[1]));
1578
7.05M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.05M
                state->ptr = ptr - pattern[1];
1580
7.05M
                LASTMARK_SAVE();
1581
7.05M
                if (state->repeat)
1582
7.05M
                    MARK_PUSH(ctx->lastmark);
1583
1584
14.1M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
14.1M
                if (ret) {
1586
2.35k
                    if (state->repeat)
1587
2.35k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.35k
                    RETURN_ON_ERROR(ret);
1589
2.35k
                    RETURN_FAILURE;
1590
2.35k
                }
1591
7.05M
                if (state->repeat)
1592
7.05M
                    MARK_POP(ctx->lastmark);
1593
7.05M
                LASTMARK_RESTORE();
1594
7.05M
            }
1595
7.05M
            pattern += pattern[0];
1596
7.05M
            DISPATCH;
1597
1598
7.05M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
483M
exit:
1620
483M
    ctx_pos = ctx->last_ctx_pos;
1621
483M
    jump = ctx->jump;
1622
483M
    DATA_POP_DISCARD(ctx);
1623
483M
    if (ctx_pos == -1) {
1624
215M
        state->sigcount = sigcount;
1625
215M
        return ret;
1626
215M
    }
1627
268M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
268M
    switch (jump) {
1630
60.7M
        case JUMP_MAX_UNTIL_2:
1631
60.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
60.7M
            goto jump_max_until_2;
1633
39.5M
        case JUMP_MAX_UNTIL_3:
1634
39.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
39.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
21.8M
        case JUMP_BRANCH:
1643
21.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
21.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
39.3M
        case JUMP_REPEAT:
1658
39.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
39.3M
            goto jump_repeat;
1660
3.94M
        case JUMP_REPEAT_ONE_1:
1661
3.94M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.94M
            goto jump_repeat_one_1;
1663
72.3M
        case JUMP_REPEAT_ONE_2:
1664
72.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
72.3M
            goto jump_repeat_one_2;
1666
12.9M
        case JUMP_MIN_REPEAT_ONE:
1667
12.9M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
12.9M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
10.3M
        case JUMP_ASSERT:
1673
10.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
10.3M
            goto jump_assert;
1675
7.05M
        case JUMP_ASSERT_NOT:
1676
7.05M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.05M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
268M
    }
1683
1684
0
    return ret; /* should never get here */
1685
268M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
80.5M
{
601
80.5M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
80.5M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
80.5M
    Py_ssize_t ret = 0;
604
80.5M
    int jump;
605
80.5M
    unsigned int sigcount = state->sigcount;
606
607
80.5M
    SRE(match_context)* ctx;
608
80.5M
    SRE(match_context)* nextctx;
609
80.5M
    INIT_TRACE(state);
610
611
80.5M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
80.5M
    DATA_ALLOC(SRE(match_context), ctx);
614
80.5M
    ctx->last_ctx_pos = -1;
615
80.5M
    ctx->jump = JUMP_NONE;
616
80.5M
    ctx->toplevel = toplevel;
617
80.5M
    ctx_pos = alloc_pos;
618
619
80.5M
#if USE_COMPUTED_GOTOS
620
80.5M
#include "sre_targets.h"
621
80.5M
#endif
622
623
366M
entrance:
624
625
366M
    ;  // Fashion statement.
626
366M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
366M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.28k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.28k
                   end - ptr, (size_t) pattern[3]));
634
3.28k
            RETURN_FAILURE;
635
3.28k
        }
636
10.3M
        pattern += pattern[1] + 1;
637
10.3M
    }
638
639
366M
#if USE_COMPUTED_GOTOS
640
366M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
366M
    {
647
648
366M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
122M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
122M
                   ptr, pattern[0]));
653
122M
            {
654
122M
                int i = pattern[0];
655
122M
                if (i & 1)
656
21.6M
                    state->lastindex = i/2 + 1;
657
122M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
120M
                    int j = state->lastmark + 1;
663
122M
                    while (j < i)
664
1.88M
                        state->mark[j++] = NULL;
665
120M
                    state->lastmark = i;
666
120M
                }
667
122M
                state->mark[i] = ptr;
668
122M
            }
669
122M
            pattern++;
670
122M
            DISPATCH;
671
672
122M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
26.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
26.8M
                   ptr, *pattern));
677
26.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
11.1M
                RETURN_FAILURE;
679
15.7M
            pattern++;
680
15.7M
            ptr++;
681
15.7M
            DISPATCH;
682
683
15.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
33.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
33.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
33.1M
            if (ctx->toplevel &&
698
9.32M
                ((state->match_all && ptr != state->end) ||
699
9.32M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
33.1M
            state->ptr = ptr;
704
33.1M
            RETURN_SUCCESS;
705
706
17.7M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
17.7M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
17.7M
            if (!SRE(at)(state, ptr, *pattern))
711
17.7M
                RETURN_FAILURE;
712
23.9k
            pattern++;
713
23.9k
            DISPATCH;
714
715
23.9k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
74.5M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
74.5M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
74.5M
            if (ptr >= end ||
749
74.5M
                !SRE(charset)(state, pattern + 1, *ptr))
750
21.5M
                RETURN_FAILURE;
751
53.0M
            pattern += pattern[0];
752
53.0M
            ptr++;
753
53.0M
            DISPATCH;
754
755
53.0M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.76M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.76M
                   pattern, ptr, pattern[0]));
758
1.76M
            if (ptr >= end ||
759
1.76M
                sre_lower_ascii(*ptr) != *pattern)
760
19.8k
                RETURN_FAILURE;
761
1.74M
            pattern++;
762
1.74M
            ptr++;
763
1.74M
            DISPATCH;
764
765
1.74M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
24.6M
        TARGET(SRE_OP_JUMP):
845
24.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
24.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
24.6M
                   ptr, pattern[0]));
850
24.6M
            pattern += pattern[0];
851
24.6M
            DISPATCH;
852
853
33.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
33.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
33.0M
            LASTMARK_SAVE();
858
33.0M
            if (state->repeat)
859
29.4M
                MARK_PUSH(ctx->lastmark);
860
70.9M
            for (; pattern[0]; pattern += pattern[0]) {
861
61.9M
                if (pattern[1] == SRE_OP_LITERAL &&
862
30.8M
                    (ptr >= end ||
863
30.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
21.7M
                    continue;
865
40.2M
                if (pattern[1] == SRE_OP_IN &&
866
25.1M
                    (ptr >= end ||
867
25.1M
                     !SRE(charset)(state, pattern + 3,
868
25.1M
                                   (SRE_CODE) *ptr)))
869
15.1M
                    continue;
870
25.0M
                state->ptr = ptr;
871
25.0M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
25.0M
                if (ret) {
873
24.1M
                    if (state->repeat)
874
21.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.1M
                    RETURN_ON_ERROR(ret);
876
24.1M
                    RETURN_SUCCESS;
877
24.1M
                }
878
990k
                if (state->repeat)
879
7.39k
                    MARK_POP_KEEP(ctx->lastmark);
880
990k
                LASTMARK_RESTORE();
881
990k
            }
882
8.95M
            if (state->repeat)
883
8.19M
                MARK_POP_DISCARD(ctx->lastmark);
884
8.95M
            RETURN_FAILURE;
885
886
128M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
128M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
128M
                   pattern[1], pattern[2]));
898
899
128M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
19.4k
                RETURN_FAILURE; /* cannot match */
901
902
128M
            state->ptr = ptr;
903
904
128M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
128M
            RETURN_ON_ERROR(ret);
906
128M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
128M
            ctx->count = ret;
908
128M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
128M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
62.6M
                RETURN_FAILURE;
917
918
66.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
845k
                ptr == state->end &&
920
3.21k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.21k
            {
922
                /* tail is empty.  we're finished */
923
3.21k
                state->ptr = ptr;
924
3.21k
                RETURN_SUCCESS;
925
3.21k
            }
926
927
66.1M
            LASTMARK_SAVE();
928
66.1M
            if (state->repeat)
929
48.7M
                MARK_PUSH(ctx->lastmark);
930
931
66.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
18.4M
                ctx->u.chr = pattern[pattern[0]+1];
935
18.4M
                for (;;) {
936
44.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
31.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
26.3M
                        ptr--;
939
26.3M
                        ctx->count--;
940
26.3M
                    }
941
18.4M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
12.8M
                        break;
943
5.53M
                    state->ptr = ptr;
944
5.53M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.53M
                            pattern+pattern[0]);
946
5.53M
                    if (ret) {
947
5.53M
                        if (state->repeat)
948
5.52M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.53M
                        RETURN_ON_ERROR(ret);
950
5.53M
                        RETURN_SUCCESS;
951
5.53M
                    }
952
256
                    if (state->repeat)
953
256
                        MARK_POP_KEEP(ctx->lastmark);
954
256
                    LASTMARK_RESTORE();
955
956
256
                    ptr--;
957
256
                    ctx->count--;
958
256
                }
959
12.8M
                if (state->repeat)
960
12.8M
                    MARK_POP_DISCARD(ctx->lastmark);
961
47.7M
            } else {
962
                /* general case */
963
68.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
66.5M
                    state->ptr = ptr;
965
66.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
66.5M
                            pattern+pattern[0]);
967
66.5M
                    if (ret) {
968
45.6M
                        if (state->repeat)
969
30.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
45.6M
                        RETURN_ON_ERROR(ret);
971
45.6M
                        RETURN_SUCCESS;
972
45.6M
                    }
973
20.8M
                    if (state->repeat)
974
219k
                        MARK_POP_KEEP(ctx->lastmark);
975
20.8M
                    LASTMARK_RESTORE();
976
977
20.8M
                    ptr--;
978
20.8M
                    ctx->count--;
979
20.8M
                }
980
2.13M
                if (state->repeat)
981
139k
                    MARK_POP_DISCARD(ctx->lastmark);
982
2.13M
            }
983
15.0M
            RETURN_FAILURE;
984
985
8.97k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
8.97k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
8.97k
                   pattern[1], pattern[2]));
997
998
8.97k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
8.97k
            state->ptr = ptr;
1002
1003
8.97k
            if (pattern[1] == 0)
1004
8.97k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
8.97k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
8.97k
            } else {
1028
                /* general case */
1029
8.97k
                LASTMARK_SAVE();
1030
8.97k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
1.98M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
1.98M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
1.98M
                    state->ptr = ptr;
1036
1.98M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
1.98M
                            pattern+pattern[0]);
1038
1.98M
                    if (ret) {
1039
8.97k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
8.97k
                        RETURN_ON_ERROR(ret);
1042
8.97k
                        RETURN_SUCCESS;
1043
8.97k
                    }
1044
1.97M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
1.97M
                    LASTMARK_RESTORE();
1047
1048
1.97M
                    state->ptr = ptr;
1049
1.97M
                    ret = SRE(count)(state, pattern+3, 1);
1050
1.97M
                    RETURN_ON_ERROR(ret);
1051
1.97M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
1.97M
                    if (ret == 0)
1053
0
                        break;
1054
1.97M
                    assert(ret == 1);
1055
1.97M
                    ptr++;
1056
1.97M
                    ctx->count++;
1057
1.97M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
42.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
42.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
42.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
42.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
42.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
42.6M
            ctx->u.rep->count = -1;
1131
42.6M
            ctx->u.rep->pattern = pattern;
1132
42.6M
            ctx->u.rep->prev = state->repeat;
1133
42.6M
            ctx->u.rep->last_ptr = NULL;
1134
42.6M
            state->repeat = ctx->u.rep;
1135
1136
42.6M
            state->ptr = ptr;
1137
42.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
42.6M
            state->repeat = ctx->u.rep->prev;
1139
42.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
42.6M
            if (ret) {
1142
26.0M
                RETURN_ON_ERROR(ret);
1143
26.0M
                RETURN_SUCCESS;
1144
26.0M
            }
1145
16.6M
            RETURN_FAILURE;
1146
1147
77.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
77.0M
            ctx->u.rep = state->repeat;
1155
77.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
77.0M
            state->ptr = ptr;
1159
1160
77.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
77.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
77.0M
                   ptr, ctx->count));
1164
1165
77.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
77.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.48M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
72.5M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
72.5M
                ctx->u.rep->count = ctx->count;
1185
72.5M
                LASTMARK_SAVE();
1186
72.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
72.5M
                LAST_PTR_PUSH();
1189
72.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
72.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
72.5M
                        ctx->u.rep->pattern+3);
1192
72.5M
                LAST_PTR_POP();
1193
72.5M
                if (ret) {
1194
34.3M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
34.3M
                    RETURN_ON_ERROR(ret);
1196
34.3M
                    RETURN_SUCCESS;
1197
34.3M
                }
1198
38.2M
                MARK_POP(ctx->lastmark);
1199
38.2M
                LASTMARK_RESTORE();
1200
38.2M
                ctx->u.rep->count = ctx->count-1;
1201
38.2M
                state->ptr = ptr;
1202
38.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
42.7M
            state->repeat = ctx->u.rep->prev;
1207
42.7M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
42.7M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
42.7M
            RETURN_ON_SUCCESS(ret);
1211
16.7M
            state->ptr = ptr;
1212
16.7M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
18.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
18.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
18.3M
                   ptr, pattern[1]));
1565
18.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
18.3M
            state->ptr = ptr - pattern[1];
1568
18.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
18.3M
            RETURN_ON_FAILURE(ret);
1570
17.3M
            pattern += pattern[0];
1571
17.3M
            DISPATCH;
1572
1573
17.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
10.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
10.1M
                   ptr, pattern[1]));
1578
10.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
10.1M
                state->ptr = ptr - pattern[1];
1580
10.1M
                LASTMARK_SAVE();
1581
10.1M
                if (state->repeat)
1582
10.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
20.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
20.3M
                if (ret) {
1586
7.11k
                    if (state->repeat)
1587
7.11k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
7.11k
                    RETURN_ON_ERROR(ret);
1589
7.11k
                    RETURN_FAILURE;
1590
7.11k
                }
1591
10.1M
                if (state->repeat)
1592
10.1M
                    MARK_POP(ctx->lastmark);
1593
10.1M
                LASTMARK_RESTORE();
1594
10.1M
            }
1595
10.1M
            pattern += pattern[0];
1596
10.1M
            DISPATCH;
1597
1598
10.1M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
366M
exit:
1620
366M
    ctx_pos = ctx->last_ctx_pos;
1621
366M
    jump = ctx->jump;
1622
366M
    DATA_POP_DISCARD(ctx);
1623
366M
    if (ctx_pos == -1) {
1624
80.5M
        state->sigcount = sigcount;
1625
80.5M
        return ret;
1626
80.5M
    }
1627
285M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
285M
    switch (jump) {
1630
72.5M
        case JUMP_MAX_UNTIL_2:
1631
72.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
72.5M
            goto jump_max_until_2;
1633
42.7M
        case JUMP_MAX_UNTIL_3:
1634
42.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
42.7M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
25.0M
        case JUMP_BRANCH:
1643
25.0M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
25.0M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
42.6M
        case JUMP_REPEAT:
1658
42.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
42.6M
            goto jump_repeat;
1660
5.53M
        case JUMP_REPEAT_ONE_1:
1661
5.53M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.53M
            goto jump_repeat_one_1;
1663
66.5M
        case JUMP_REPEAT_ONE_2:
1664
66.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
66.5M
            goto jump_repeat_one_2;
1666
1.98M
        case JUMP_MIN_REPEAT_ONE:
1667
1.98M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
1.98M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
18.3M
        case JUMP_ASSERT:
1673
18.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
18.3M
            goto jump_assert;
1675
10.1M
        case JUMP_ASSERT_NOT:
1676
10.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
10.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
285M
    }
1683
1684
0
    return ret; /* should never get here */
1685
285M
}
1686
1687
/* Capture groups must be reset between two consecutive SRE(match) calls
   made from a loop: this sets both state->lastmark and state->lastindex
   to -1.  The macro takes no arguments and uses the variable `state`
   from the scope in which it is expanded. */
1688
#define RESET_CAPTURE_GROUP() \
1689
313M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Search forward from state->start for a position where `pattern`
   matches.

   state   - match state; state->start and state->end bound the scan.
             state->start, state->ptr and state->must_advance are
             updated while scanning, and capture-group bookkeeping is
             reset (RESET_CAPTURE_GROUP) after each failed attempt in a
             scan loop.
   pattern - compiled pattern; may begin with an SRE_OP_INFO block that
             carries the minimum width and an optional literal prefix
             or leading charset used to skip hopeless positions.

   Returns 0 when no position matches.  A nonzero status from
   SRE(match) is returned unchanged; 1 is returned directly when a
   literal prefix is found and SRE_INFO_LITERAL is set. */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
99.9M
{
1694
99.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
99.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
99.9M
    Py_ssize_t status = 0;
1697
99.9M
    Py_ssize_t prefix_len = 0;
1698
99.9M
    Py_ssize_t prefix_skip = 0;
1699
99.9M
    SRE_CODE* prefix = NULL;
1700
99.9M
    SRE_CODE* charset = NULL;
1701
99.9M
    SRE_CODE* overlap = NULL;
1702
99.9M
    int flags = 0;
1703
99.9M
    INIT_TRACE(state);
1704
1705
99.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
99.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
99.9M
        flags = pattern[2];
1713
1714
99.9M
        /* fewer characters remain than the minimum width in pattern[3]:
           reject without attempting a match */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.23M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.23M
                   end - ptr, (size_t) pattern[3]));
1717
4.23M
            return 0;
1718
4.23M
        }
1719
95.6M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
9.44M
            end -= pattern[3] - 1;
1723
9.44M
            if (end <= ptr)
1724
0
                end = ptr;
1725
9.44M
        }
1726
1727
95.6M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
9.44M
            prefix_len = pattern[5];
1731
9.44M
            prefix_skip = pattern[6];
1732
9.44M
            prefix = pattern + 7;
1733
9.44M
            /* biased by -1 so that overlap[i] with i >= 1 indexes the
               overlap data stored right after the prefix data */
            overlap = prefix + prefix_len - 1;
1734
86.2M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
77.2M
            charset = pattern + 5;
1738
1739
95.6M
        /* step past the INFO block using its skip field */
        pattern += 1 + pattern[1];
1740
95.6M
    }
1741
1742
95.6M
    TRACE(("prefix = %p %zd %zd\n",
1743
95.6M
           prefix, prefix_len, prefix_skip));
1744
95.6M
    TRACE(("charset = %p\n", charset));
1745
1746
95.6M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
8.82M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
6.04M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
6.04M
#endif
1753
6.04M
        /* scan up to the real end of the input; this replaces the
           minimum-width adjustment applied to `end` above */
        end = (SRE_CHAR *)state->end;
1754
6.04M
        state->must_advance = 0;
1755
9.24M
        while (ptr < end) {
1756
97.6M
            /* advance to the next occurrence of the literal */
            while (*ptr != c) {
1757
89.4M
                if (++ptr >= end)
1758
1.03M
                    return 0;
1759
89.4M
            }
1760
8.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
8.14M
            state->start = ptr;
1762
8.14M
            state->ptr = ptr + prefix_skip;
1763
8.14M
            if (flags & SRE_INFO_LITERAL)
1764
4.13k
                return 1; /* we got all of it */
1765
8.13M
            /* state->ptr is already prefix_skip characters in, so the
               match starts 2*prefix_skip code words into the pattern.
               NOTE(review): presumably each skipped literal occupies
               two code words -- confirm against the pattern compiler. */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
8.13M
            if (status != 0)
1767
7.71M
                return status;
1768
424k
            ++ptr;
1769
424k
            RESET_CAPTURE_GROUP();
1770
424k
        }
1771
66.5k
        return 0;
1772
6.04M
    }
1773
1774
86.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
626k
        Py_ssize_t i = 0;
1778
1779
626k
        /* as in the single-literal case, scan up to the real end */
        end = (SRE_CHAR *)state->end;
1780
626k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.62M
        for (i = 0; i < prefix_len; i++)
1784
1.08M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
542k
#endif
1787
1.24M
        while (ptr < end) {
1788
1.24M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.50M
            /* find the first prefix character; because of the
               post-increment, ptr ends up one past it */
            while (*ptr++ != c) {
1790
5.25M
                if (ptr >= end)
1791
275
                    return 0;
1792
5.25M
            }
1793
1.24M
            if (ptr >= end)
1794
54
                return 0;
1795
1796
1.24M
            /* one prefix character matched so far */
            i = 1;
1797
1.24M
            state->must_advance = 0;
1798
1.24M
            do {
1799
1.24M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.19M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.19M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.19M
                    /* ptr is on the last prefix character here */
                    state->start = ptr - (prefix_len - 1);
1808
1.19M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.19M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.19M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.19M
                    if (status != 0)
1813
626k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
571k
                    if (++ptr >= end)
1816
50
                        return 0;
1817
571k
                    RESET_CAPTURE_GROUP();
1818
571k
                }
1819
622k
                /* character mismatch or failed full match: continue
                   with the matched length given by the overlap table;
                   0 leaves this loop and restarts the outer scan */
                i = overlap[i];
1820
622k
            } while (i != 0);
1821
1.24M
        }
1822
0
        return 0;
1823
626k
    }
1824
1825
86.2M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
77.2M
        end = (SRE_CHAR *)state->end;
1828
77.2M
        state->must_advance = 0;
1829
79.6M
        for (;;) {
1830
333M
            /* skip characters that SRE(charset) rejects */
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
254M
                ptr++;
1832
79.6M
            if (ptr >= end)
1833
3.90M
                return 0;
1834
75.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
75.7M
            state->start = ptr;
1836
75.7M
            state->ptr = ptr;
1837
75.7M
            status = SRE(match)(state, pattern, 0);
1838
75.7M
            if (status != 0)
1839
73.3M
                break;
1840
2.41M
            ptr++;
1841
2.41M
            RESET_CAPTURE_GROUP();
1842
2.41M
        }
1843
77.2M
    } else {
1844
        /* general case */
1845
9.03M
        /* `end` is not re-read from state->end in this branch, so it
           still carries the minimum-width adjustment made above */
        assert(ptr <= end);
1846
9.03M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
9.03M
        state->start = state->ptr = ptr;
1848
9.03M
        /* first attempt at the starting position; this is the only
           SRE(match) call here that passes 1 as its last argument */
        status = SRE(match)(state, pattern, 1);
1849
9.03M
        state->must_advance = 0;
1850
9.03M
        /* the first attempt failed and the pattern opens with a
           beginning anchor: do not retry at later positions, park
           start/ptr at `end` and report no match */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.16M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
48
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.16M
        {
1854
4.16M
            state->start = state->ptr = ptr = end;
1855
4.16M
            return 0;
1856
4.16M
        }
1857
315M
        /* retry at each following position, up to and including `end` */
        while (status == 0 && ptr < end) {
1858
310M
            ptr++;
1859
310M
            RESET_CAPTURE_GROUP();
1860
310M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
310M
            state->start = state->ptr = ptr;
1862
310M
            status = SRE(match)(state, pattern, 0);
1863
310M
        }
1864
4.86M
    }
1865
1866
78.1M
    return status;
1867
86.2M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
46.4M
{
1694
46.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
46.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
46.4M
    Py_ssize_t status = 0;
1697
46.4M
    Py_ssize_t prefix_len = 0;
1698
46.4M
    Py_ssize_t prefix_skip = 0;
1699
46.4M
    SRE_CODE* prefix = NULL;
1700
46.4M
    SRE_CODE* charset = NULL;
1701
46.4M
    SRE_CODE* overlap = NULL;
1702
46.4M
    int flags = 0;
1703
46.4M
    INIT_TRACE(state);
1704
1705
46.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
46.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
46.4M
        flags = pattern[2];
1713
1714
46.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.11M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.11M
                   end - ptr, (size_t) pattern[3]));
1717
4.11M
            return 0;
1718
4.11M
        }
1719
42.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.54M
            end -= pattern[3] - 1;
1723
3.54M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.54M
        }
1726
1727
42.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.54M
            prefix_len = pattern[5];
1731
3.54M
            prefix_skip = pattern[6];
1732
3.54M
            prefix = pattern + 7;
1733
3.54M
            overlap = prefix + prefix_len - 1;
1734
38.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
31.8M
            charset = pattern + 5;
1738
1739
42.3M
        pattern += 1 + pattern[1];
1740
42.3M
    }
1741
1742
42.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
42.3M
           prefix, prefix_len, prefix_skip));
1744
42.3M
    TRACE(("charset = %p\n", charset));
1745
1746
42.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.50M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.50M
#if SIZEOF_SRE_CHAR < 4
1750
3.50M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.50M
#endif
1753
3.50M
        end = (SRE_CHAR *)state->end;
1754
3.50M
        state->must_advance = 0;
1755
3.73M
        while (ptr < end) {
1756
25.9M
            while (*ptr != c) {
1757
23.2M
                if (++ptr >= end)
1758
966k
                    return 0;
1759
23.2M
            }
1760
2.70M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.70M
            state->start = ptr;
1762
2.70M
            state->ptr = ptr + prefix_skip;
1763
2.70M
            if (flags & SRE_INFO_LITERAL)
1764
366
                return 1; /* we got all of it */
1765
2.70M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.70M
            if (status != 0)
1767
2.47M
                return status;
1768
229k
            ++ptr;
1769
229k
            RESET_CAPTURE_GROUP();
1770
229k
        }
1771
62.6k
        return 0;
1772
3.50M
    }
1773
1774
38.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
40.1k
        Py_ssize_t i = 0;
1778
1779
40.1k
        end = (SRE_CHAR *)state->end;
1780
40.1k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
40.1k
#if SIZEOF_SRE_CHAR < 4
1783
120k
        for (i = 0; i < prefix_len; i++)
1784
80.3k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
40.1k
#endif
1787
106k
        while (ptr < end) {
1788
106k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.11M
            while (*ptr++ != c) {
1790
1.00M
                if (ptr >= end)
1791
57
                    return 0;
1792
1.00M
            }
1793
106k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
106k
            i = 1;
1797
106k
            state->must_advance = 0;
1798
106k
            do {
1799
106k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
91.1k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
91.1k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
91.1k
                    state->start = ptr - (prefix_len - 1);
1808
91.1k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
91.1k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
91.1k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
91.1k
                    if (status != 0)
1813
40.0k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
51.0k
                    if (++ptr >= end)
1816
23
                        return 0;
1817
51.0k
                    RESET_CAPTURE_GROUP();
1818
51.0k
                }
1819
66.3k
                i = overlap[i];
1820
66.3k
            } while (i != 0);
1821
106k
        }
1822
0
        return 0;
1823
40.1k
    }
1824
1825
38.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
31.8M
        end = (SRE_CHAR *)state->end;
1828
31.8M
        state->must_advance = 0;
1829
33.2M
        for (;;) {
1830
94.6M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
61.3M
                ptr++;
1832
33.2M
            if (ptr >= end)
1833
2.79M
                return 0;
1834
30.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
30.4M
            state->start = ptr;
1836
30.4M
            state->ptr = ptr;
1837
30.4M
            status = SRE(match)(state, pattern, 0);
1838
30.4M
            if (status != 0)
1839
29.0M
                break;
1840
1.40M
            ptr++;
1841
1.40M
            RESET_CAPTURE_GROUP();
1842
1.40M
        }
1843
31.8M
    } else {
1844
        /* general case */
1845
6.95M
        assert(ptr <= end);
1846
6.95M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
6.95M
        state->start = state->ptr = ptr;
1848
6.95M
        status = SRE(match)(state, pattern, 1);
1849
6.95M
        state->must_advance = 0;
1850
6.95M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
17
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
3.40M
        {
1854
3.40M
            state->start = state->ptr = ptr = end;
1855
3.40M
            return 0;
1856
3.40M
        }
1857
97.7M
        while (status == 0 && ptr < end) {
1858
94.1M
            ptr++;
1859
94.1M
            RESET_CAPTURE_GROUP();
1860
94.1M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
94.1M
            state->start = state->ptr = ptr;
1862
94.1M
            status = SRE(match)(state, pattern, 0);
1863
94.1M
        }
1864
3.55M
    }
1865
1866
32.6M
    return status;
1867
38.8M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
46.9M
{
1694
46.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
46.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
46.9M
    Py_ssize_t status = 0;
1697
46.9M
    Py_ssize_t prefix_len = 0;
1698
46.9M
    Py_ssize_t prefix_skip = 0;
1699
46.9M
    SRE_CODE* prefix = NULL;
1700
46.9M
    SRE_CODE* charset = NULL;
1701
46.9M
    SRE_CODE* overlap = NULL;
1702
46.9M
    int flags = 0;
1703
46.9M
    INIT_TRACE(state);
1704
1705
46.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
46.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
46.9M
        flags = pattern[2];
1713
1714
46.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
114k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
114k
                   end - ptr, (size_t) pattern[3]));
1717
114k
            return 0;
1718
114k
        }
1719
46.8M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.04M
            end -= pattern[3] - 1;
1723
3.04M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.04M
        }
1726
1727
46.8M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.04M
            prefix_len = pattern[5];
1731
3.04M
            prefix_skip = pattern[6];
1732
3.04M
            prefix = pattern + 7;
1733
3.04M
            overlap = prefix + prefix_len - 1;
1734
43.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
41.8M
            charset = pattern + 5;
1738
1739
46.8M
        pattern += 1 + pattern[1];
1740
46.8M
    }
1741
1742
46.8M
    TRACE(("prefix = %p %zd %zd\n",
1743
46.8M
           prefix, prefix_len, prefix_skip));
1744
46.8M
    TRACE(("charset = %p\n", charset));
1745
1746
46.8M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.54M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.54M
#if SIZEOF_SRE_CHAR < 4
1750
2.54M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.54M
#endif
1753
2.54M
        end = (SRE_CHAR *)state->end;
1754
2.54M
        state->must_advance = 0;
1755
2.62M
        while (ptr < end) {
1756
44.9M
            while (*ptr != c) {
1757
42.4M
                if (++ptr >= end)
1758
67.6k
                    return 0;
1759
42.4M
            }
1760
2.55M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.55M
            state->start = ptr;
1762
2.55M
            state->ptr = ptr + prefix_skip;
1763
2.55M
            if (flags & SRE_INFO_LITERAL)
1764
2.62k
                return 1; /* we got all of it */
1765
2.55M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.55M
            if (status != 0)
1767
2.46M
                return status;
1768
83.0k
            ++ptr;
1769
83.0k
            RESET_CAPTURE_GROUP();
1770
83.0k
        }
1771
3.06k
        return 0;
1772
2.54M
    }
1773
1774
44.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
502k
        Py_ssize_t i = 0;
1778
1779
502k
        end = (SRE_CHAR *)state->end;
1780
502k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
502k
#if SIZEOF_SRE_CHAR < 4
1783
1.50M
        for (i = 0; i < prefix_len; i++)
1784
1.00M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
502k
#endif
1787
765k
        while (ptr < end) {
1788
765k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.28M
            while (*ptr++ != c) {
1790
1.52M
                if (ptr >= end)
1791
99
                    return 0;
1792
1.52M
            }
1793
765k
            if (ptr >= end)
1794
14
                return 0;
1795
1796
765k
            i = 1;
1797
765k
            state->must_advance = 0;
1798
765k
            do {
1799
765k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
742k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
742k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
742k
                    state->start = ptr - (prefix_len - 1);
1808
742k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
742k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
742k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
742k
                    if (status != 0)
1813
502k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
240k
                    if (++ptr >= end)
1816
16
                        return 0;
1817
240k
                    RESET_CAPTURE_GROUP();
1818
240k
                }
1819
263k
                i = overlap[i];
1820
263k
            } while (i != 0);
1821
765k
        }
1822
0
        return 0;
1823
502k
    }
1824
1825
43.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
41.8M
        end = (SRE_CHAR *)state->end;
1828
41.8M
        state->must_advance = 0;
1829
42.3M
        for (;;) {
1830
175M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
132M
                ptr++;
1832
42.3M
            if (ptr >= end)
1833
1.06M
                return 0;
1834
41.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
41.2M
            state->start = ptr;
1836
41.2M
            state->ptr = ptr;
1837
41.2M
            status = SRE(match)(state, pattern, 0);
1838
41.2M
            if (status != 0)
1839
40.8M
                break;
1840
427k
            ptr++;
1841
427k
            RESET_CAPTURE_GROUP();
1842
427k
        }
1843
41.8M
    } else {
1844
        /* general case */
1845
1.87M
        assert(ptr <= end);
1846
1.87M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.87M
        state->start = state->ptr = ptr;
1848
1.87M
        status = SRE(match)(state, pattern, 1);
1849
1.87M
        state->must_advance = 0;
1850
1.87M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
755k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
14
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
755k
        {
1854
755k
            state->start = state->ptr = ptr = end;
1855
755k
            return 0;
1856
755k
        }
1857
154M
        while (status == 0 && ptr < end) {
1858
153M
            ptr++;
1859
153M
            RESET_CAPTURE_GROUP();
1860
153M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
153M
            state->start = state->ptr = ptr;
1862
153M
            status = SRE(match)(state, pattern, 0);
1863
153M
        }
1864
1.12M
    }
1865
1866
41.9M
    return status;
1867
43.7M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.51M
{
1694
6.51M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.51M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.51M
    Py_ssize_t status = 0;
1697
6.51M
    Py_ssize_t prefix_len = 0;
1698
6.51M
    Py_ssize_t prefix_skip = 0;
1699
6.51M
    SRE_CODE* prefix = NULL;
1700
6.51M
    SRE_CODE* charset = NULL;
1701
6.51M
    SRE_CODE* overlap = NULL;
1702
6.51M
    int flags = 0;
1703
6.51M
    INIT_TRACE(state);
1704
1705
6.51M
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.51M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.51M
        flags = pattern[2];
1713
1714
6.51M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.81k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.81k
                   end - ptr, (size_t) pattern[3]));
1717
5.81k
            return 0;
1718
5.81k
        }
1719
6.50M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.85M
            end -= pattern[3] - 1;
1723
2.85M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.85M
        }
1726
1727
6.50M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.85M
            prefix_len = pattern[5];
1731
2.85M
            prefix_skip = pattern[6];
1732
2.85M
            prefix = pattern + 7;
1733
2.85M
            overlap = prefix + prefix_len - 1;
1734
3.64M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.44M
            charset = pattern + 5;
1738
1739
6.50M
        pattern += 1 + pattern[1];
1740
6.50M
    }
1741
1742
6.50M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.50M
           prefix, prefix_len, prefix_skip));
1744
6.50M
    TRACE(("charset = %p\n", charset));
1745
1746
6.50M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.77M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.77M
        end = (SRE_CHAR *)state->end;
1754
2.77M
        state->must_advance = 0;
1755
2.88M
        while (ptr < end) {
1756
26.7M
            while (*ptr != c) {
1757
23.8M
                if (++ptr >= end)
1758
3.97k
                    return 0;
1759
23.8M
            }
1760
2.88M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.88M
            state->start = ptr;
1762
2.88M
            state->ptr = ptr + prefix_skip;
1763
2.88M
            if (flags & SRE_INFO_LITERAL)
1764
1.14k
                return 1; /* we got all of it */
1765
2.88M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.88M
            if (status != 0)
1767
2.76M
                return status;
1768
111k
            ++ptr;
1769
111k
            RESET_CAPTURE_GROUP();
1770
111k
        }
1771
839
        return 0;
1772
2.77M
    }
1773
1774
3.73M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
84.4k
        Py_ssize_t i = 0;
1778
1779
84.4k
        end = (SRE_CHAR *)state->end;
1780
84.4k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
376k
        while (ptr < end) {
1788
376k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.10M
            while (*ptr++ != c) {
1790
2.73M
                if (ptr >= end)
1791
119
                    return 0;
1792
2.73M
            }
1793
376k
            if (ptr >= end)
1794
17
                return 0;
1795
1796
376k
            i = 1;
1797
376k
            state->must_advance = 0;
1798
377k
            do {
1799
377k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
364k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
364k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
364k
                    state->start = ptr - (prefix_len - 1);
1808
364k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
364k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
364k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
364k
                    if (status != 0)
1813
84.2k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
279k
                    if (++ptr >= end)
1816
11
                        return 0;
1817
279k
                    RESET_CAPTURE_GROUP();
1818
279k
                }
1819
293k
                i = overlap[i];
1820
293k
            } while (i != 0);
1821
376k
        }
1822
0
        return 0;
1823
84.4k
    }
1824
1825
3.64M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.44M
        end = (SRE_CHAR *)state->end;
1828
3.44M
        state->must_advance = 0;
1829
4.02M
        for (;;) {
1830
64.0M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
60.0M
                ptr++;
1832
4.02M
            if (ptr >= end)
1833
48.2k
                return 0;
1834
3.97M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.97M
            state->start = ptr;
1836
3.97M
            state->ptr = ptr;
1837
3.97M
            status = SRE(match)(state, pattern, 0);
1838
3.97M
            if (status != 0)
1839
3.39M
                break;
1840
586k
            ptr++;
1841
586k
            RESET_CAPTURE_GROUP();
1842
586k
        }
1843
3.44M
    } else {
1844
        /* general case */
1845
205k
        assert(ptr <= end);
1846
205k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
205k
        state->start = state->ptr = ptr;
1848
205k
        status = SRE(match)(state, pattern, 1);
1849
205k
        state->must_advance = 0;
1850
205k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
10.9k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
17
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
10.9k
        {
1854
10.9k
            state->start = state->ptr = ptr = end;
1855
10.9k
            return 0;
1856
10.9k
        }
1857
63.0M
        while (status == 0 && ptr < end) {
1858
62.8M
            ptr++;
1859
62.8M
            RESET_CAPTURE_GROUP();
1860
62.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
62.8M
            state->start = state->ptr = ptr;
1862
62.8M
            status = SRE(match)(state, pattern, 0);
1863
62.8M
        }
1864
194k
    }
1865
1866
3.58M
    return status;
1867
3.64M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/