Coverage Report

Created: 2025-12-07 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
94.8M
{
18
    /* check if pointer is at given position */
19
20
94.8M
    Py_ssize_t thisp, thatp;
21
22
94.8M
    switch (at) {
23
24
11.1M
    case SRE_AT_BEGINNING:
25
11.1M
    case SRE_AT_BEGINNING_STRING:
26
11.1M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
79.5M
    case SRE_AT_END:
33
79.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
993k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
79.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
4.03M
    case SRE_AT_END_STRING:
42
4.03M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
94.8M
    }
87
88
0
    return 0;
89
94.8M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
29.0M
{
18
    /* check if pointer is at given position */
19
20
29.0M
    Py_ssize_t thisp, thatp;
21
22
29.0M
    switch (at) {
23
24
10.3M
    case SRE_AT_BEGINNING:
25
10.3M
    case SRE_AT_BEGINNING_STRING:
26
10.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
17.2M
    case SRE_AT_END:
33
17.2M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
423k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
17.2M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.46M
    case SRE_AT_END_STRING:
42
1.46M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
29.0M
    }
87
88
0
    return 0;
89
29.0M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
47.5M
{
18
    /* check if pointer is at given position */
19
20
47.5M
    Py_ssize_t thisp, thatp;
21
22
47.5M
    switch (at) {
23
24
860k
    case SRE_AT_BEGINNING:
25
860k
    case SRE_AT_BEGINNING_STRING:
26
860k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
45.8M
    case SRE_AT_END:
33
45.8M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
563k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
45.8M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
921k
    case SRE_AT_END_STRING:
42
921k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
47.5M
    }
87
88
0
    return 0;
89
47.5M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
18.1M
{
18
    /* check if pointer is at given position */
19
20
18.1M
    Py_ssize_t thisp, thatp;
21
22
18.1M
    switch (at) {
23
24
21.1k
    case SRE_AT_BEGINNING:
25
21.1k
    case SRE_AT_BEGINNING_STRING:
26
21.1k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
16.5M
    case SRE_AT_END:
33
16.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
7.01k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
16.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.64M
    case SRE_AT_END_STRING:
42
1.64M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
18.1M
    }
87
88
0
    return 0;
89
18.1M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.50G
{
94
    /* check if character is a member of the given set */
95
96
1.50G
    int ok = 1;
97
98
3.39G
    for (;;) {
99
3.39G
        switch (*set++) {
100
101
983M
        case SRE_OP_FAILURE:
102
983M
            return !ok;
103
104
1.16G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.16G
            if (ch == set[0])
107
7.87M
                return ok;
108
1.15G
            set++;
109
1.15G
            break;
110
111
97.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
97.1M
            if (sre_category(set[0], (int) ch))
114
66.8M
                return ok;
115
30.2M
            set++;
116
30.2M
            break;
117
118
522M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
522M
            if (ch < 256 &&
121
492M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
216M
                return ok;
123
306M
            set += 256/SRE_CODE_BITS;
124
306M
            break;
125
126
360M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
360M
            if (set[0] <= ch && ch <= set[1])
129
226M
                return ok;
130
133M
            set += 2;
131
133M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
261M
        case SRE_OP_NEGATE:
148
261M
            ok = !ok;
149
261M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.39G
        }
175
3.39G
    }
176
1.50G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
396M
{
94
    /* check if character is a member of the given set */
95
96
396M
    int ok = 1;
97
98
789M
    for (;;) {
99
789M
        switch (*set++) {
100
101
206M
        case SRE_OP_FAILURE:
102
206M
            return !ok;
103
104
223M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
223M
            if (ch == set[0])
107
5.12M
                return ok;
108
218M
            set++;
109
218M
            break;
110
111
32.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.5M
            if (sre_category(set[0], (int) ch))
114
20.6M
                return ok;
115
11.8M
            set++;
116
11.8M
            break;
117
118
98.5M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
98.5M
            if (ch < 256 &&
121
98.5M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
50.3M
                return ok;
123
48.2M
            set += 256/SRE_CODE_BITS;
124
48.2M
            break;
125
126
186M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
186M
            if (set[0] <= ch && ch <= set[1])
129
113M
                return ok;
130
72.8M
            set += 2;
131
72.8M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
41.5M
        case SRE_OP_NEGATE:
148
41.5M
            ok = !ok;
149
41.5M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
789M
        }
175
789M
    }
176
396M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
665M
{
94
    /* check if character is a member of the given set */
95
96
665M
    int ok = 1;
97
98
1.58G
    for (;;) {
99
1.58G
        switch (*set++) {
100
101
475M
        case SRE_OP_FAILURE:
102
475M
            return !ok;
103
104
623M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
623M
            if (ch == set[0])
107
1.58M
                return ok;
108
621M
            set++;
109
621M
            break;
110
111
47.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
47.4M
            if (sre_category(set[0], (int) ch))
114
31.4M
                return ok;
115
15.9M
            set++;
116
15.9M
            break;
117
118
181M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
181M
            if (ch < 256 &&
121
169M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
58.5M
                return ok;
123
122M
            set += 256/SRE_CODE_BITS;
124
122M
            break;
125
126
151M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
151M
            if (set[0] <= ch && ch <= set[1])
129
98.4M
                return ok;
130
52.5M
            set += 2;
131
52.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
109M
        case SRE_OP_NEGATE:
148
109M
            ok = !ok;
149
109M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.58G
        }
175
1.58G
    }
176
665M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
439M
{
94
    /* check if character is a member of the given set */
95
96
439M
    int ok = 1;
97
98
1.01G
    for (;;) {
99
1.01G
        switch (*set++) {
100
101
300M
        case SRE_OP_FAILURE:
102
300M
            return !ok;
103
104
319M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
319M
            if (ch == set[0])
107
1.15M
                return ok;
108
318M
            set++;
109
318M
            break;
110
111
17.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
17.1M
            if (sre_category(set[0], (int) ch))
114
14.7M
                return ok;
115
2.42M
            set++;
116
2.42M
            break;
117
118
242M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
242M
            if (ch < 256 &&
121
224M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
107M
                return ok;
123
135M
            set += 256/SRE_CODE_BITS;
124
135M
            break;
125
126
23.0M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
23.0M
            if (set[0] <= ch && ch <= set[1])
129
14.8M
                return ok;
130
8.19M
            set += 2;
131
8.19M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
110M
        case SRE_OP_NEGATE:
148
110M
            ok = !ok;
149
110M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.01G
        }
175
1.01G
    }
176
439M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
564M
{
195
564M
    SRE_CODE chr;
196
564M
    SRE_CHAR c;
197
564M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
564M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
564M
    Py_ssize_t i;
200
564M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
564M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
62.6M
        end = ptr + maxcount;
205
206
564M
    switch (pattern[0]) {
207
208
445M
    case SRE_OP_IN:
209
        /* repeated set */
210
445M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
839M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
393M
            ptr++;
213
445M
        break;
214
215
24.8M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
24.8M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
73.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
48.8M
            ptr++;
220
24.8M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
90.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
90.9M
        chr = pattern[1];
232
90.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
90.9M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
82.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
82.0M
        else
238
82.0M
#endif
239
95.9M
        while (ptr < end && *ptr == c)
240
5.03M
            ptr++;
241
90.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.80M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.80M
        chr = pattern[1];
270
2.80M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.80M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.25M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.25M
        else
276
1.25M
#endif
277
32.8M
        while (ptr < end && *ptr != c)
278
30.0M
            ptr++;
279
2.80M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
564M
    }
319
320
564M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
564M
           ptr - (SRE_CHAR*) state->ptr));
322
564M
    return ptr - (SRE_CHAR*) state->ptr;
323
564M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
180M
{
195
180M
    SRE_CODE chr;
196
180M
    SRE_CHAR c;
197
180M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
180M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
180M
    Py_ssize_t i;
200
180M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
180M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
19.8M
        end = ptr + maxcount;
205
206
180M
    switch (pattern[0]) {
207
208
104M
    case SRE_OP_IN:
209
        /* repeated set */
210
104M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
231M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
126M
            ptr++;
213
104M
        break;
214
215
7.18M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
7.18M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
21.9M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
14.7M
            ptr++;
220
7.18M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
67.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
67.7M
        chr = pattern[1];
232
67.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
67.7M
        c = (SRE_CHAR) chr;
234
67.7M
#if SIZEOF_SRE_CHAR < 4
235
67.7M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
67.7M
        else
238
67.7M
#endif
239
69.7M
        while (ptr < end && *ptr == c)
240
2.01M
            ptr++;
241
67.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
629k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
629k
        chr = pattern[1];
270
629k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
629k
        c = (SRE_CHAR) chr;
272
629k
#if SIZEOF_SRE_CHAR < 4
273
629k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
629k
        else
276
629k
#endif
277
7.80M
        while (ptr < end && *ptr != c)
278
7.17M
            ptr++;
279
629k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
180M
    }
319
320
180M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
180M
           ptr - (SRE_CHAR*) state->ptr));
322
180M
    return ptr - (SRE_CHAR*) state->ptr;
323
180M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
227M
{
195
227M
    SRE_CODE chr;
196
227M
    SRE_CHAR c;
197
227M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
227M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
227M
    Py_ssize_t i;
200
227M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
227M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
27.3M
        end = ptr + maxcount;
205
206
227M
    switch (pattern[0]) {
207
208
196M
    case SRE_OP_IN:
209
        /* repeated set */
210
196M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
341M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
144M
            ptr++;
213
196M
        break;
214
215
15.4M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
15.4M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
42.0M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
26.5M
            ptr++;
220
15.4M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
14.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
14.3M
        chr = pattern[1];
232
14.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
14.3M
        c = (SRE_CHAR) chr;
234
14.3M
#if SIZEOF_SRE_CHAR < 4
235
14.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
14.3M
        else
238
14.3M
#endif
239
16.7M
        while (ptr < end && *ptr == c)
240
2.41M
            ptr++;
241
14.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
628k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
628k
        chr = pattern[1];
270
628k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
628k
        c = (SRE_CHAR) chr;
272
628k
#if SIZEOF_SRE_CHAR < 4
273
628k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
628k
        else
276
628k
#endif
277
10.3M
        while (ptr < end && *ptr != c)
278
9.67M
            ptr++;
279
628k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
227M
    }
319
320
227M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
227M
           ptr - (SRE_CHAR*) state->ptr));
322
227M
    return ptr - (SRE_CHAR*) state->ptr;
323
227M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
156M
{
195
156M
    SRE_CODE chr;
196
156M
    SRE_CHAR c;
197
156M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
156M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
156M
    Py_ssize_t i;
200
156M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
156M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.4M
        end = ptr + maxcount;
205
206
156M
    switch (pattern[0]) {
207
208
144M
    case SRE_OP_IN:
209
        /* repeated set */
210
144M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
266M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
122M
            ptr++;
213
144M
        break;
214
215
2.14M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.14M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
9.69M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
7.55M
            ptr++;
220
2.14M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
8.84M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
8.84M
        chr = pattern[1];
232
8.84M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
8.84M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
9.44M
        while (ptr < end && *ptr == c)
240
606k
            ptr++;
241
8.84M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.54M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.54M
        chr = pattern[1];
270
1.54M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.54M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
14.7M
        while (ptr < end && *ptr != c)
278
13.2M
            ptr++;
279
1.54M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
156M
    }
319
320
156M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
156M
           ptr - (SRE_CHAR*) state->ptr));
322
156M
    return ptr - (SRE_CHAR*) state->ptr;
323
156M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (i.e., those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Record the state's current lastmark and lastindex in the active match
   context (`ctx`), so that LASTMARK_RESTORE() can put them back later.
   Uses `ctx` and `state` from the enclosing scope. */
#define LASTMARK_SAVE()     \
354
551M
    do { \
355
551M
        ctx->lastmark = state->lastmark; \
356
551M
        ctx->lastindex = state->lastindex; \
357
551M
    } while (0)
358
/* Copy the lastmark and lastindex values held in the active match context
   (`ctx`) back into the state.  Uses `ctx` and `state` from the enclosing
   scope. */
#define LASTMARK_RESTORE()  \
359
324M
    do { \
360
324M
        state->lastmark = ctx->lastmark; \
361
324M
        state->lastindex = ctx->lastindex; \
362
324M
    } while (0)
363
364
/* Trace the current repeat's last_ptr (printed as a string index) and then
   push a copy of that pointer onto the data stack.  Because this expands
   DATA_PUSH -> DATA_STACK_PUSH, a failed stack grow makes the enclosing
   function return the negative error code. */
#define LAST_PTR_PUSH()     \
365
205M
    do { \
366
205M
        TRACE(("push last_ptr: %zd", \
367
205M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
205M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
205M
    } while (0)
370
/* Pop the pointer on top of the data stack back into the current repeat's
   last_ptr, then trace the restored value (printed as a string index). */
#define LAST_PTR_POP()  \
371
205M
    do { \
372
205M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
205M
        TRACE(("pop last_ptr: %zd", \
374
205M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
205M
    } while (0)
376
377
0
/* Result helpers for the matching function.
   RETURN_ERROR(i) returns `i` from the enclosing function immediately.
   RETURN_FAILURE / RETURN_SUCCESS store 0 / 1 in the local `ret` and jump
   to the local `exit` label instead of returning directly. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
816M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
594M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Conditional variants keyed on a result code `i`:
     RETURN_ON_ERROR   - return `i` if it is negative;
     RETURN_ON_SUCCESS - as above, then RETURN_SUCCESS if `i` is positive;
     RETURN_ON_FAILURE - as above, then RETURN_FAILURE if `i` is zero.
   NOTE(review): `i` is not parenthesised and is expanded more than once,
   so it should be a plain variable with no side effects. */
#define RETURN_ON_ERROR(i) \
382
1.10G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
125M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
33.5M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.41G
/* Reserve sizeof(type) bytes on the state's data stack and point `ptr` at
   them.  The slot begins at the current data_stack_base; that offset is
   also left in the enclosing function's `alloc_pos`, and data_stack_base
   is advanced past the slot.

   If the slot does not fit in the remaining space, data_stack_grow() is
   called; a negative result is returned from the *enclosing function*.
   After a successful grow, `ctx` is looked up again from `ctx_pos` (unless
   ctx_pos is -1) -- presumably because growing may relocate the stack
   buffer; data_stack_grow() itself is not shown here.

   Requires `alloc_pos`, `ctx_pos` and `ctx` to be in scope where the macro
   is expanded. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.41G
do { \
390
1.41G
    alloc_pos = state->data_stack_base; \
391
1.41G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.41G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.41G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
157M
        int j = data_stack_grow(state, sizeof(type)); \
395
157M
        if (j < 0) return j; \
396
157M
        if (ctx_pos != -1) \
397
157M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
157M
    } \
399
1.41G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.41G
    state->data_stack_base += sizeof(type); \
401
1.41G
} while (0)
402
403
1.49G
/* Set `ptr` to the address `pos` bytes into the state's data stack, cast
   to `type*`.  Nothing is allocated or copied, and `pos` is not range
   checked. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.49G
do { \
405
1.49G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.49G
    ptr = (type*)(state->data_stack+pos); \
407
1.49G
} while (0)
408
409
507M
/* Copy `size` bytes starting at `data` onto the top of the state's data
   stack and advance data_stack_base by `size`.

   If the bytes do not fit in the remaining space, data_stack_grow() is
   called; a negative result is returned from the *enclosing function*.
   After a successful grow, `ctx` is looked up again from `ctx_pos` (unless
   ctx_pos is -1) -- presumably because growing may relocate the stack
   buffer; data_stack_grow() itself is not shown here.

   Requires `ctx_pos` and `ctx` to be in scope where the macro is
   expanded. */
#define DATA_STACK_PUSH(state, data, size) \
410
507M
do { \
411
507M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
507M
           data, state->data_stack_base, size)); \
413
507M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
89.9k
        int j = data_stack_grow(state, size); \
415
89.9k
        if (j < 0) return j; \
416
89.9k
        if (ctx_pos != -1) \
417
89.9k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
89.9k
    } \
419
507M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
507M
    state->data_stack_base += size; \
421
507M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
325M
/* Copy the topmost `size` bytes of the state's data stack into `data`.
   When `discard` is non-zero, data_stack_base is also lowered by `size`,
   removing those bytes from the stack; otherwise they stay on the stack.
   The macro does not check that `size` bytes are actually present. */
#define DATA_STACK_POP(state, data, size, discard) \
427
325M
do { \
428
325M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
325M
           data, state->data_stack_base-size, size)); \
430
325M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
325M
    if (discard) \
432
325M
        state->data_stack_base -= size; \
433
325M
} while (0)
434
435
1.59G
/* Drop the topmost `size` bytes of the state's data stack by lowering
   data_stack_base; nothing is copied out.  The macro does not check that
   `size` bytes are actually present. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.59G
do { \
437
1.59G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.59G
           state->data_stack_base-size, size)); \
439
1.59G
    state->data_stack_base -= size; \
440
1.59G
} while(0)
441
442
/* Shorthand forms of the DATA_STACK_* macros that always operate on the
   local variable `state`.  DATA_PUSH, DATA_POP and DATA_POP_DISCARD take a
   pointer `x` and use sizeof(*(x)) as the byte count; DATA_POP always
   removes the bytes it copies out (discard = 1).  DATA_ALLOC and
   DATA_LOOKUP_AT just forward their type/pointer/position arguments. */
#define DATA_PUSH(x) \
443
205M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
205M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.41G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.41G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.49G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into the subject string to a character index: the byte
   distance from state->beginning divided by state->charsize.  Evaluates to
   -1 when `ptr` is NULL.  Uses `state` from the enclosing scope. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* Debug helper.  In VERBOSE builds, and only when DO_TRACE is true, print
   `label`, the number of marks (lastmark + 1) and the string index of each
   state->mark[0..lastmark]; an extra space is emitted before every even
   j > 0, which visually groups the marks in pairs.  In non-VERBOSE builds
   the macro expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save state->mark[0..lastmark] -- (lastmark + 1) pointers -- on the data
   stack.  Does nothing when `lastmark` is negative.  Because this expands
   DATA_STACK_PUSH, a failed stack grow makes the enclosing function return
   the negative error code.  The MARK_POP* macros compute their byte count
   from `lastmark` in the same way. */
#define MARK_PUSH(lastmark) \
472
410M
    do if (lastmark >= 0) { \
473
301M
        MARK_TRACE("push", (lastmark)); \
474
301M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
301M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
410M
    } while (0)
477
/* Copy (lastmark + 1) pointers from the top of the data stack back into
   state->mark and remove them from the stack.  Does nothing when
   `lastmark` is negative. */
#define MARK_POP(lastmark) \
478
134M
    do if (lastmark >= 0) { \
479
118M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
118M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
118M
        MARK_TRACE("pop", (lastmark)); \
482
134M
    } while (0)
483
/* Copy (lastmark + 1) pointers from the top of the data stack back into
   state->mark but leave them on the stack (discard = 0), so the same
   snapshot can be restored again.  Does nothing when `lastmark` is
   negative. */
#define MARK_POP_KEEP(lastmark) \
484
2.04M
    do if (lastmark >= 0) { \
485
1.90M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.90M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.90M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.04M
    } while (0)
489
/* Remove (lastmark + 1) saved pointers from the top of the data stack
   without copying them anywhere; state->mark is left untouched.  Does
   nothing when `lastmark` is negative. */
#define MARK_POP_DISCARD(lastmark) \
490
275M
    do if (lastmark >= 0) { \
491
183M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
183M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
183M
        MARK_TRACE("pop discard", (lastmark)); \
494
275M
    } while (0)
495
496
476M
/* Codes stored in a match context's `jump` field.  DO_JUMPX writes the
   code it is given into the newly created context; the first context
   created by SRE(match)() gets JUMP_NONE.
   NOTE(review): presumably the code selects which jump label to resume at
   when a nested context finishes -- that code is outside this excerpt. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
205M
#define JUMP_MAX_UNTIL_2     2
499
125M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
124M
#define JUMP_REPEAT          7
504
13.1M
#define JUMP_REPEAT_ONE_1    8
505
220M
#define JUMP_REPEAT_ONE_2    9
506
25.4M
#define JUMP_MIN_REPEAT_ONE  10
507
161M
#define JUMP_BRANCH          11
508
33.5M
#define JUMP_ASSERT          12
509
25.6M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without a C-level
   recursive call.

   Steps, in order:
     1. save the current `pattern` and `ptr` in the current context;
     2. allocate a new context on the data stack (DATA_ALLOC may make the
        enclosing function return on allocation failure, and may refresh
        `ctx` if the stack was grown);
     3. fill in its pattern, toplevel flag, jump code and the position of
        the context that created it (last_ctx_pos = ctx_pos);
     4. make the new context current and `goto entrance`.

   `jumplabel:` is defined here; when execution arrives at it, `pattern`
   and `ptr` are reloaded from the then-current context.  How control gets
   back to the label is not visible in this excerpt.

   The expansion is a bare statement sequence containing a label (it is not
   wrapped in do { } while (0)), so it must be used where several
   statements are allowed, and each `jumplabel` must be unique within the
   function. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
935M
    ctx->pattern = pattern; \
516
935M
    ctx->ptr = ptr; \
517
935M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
935M
    nextctx->pattern = nextpattern; \
519
935M
    nextctx->toplevel = toplevel_; \
520
935M
    nextctx->jump = jumpvalue; \
521
935M
    nextctx->last_ctx_pos = ctx_pos; \
522
935M
    pattern = nextpattern; \
523
935M
    ctx_pos = alloc_pos; \
524
935M
    ctx = nextctx; \
525
935M
    goto entrance; \
526
935M
    jumplabel: \
527
935M
    pattern = ctx->pattern; \
528
935M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX variant in which the new context inherits the current
   context's `toplevel` flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
875M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX variant in which the new context's `toplevel` flag is always
   0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
59.1M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-activation bookkeeping for SRE(match)(); instances are allocated on
   the state's data stack (see DATA_ALLOC in SRE(match)() and DO_JUMPX).

     lastmark, lastindex - snapshot written by LASTMARK_SAVE() and read
                           back by LASTMARK_RESTORE();
     pattern, ptr        - saved by DO_JUMPX before a nested context is
                           entered and reloaded at its jump label;
     toplevel            - flag taken from SRE(match)()'s argument, or set
                           by DO_JUMPX for nested contexts;
     jump                - one of the JUMP_* codes;
     last_ctx_pos        - data-stack offset of the context that created
                           this one, -1 for the first context;
     u.rep               - repeat record whose last_ptr is saved/restored
                           by LAST_PTR_PUSH()/LAST_PTR_POP();
     count, u.chr        - not used in this excerpt; presumably scratch
                           values for individual opcodes (TODO confirm). */
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
/* Periodic signal check.  Each expansion increments the local `sigcount`;
   whenever its low 12 bits become zero (i.e. once every 4096 increments)
   PyErr_CheckSignals() is called, and a non-zero result makes the
   enclosing function return SRE_ERROR_INTERRUPTED. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.39G
    do {                                                           \
553
2.39G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.39G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS is what the matcher actually invokes.

   In Py_DEBUG builds it runs _MAYBE_CHECK_SIGNALS and then a countdown:
   while state->fail_after_count is non-negative it is decremented on each
   expansion, and on the expansion that finds it equal to 0 the exception
   state->fail_after_exc is set and the enclosing function returns
   SRE_ERROR_INTERRUPTED.  (Presumably a hook for tests to force a failure
   after a fixed number of steps -- TODO confirm.)

   In other builds it is simply _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.39G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalise USE_COMPUTED_GOTOS to a defined 0/1 value:
     - HAVE_COMPUTED_GOTOS defined: default to 1 unless the builder already
       defined USE_COMPUTED_GOTOS (an explicit setting is kept as is);
     - otherwise, a non-zero USE_COMPUTED_GOTOS request is a compile-time
       error;
     - otherwise force it to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch glue.

   With computed gotos, TARGET(OP) pastes together the label name
   TARGET_<OP>, and DISPATCH runs MAYBE_CHECK_SIGNALS and then jumps
   through sre_targets[], indexed by the opcode at *pattern (advancing
   `pattern` past it).

   Without them, TARGET(OP) is an ordinary `case OP` label and DISPATCH is
   a jump to the `dispatch` label, where the signal check and the
   `switch (*pattern++)` are written out. */
#if USE_COMPUTED_GOTOS
585
2.47G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.39G
        do {                               \
588
2.39G
            MAYBE_CHECK_SIGNALS;           \
589
2.39G
            goto *sre_targets[*pattern++]; \
590
2.39G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
476M
{
601
476M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
476M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
476M
    Py_ssize_t ret = 0;
604
476M
    int jump;
605
476M
    unsigned int sigcount = state->sigcount;
606
607
476M
    SRE(match_context)* ctx;
608
476M
    SRE(match_context)* nextctx;
609
476M
    INIT_TRACE(state);
610
611
476M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
476M
    DATA_ALLOC(SRE(match_context), ctx);
614
476M
    ctx->last_ctx_pos = -1;
615
476M
    ctx->jump = JUMP_NONE;
616
476M
    ctx->toplevel = toplevel;
617
476M
    ctx_pos = alloc_pos;
618
619
476M
#if USE_COMPUTED_GOTOS
620
476M
#include "sre_targets.h"
621
476M
#endif
622
623
1.41G
entrance:
624
625
1.41G
    ;  // Fashion statement.
626
1.41G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.41G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
65.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.15M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.15M
                   end - ptr, (size_t) pattern[3]));
634
5.15M
            RETURN_FAILURE;
635
5.15M
        }
636
60.6M
        pattern += pattern[1] + 1;
637
60.6M
    }
638
639
1.40G
#if USE_COMPUTED_GOTOS
640
1.40G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.40G
    {
647
648
1.40G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
562M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
562M
                   ptr, pattern[0]));
653
562M
            {
654
562M
                int i = pattern[0];
655
562M
                if (i & 1)
656
93.5M
                    state->lastindex = i/2 + 1;
657
562M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
555M
                    int j = state->lastmark + 1;
663
571M
                    while (j < i)
664
16.2M
                        state->mark[j++] = NULL;
665
555M
                    state->lastmark = i;
666
555M
                }
667
562M
                state->mark[i] = ptr;
668
562M
            }
669
562M
            pattern++;
670
562M
            DISPATCH;
671
672
562M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
139M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
139M
                   ptr, *pattern));
677
139M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
58.4M
                RETURN_FAILURE;
679
80.8M
            pattern++;
680
80.8M
            ptr++;
681
80.8M
            DISPATCH;
682
683
80.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
161M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
161M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
161M
            if (ctx->toplevel &&
698
47.9M
                ((state->match_all && ptr != state->end) ||
699
47.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
161M
            state->ptr = ptr;
704
161M
            RETURN_SUCCESS;
705
706
94.8M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
94.8M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
94.8M
            if (!SRE(at)(state, ptr, *pattern))
711
76.1M
                RETURN_FAILURE;
712
18.6M
            pattern++;
713
18.6M
            DISPATCH;
714
715
18.6M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
273M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
273M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
273M
            if (ptr >= end ||
749
272M
                !SRE(charset)(state, pattern + 1, *ptr))
750
86.6M
                RETURN_FAILURE;
751
186M
            pattern += pattern[0];
752
186M
            ptr++;
753
186M
            DISPATCH;
754
755
186M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.70M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.70M
                   pattern, ptr, pattern[0]));
758
5.70M
            if (ptr >= end ||
759
5.70M
                sre_lower_ascii(*ptr) != *pattern)
760
86.6k
                RETURN_FAILURE;
761
5.61M
            pattern++;
762
5.61M
            ptr++;
763
5.61M
            DISPATCH;
764
765
5.61M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
80.1M
        TARGET(SRE_OP_JUMP):
845
80.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
80.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
80.1M
                   ptr, pattern[0]));
850
80.1M
            pattern += pattern[0];
851
80.1M
            DISPATCH;
852
853
124M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
124M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
124M
            LASTMARK_SAVE();
858
124M
            if (state->repeat)
859
67.1M
                MARK_PUSH(ctx->lastmark);
860
309M
            for (; pattern[0]; pattern += pattern[0]) {
861
263M
                if (pattern[1] == SRE_OP_LITERAL &&
862
123M
                    (ptr >= end ||
863
123M
                     (SRE_CODE) *ptr != pattern[2]))
864
66.1M
                    continue;
865
197M
                if (pattern[1] == SRE_OP_IN &&
866
59.9M
                    (ptr >= end ||
867
59.8M
                     !SRE(charset)(state, pattern + 3,
868
59.8M
                                   (SRE_CODE) *ptr)))
869
35.6M
                    continue;
870
161M
                state->ptr = ptr;
871
161M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
161M
                if (ret) {
873
77.7M
                    if (state->repeat)
874
52.4M
                        MARK_POP_DISCARD(ctx->lastmark);
875
77.7M
                    RETURN_ON_ERROR(ret);
876
77.7M
                    RETURN_SUCCESS;
877
77.7M
                }
878
83.7M
                if (state->repeat)
879
25.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
83.7M
                LASTMARK_RESTORE();
881
83.7M
            }
882
46.3M
            if (state->repeat)
883
14.6M
                MARK_POP_DISCARD(ctx->lastmark);
884
46.3M
            RETURN_FAILURE;
885
886
544M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
544M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
544M
                   pattern[1], pattern[2]));
898
899
544M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.19M
                RETURN_FAILURE; /* cannot match */
901
902
542M
            state->ptr = ptr;
903
904
542M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
542M
            RETURN_ON_ERROR(ret);
906
542M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
542M
            ctx->count = ret;
908
542M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
542M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
351M
                RETURN_FAILURE;
917
918
191M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.90M
                ptr == state->end &&
920
71.9k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
71.9k
            {
922
                /* tail is empty.  we're finished */
923
71.9k
                state->ptr = ptr;
924
71.9k
                RETURN_SUCCESS;
925
71.9k
            }
926
927
191M
            LASTMARK_SAVE();
928
191M
            if (state->repeat)
929
111M
                MARK_PUSH(ctx->lastmark);
930
931
191M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
32.9M
                ctx->u.chr = pattern[pattern[0]+1];
935
32.9M
                for (;;) {
936
75.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
55.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
42.0M
                        ptr--;
939
42.0M
                        ctx->count--;
940
42.0M
                    }
941
32.9M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
19.7M
                        break;
943
13.1M
                    state->ptr = ptr;
944
13.1M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
13.1M
                            pattern+pattern[0]);
946
13.1M
                    if (ret) {
947
13.1M
                        if (state->repeat)
948
11.7M
                            MARK_POP_DISCARD(ctx->lastmark);
949
13.1M
                        RETURN_ON_ERROR(ret);
950
13.1M
                        RETURN_SUCCESS;
951
13.1M
                    }
952
654
                    if (state->repeat)
953
654
                        MARK_POP_KEEP(ctx->lastmark);
954
654
                    LASTMARK_RESTORE();
955
956
654
                    ptr--;
957
654
                    ctx->count--;
958
654
                }
959
19.7M
                if (state->repeat)
960
18.4M
                    MARK_POP_DISCARD(ctx->lastmark);
961
158M
            } else {
962
                /* general case */
963
243M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
220M
                    state->ptr = ptr;
965
220M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
220M
                            pattern+pattern[0]);
967
220M
                    if (ret) {
968
135M
                        if (state->repeat)
969
80.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
135M
                        RETURN_ON_ERROR(ret);
971
135M
                        RETURN_SUCCESS;
972
135M
                    }
973
84.9M
                    if (state->repeat)
974
2.01M
                        MARK_POP_KEEP(ctx->lastmark);
975
84.9M
                    LASTMARK_RESTORE();
976
977
84.9M
                    ptr--;
978
84.9M
                    ctx->count--;
979
84.9M
                }
980
22.6M
                if (state->repeat)
981
1.15M
                    MARK_POP_DISCARD(ctx->lastmark);
982
22.6M
            }
983
42.4M
            RETURN_FAILURE;
984
985
4.27M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
4.27M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
4.27M
                   pattern[1], pattern[2]));
997
998
4.27M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
4.27M
            state->ptr = ptr;
1002
1003
4.27M
            if (pattern[1] == 0)
1004
4.27M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
4.27M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
4.27M
            } else {
1028
                /* general case */
1029
4.27M
                LASTMARK_SAVE();
1030
4.27M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
25.4M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
25.4M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
25.4M
                    state->ptr = ptr;
1036
25.4M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
25.4M
                            pattern+pattern[0]);
1038
25.4M
                    if (ret) {
1039
4.27M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
4.27M
                        RETURN_ON_ERROR(ret);
1042
4.27M
                        RETURN_SUCCESS;
1043
4.27M
                    }
1044
21.1M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
21.1M
                    LASTMARK_RESTORE();
1047
1048
21.1M
                    state->ptr = ptr;
1049
21.1M
                    ret = SRE(count)(state, pattern+3, 1);
1050
21.1M
                    RETURN_ON_ERROR(ret);
1051
21.1M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
21.1M
                    if (ret == 0)
1053
0
                        break;
1054
21.1M
                    assert(ret == 1);
1055
21.1M
                    ptr++;
1056
21.1M
                    ctx->count++;
1057
21.1M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
124M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
124M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
124M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
124M
            ctx->u.rep = repeat_pool_malloc(state);
1127
124M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
124M
            ctx->u.rep->count = -1;
1131
124M
            ctx->u.rep->pattern = pattern;
1132
124M
            ctx->u.rep->prev = state->repeat;
1133
124M
            ctx->u.rep->last_ptr = NULL;
1134
124M
            state->repeat = ctx->u.rep;
1135
1136
124M
            state->ptr = ptr;
1137
124M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
124M
            state->repeat = ctx->u.rep->prev;
1139
124M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
124M
            if (ret) {
1142
52.9M
                RETURN_ON_ERROR(ret);
1143
52.9M
                RETURN_SUCCESS;
1144
52.9M
            }
1145
71.2M
            RETURN_FAILURE;
1146
1147
221M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
221M
            ctx->u.rep = state->repeat;
1155
221M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
221M
            state->ptr = ptr;
1159
1160
221M
            ctx->count = ctx->u.rep->count+1;
1161
1162
221M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
221M
                   ptr, ctx->count));
1164
1165
221M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
221M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.8M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
205M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
205M
                ctx->u.rep->count = ctx->count;
1185
205M
                LASTMARK_SAVE();
1186
205M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
205M
                LAST_PTR_PUSH();
1189
205M
                ctx->u.rep->last_ptr = state->ptr;
1190
205M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
205M
                        ctx->u.rep->pattern+3);
1192
205M
                LAST_PTR_POP();
1193
205M
                if (ret) {
1194
96.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
96.4M
                    RETURN_ON_ERROR(ret);
1196
96.4M
                    RETURN_SUCCESS;
1197
96.4M
                }
1198
109M
                MARK_POP(ctx->lastmark);
1199
109M
                LASTMARK_RESTORE();
1200
109M
                ctx->u.rep->count = ctx->count-1;
1201
109M
                state->ptr = ptr;
1202
109M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
125M
            state->repeat = ctx->u.rep->prev;
1207
125M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
125M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
125M
            RETURN_ON_SUCCESS(ret);
1211
72.1M
            state->ptr = ptr;
1212
72.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
33.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
33.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
33.5M
                   ptr, pattern[1]));
1565
33.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
33.5M
            state->ptr = ptr - pattern[1];
1568
33.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
33.5M
            RETURN_ON_FAILURE(ret);
1570
28.1M
            pattern += pattern[0];
1571
28.1M
            DISPATCH;
1572
1573
28.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.6M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.6M
                   ptr, pattern[1]));
1578
25.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.6M
                state->ptr = ptr - pattern[1];
1580
25.6M
                LASTMARK_SAVE();
1581
25.6M
                if (state->repeat)
1582
25.6M
                    MARK_PUSH(ctx->lastmark);
1583
1584
51.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
51.3M
                if (ret) {
1586
19.2k
                    if (state->repeat)
1587
19.2k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
19.2k
                    RETURN_ON_ERROR(ret);
1589
19.2k
                    RETURN_FAILURE;
1590
19.2k
                }
1591
25.6M
                if (state->repeat)
1592
25.6M
                    MARK_POP(ctx->lastmark);
1593
25.6M
                LASTMARK_RESTORE();
1594
25.6M
            }
1595
25.6M
            pattern += pattern[0];
1596
25.6M
            DISPATCH;
1597
1598
25.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.41G
exit:
1620
1.41G
    ctx_pos = ctx->last_ctx_pos;
1621
1.41G
    jump = ctx->jump;
1622
1.41G
    DATA_POP_DISCARD(ctx);
1623
1.41G
    if (ctx_pos == -1) {
1624
476M
        state->sigcount = sigcount;
1625
476M
        return ret;
1626
476M
    }
1627
935M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
935M
    switch (jump) {
1630
205M
        case JUMP_MAX_UNTIL_2:
1631
205M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
205M
            goto jump_max_until_2;
1633
125M
        case JUMP_MAX_UNTIL_3:
1634
125M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
125M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
161M
        case JUMP_BRANCH:
1643
161M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
161M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
124M
        case JUMP_REPEAT:
1658
124M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
124M
            goto jump_repeat;
1660
13.1M
        case JUMP_REPEAT_ONE_1:
1661
13.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
13.1M
            goto jump_repeat_one_1;
1663
220M
        case JUMP_REPEAT_ONE_2:
1664
220M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
220M
            goto jump_repeat_one_2;
1666
25.4M
        case JUMP_MIN_REPEAT_ONE:
1667
25.4M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
25.4M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
33.5M
        case JUMP_ASSERT:
1673
33.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
33.5M
            goto jump_assert;
1675
25.6M
        case JUMP_ASSERT_NOT:
1676
25.6M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.6M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
935M
    }
1683
1684
0
    return ret; /* should never get here */
1685
935M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
164M
{
601
164M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
164M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
164M
    Py_ssize_t ret = 0;
604
164M
    int jump;
605
164M
    unsigned int sigcount = state->sigcount;
606
607
164M
    SRE(match_context)* ctx;
608
164M
    SRE(match_context)* nextctx;
609
164M
    INIT_TRACE(state);
610
611
164M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
164M
    DATA_ALLOC(SRE(match_context), ctx);
614
164M
    ctx->last_ctx_pos = -1;
615
164M
    ctx->jump = JUMP_NONE;
616
164M
    ctx->toplevel = toplevel;
617
164M
    ctx_pos = alloc_pos;
618
619
164M
#if USE_COMPUTED_GOTOS
620
164M
#include "sre_targets.h"
621
164M
#endif
622
623
440M
entrance:
624
625
440M
    ;  // Fashion statement.
626
440M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
440M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
37.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.01M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.01M
                   end - ptr, (size_t) pattern[3]));
634
5.01M
            RETURN_FAILURE;
635
5.01M
        }
636
32.2M
        pattern += pattern[1] + 1;
637
32.2M
    }
638
639
435M
#if USE_COMPUTED_GOTOS
640
435M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
435M
    {
647
648
435M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
172M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
172M
                   ptr, pattern[0]));
653
172M
            {
654
172M
                int i = pattern[0];
655
172M
                if (i & 1)
656
37.3M
                    state->lastindex = i/2 + 1;
657
172M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
169M
                    int j = state->lastmark + 1;
663
180M
                    while (j < i)
664
10.8M
                        state->mark[j++] = NULL;
665
169M
                    state->lastmark = i;
666
169M
                }
667
172M
                state->mark[i] = ptr;
668
172M
            }
669
172M
            pattern++;
670
172M
            DISPATCH;
671
672
172M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
81.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
81.1M
                   ptr, *pattern));
677
81.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
34.8M
                RETURN_FAILURE;
679
46.3M
            pattern++;
680
46.3M
            ptr++;
681
46.3M
            DISPATCH;
682
683
46.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
61.8M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
61.8M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
61.8M
            if (ctx->toplevel &&
698
24.7M
                ((state->match_all && ptr != state->end) ||
699
24.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
61.8M
            state->ptr = ptr;
704
61.8M
            RETURN_SUCCESS;
705
706
29.0M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
29.0M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
29.0M
            if (!SRE(at)(state, ptr, *pattern))
711
12.0M
                RETURN_FAILURE;
712
17.0M
            pattern++;
713
17.0M
            DISPATCH;
714
715
17.0M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
67.0M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
67.0M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
67.0M
            if (ptr >= end ||
749
66.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
11.2M
                RETURN_FAILURE;
751
55.7M
            pattern += pattern[0];
752
55.7M
            ptr++;
753
55.7M
            DISPATCH;
754
755
55.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
518k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
518k
                   pattern, ptr, pattern[0]));
758
518k
            if (ptr >= end ||
759
518k
                sre_lower_ascii(*ptr) != *pattern)
760
49.9k
                RETURN_FAILURE;
761
468k
            pattern++;
762
468k
            ptr++;
763
468k
            DISPATCH;
764
765
468k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
33.6M
        TARGET(SRE_OP_JUMP):
845
33.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
33.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
33.6M
                   ptr, pattern[0]));
850
33.6M
            pattern += pattern[0];
851
33.6M
            DISPATCH;
852
853
61.1M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
61.1M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
61.1M
            LASTMARK_SAVE();
858
61.1M
            if (state->repeat)
859
13.3M
                MARK_PUSH(ctx->lastmark);
860
172M
            for (; pattern[0]; pattern += pattern[0]) {
861
143M
                if (pattern[1] == SRE_OP_LITERAL &&
862
65.8M
                    (ptr >= end ||
863
65.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
26.6M
                    continue;
865
117M
                if (pattern[1] == SRE_OP_IN &&
866
14.0M
                    (ptr >= end ||
867
13.9M
                     !SRE(charset)(state, pattern + 3,
868
13.9M
                                   (SRE_CODE) *ptr)))
869
7.53M
                    continue;
870
109M
                state->ptr = ptr;
871
109M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
109M
                if (ret) {
873
32.1M
                    if (state->repeat)
874
13.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
32.1M
                    RETURN_ON_ERROR(ret);
876
32.1M
                    RETURN_SUCCESS;
877
32.1M
                }
878
77.3M
                if (state->repeat)
879
6.99k
                    MARK_POP_KEEP(ctx->lastmark);
880
77.3M
                LASTMARK_RESTORE();
881
77.3M
            }
882
28.9M
            if (state->repeat)
883
351k
                MARK_POP_DISCARD(ctx->lastmark);
884
28.9M
            RETURN_FAILURE;
885
886
177M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
177M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
177M
                   pattern[1], pattern[2]));
898
899
177M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
977k
                RETURN_FAILURE; /* cannot match */
901
902
176M
            state->ptr = ptr;
903
904
176M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
176M
            RETURN_ON_ERROR(ret);
906
176M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
176M
            ctx->count = ret;
908
176M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
176M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
120M
                RETURN_FAILURE;
917
918
55.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
654k
                ptr == state->end &&
920
50.0k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
50.0k
            {
922
                /* tail is empty.  we're finished */
923
50.0k
                state->ptr = ptr;
924
50.0k
                RETURN_SUCCESS;
925
50.0k
            }
926
927
55.8M
            LASTMARK_SAVE();
928
55.8M
            if (state->repeat)
929
32.8M
                MARK_PUSH(ctx->lastmark);
930
931
55.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.01M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.01M
                for (;;) {
936
16.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.6M
                        ptr--;
939
10.6M
                        ctx->count--;
940
10.6M
                    }
941
6.01M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.02M
                        break;
943
2.98M
                    state->ptr = ptr;
944
2.98M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.98M
                            pattern+pattern[0]);
946
2.98M
                    if (ret) {
947
2.98M
                        if (state->repeat)
948
1.57M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.98M
                        RETURN_ON_ERROR(ret);
950
2.98M
                        RETURN_SUCCESS;
951
2.98M
                    }
952
147
                    if (state->repeat)
953
147
                        MARK_POP_KEEP(ctx->lastmark);
954
147
                    LASTMARK_RESTORE();
955
956
147
                    ptr--;
957
147
                    ctx->count--;
958
147
                }
959
3.02M
                if (state->repeat)
960
1.72M
                    MARK_POP_DISCARD(ctx->lastmark);
961
49.8M
            } else {
962
                /* general case */
963
65.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
60.7M
                    state->ptr = ptr;
965
60.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
60.7M
                            pattern+pattern[0]);
967
60.7M
                    if (ret) {
968
45.0M
                        if (state->repeat)
969
28.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
45.0M
                        RETURN_ON_ERROR(ret);
971
45.0M
                        RETURN_SUCCESS;
972
45.0M
                    }
973
15.7M
                    if (state->repeat)
974
1.25M
                        MARK_POP_KEEP(ctx->lastmark);
975
15.7M
                    LASTMARK_RESTORE();
976
977
15.7M
                    ptr--;
978
15.7M
                    ctx->count--;
979
15.7M
                }
980
4.80M
                if (state->repeat)
981
727k
                    MARK_POP_DISCARD(ctx->lastmark);
982
4.80M
            }
983
7.82M
            RETURN_FAILURE;
984
985
3.54M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.54M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.54M
                   pattern[1], pattern[2]));
997
998
3.54M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.54M
            state->ptr = ptr;
1002
1003
3.54M
            if (pattern[1] == 0)
1004
3.54M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.54M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.54M
            } else {
1028
                /* general case */
1029
3.54M
                LASTMARK_SAVE();
1030
3.54M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
7.37M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
7.37M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
7.37M
                    state->ptr = ptr;
1036
7.37M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
7.37M
                            pattern+pattern[0]);
1038
7.37M
                    if (ret) {
1039
3.54M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.54M
                        RETURN_ON_ERROR(ret);
1042
3.54M
                        RETURN_SUCCESS;
1043
3.54M
                    }
1044
3.82M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
3.82M
                    LASTMARK_RESTORE();
1047
1048
3.82M
                    state->ptr = ptr;
1049
3.82M
                    ret = SRE(count)(state, pattern+3, 1);
1050
3.82M
                    RETURN_ON_ERROR(ret);
1051
3.82M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
3.82M
                    if (ret == 0)
1053
0
                        break;
1054
3.82M
                    assert(ret == 1);
1055
3.82M
                    ptr++;
1056
3.82M
                    ctx->count++;
1057
3.82M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
20.0M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
20.0M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
20.0M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
20.0M
            ctx->u.rep = repeat_pool_malloc(state);
1127
20.0M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
20.0M
            ctx->u.rep->count = -1;
1131
20.0M
            ctx->u.rep->pattern = pattern;
1132
20.0M
            ctx->u.rep->prev = state->repeat;
1133
20.0M
            ctx->u.rep->last_ptr = NULL;
1134
20.0M
            state->repeat = ctx->u.rep;
1135
1136
20.0M
            state->ptr = ptr;
1137
20.0M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
20.0M
            state->repeat = ctx->u.rep->prev;
1139
20.0M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
20.0M
            if (ret) {
1142
10.1M
                RETURN_ON_ERROR(ret);
1143
10.1M
                RETURN_SUCCESS;
1144
10.1M
            }
1145
9.97M
            RETURN_FAILURE;
1146
1147
53.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
53.1M
            ctx->u.rep = state->repeat;
1155
53.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
53.1M
            state->ptr = ptr;
1159
1160
53.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
53.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
53.1M
                   ptr, ctx->count));
1164
1165
53.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
53.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.32M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
45.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
45.7M
                ctx->u.rep->count = ctx->count;
1185
45.7M
                LASTMARK_SAVE();
1186
45.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
45.7M
                LAST_PTR_PUSH();
1189
45.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
45.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
45.7M
                        ctx->u.rep->pattern+3);
1192
45.7M
                LAST_PTR_POP();
1193
45.7M
                if (ret) {
1194
32.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
32.4M
                    RETURN_ON_ERROR(ret);
1196
32.4M
                    RETURN_SUCCESS;
1197
32.4M
                }
1198
13.3M
                MARK_POP(ctx->lastmark);
1199
13.3M
                LASTMARK_RESTORE();
1200
13.3M
                ctx->u.rep->count = ctx->count-1;
1201
13.3M
                state->ptr = ptr;
1202
13.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
20.6M
            state->repeat = ctx->u.rep->prev;
1207
20.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
20.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
20.6M
            RETURN_ON_SUCCESS(ret);
1211
10.5M
            state->ptr = ptr;
1212
10.5M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.85M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.85M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.85M
                   ptr, pattern[1]));
1565
2.85M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.85M
            state->ptr = ptr - pattern[1];
1568
2.85M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.85M
            RETURN_ON_FAILURE(ret);
1570
2.71M
            pattern += pattern[0];
1571
2.71M
            DISPATCH;
1572
1573
6.15M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
6.15M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
6.15M
                   ptr, pattern[1]));
1578
6.15M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
6.15M
                state->ptr = ptr - pattern[1];
1580
6.15M
                LASTMARK_SAVE();
1581
6.15M
                if (state->repeat)
1582
6.15M
                    MARK_PUSH(ctx->lastmark);
1583
1584
12.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
12.3M
                if (ret) {
1586
1.58k
                    if (state->repeat)
1587
1.58k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.58k
                    RETURN_ON_ERROR(ret);
1589
1.58k
                    RETURN_FAILURE;
1590
1.58k
                }
1591
6.14M
                if (state->repeat)
1592
6.14M
                    MARK_POP(ctx->lastmark);
1593
6.14M
                LASTMARK_RESTORE();
1594
6.14M
            }
1595
6.14M
            pattern += pattern[0];
1596
6.14M
            DISPATCH;
1597
1598
6.14M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
440M
exit:
1620
440M
    ctx_pos = ctx->last_ctx_pos;
1621
440M
    jump = ctx->jump;
1622
440M
    DATA_POP_DISCARD(ctx);
1623
440M
    if (ctx_pos == -1) {
1624
164M
        state->sigcount = sigcount;
1625
164M
        return ret;
1626
164M
    }
1627
276M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
276M
    switch (jump) {
1630
45.7M
        case JUMP_MAX_UNTIL_2:
1631
45.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
45.7M
            goto jump_max_until_2;
1633
20.6M
        case JUMP_MAX_UNTIL_3:
1634
20.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
20.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
109M
        case JUMP_BRANCH:
1643
109M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
109M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
20.0M
        case JUMP_REPEAT:
1658
20.0M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
20.0M
            goto jump_repeat;
1660
2.98M
        case JUMP_REPEAT_ONE_1:
1661
2.98M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.98M
            goto jump_repeat_one_1;
1663
60.7M
        case JUMP_REPEAT_ONE_2:
1664
60.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
60.7M
            goto jump_repeat_one_2;
1666
7.37M
        case JUMP_MIN_REPEAT_ONE:
1667
7.37M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
7.37M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.85M
        case JUMP_ASSERT:
1673
2.85M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.85M
            goto jump_assert;
1675
6.15M
        case JUMP_ASSERT_NOT:
1676
6.15M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
6.15M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
276M
    }
1683
1684
0
    return ret; /* should never get here */
1685
276M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
210M
{
601
210M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
210M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
210M
    Py_ssize_t ret = 0;
604
210M
    int jump;
605
210M
    unsigned int sigcount = state->sigcount;
606
607
210M
    SRE(match_context)* ctx;
608
210M
    SRE(match_context)* nextctx;
609
210M
    INIT_TRACE(state);
610
611
210M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
210M
    DATA_ALLOC(SRE(match_context), ctx);
614
210M
    ctx->last_ctx_pos = -1;
615
210M
    ctx->jump = JUMP_NONE;
616
210M
    ctx->toplevel = toplevel;
617
210M
    ctx_pos = alloc_pos;
618
619
210M
#if USE_COMPUTED_GOTOS
620
210M
#include "sre_targets.h"
621
210M
#endif
622
623
562M
entrance:
624
625
562M
    ;  // Fashion statement.
626
562M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
562M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
16.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
139k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
139k
                   end - ptr, (size_t) pattern[3]));
634
139k
            RETURN_FAILURE;
635
139k
        }
636
16.6M
        pattern += pattern[1] + 1;
637
16.6M
    }
638
639
562M
#if USE_COMPUTED_GOTOS
640
562M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
562M
    {
647
648
562M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
242M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
242M
                   ptr, pattern[0]));
653
242M
            {
654
242M
                int i = pattern[0];
655
242M
                if (i & 1)
656
31.9M
                    state->lastindex = i/2 + 1;
657
242M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
241M
                    int j = state->lastmark + 1;
663
244M
                    while (j < i)
664
3.03M
                        state->mark[j++] = NULL;
665
241M
                    state->lastmark = i;
666
241M
                }
667
242M
                state->mark[i] = ptr;
668
242M
            }
669
242M
            pattern++;
670
242M
            DISPATCH;
671
672
242M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
29.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
29.1M
                   ptr, *pattern));
677
29.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
12.6M
                RETURN_FAILURE;
679
16.4M
            pattern++;
680
16.4M
            ptr++;
681
16.4M
            DISPATCH;
682
683
16.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
62.0M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
62.0M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
62.0M
            if (ctx->toplevel &&
698
12.2M
                ((state->match_all && ptr != state->end) ||
699
12.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
62.0M
            state->ptr = ptr;
704
62.0M
            RETURN_SUCCESS;
705
706
47.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
47.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
47.5M
            if (!SRE(at)(state, ptr, *pattern))
711
46.0M
                RETURN_FAILURE;
712
1.56M
            pattern++;
713
1.56M
            DISPATCH;
714
715
1.56M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
128M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
128M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
128M
            if (ptr >= end ||
749
127M
                !SRE(charset)(state, pattern + 1, *ptr))
750
54.0M
                RETURN_FAILURE;
751
74.2M
            pattern += pattern[0];
752
74.2M
            ptr++;
753
74.2M
            DISPATCH;
754
755
74.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.32M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.32M
                   pattern, ptr, pattern[0]));
758
3.32M
            if (ptr >= end ||
759
3.32M
                sre_lower_ascii(*ptr) != *pattern)
760
19.3k
                RETURN_FAILURE;
761
3.30M
            pattern++;
762
3.30M
            ptr++;
763
3.30M
            DISPATCH;
764
765
3.30M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
18.2M
        TARGET(SRE_OP_JUMP):
845
18.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
18.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
18.2M
                   ptr, pattern[0]));
850
18.2M
            pattern += pattern[0];
851
18.2M
            DISPATCH;
852
853
25.1M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
25.1M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
25.1M
            LASTMARK_SAVE();
858
25.1M
            if (state->repeat)
859
20.9M
                MARK_PUSH(ctx->lastmark);
860
54.5M
            for (; pattern[0]; pattern += pattern[0]) {
861
47.2M
                if (pattern[1] == SRE_OP_LITERAL &&
862
20.9M
                    (ptr >= end ||
863
20.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
13.0M
                    continue;
865
34.1M
                if (pattern[1] == SRE_OP_IN &&
866
17.9M
                    (ptr >= end ||
867
17.9M
                     !SRE(charset)(state, pattern + 3,
868
17.9M
                                   (SRE_CODE) *ptr)))
869
10.7M
                    continue;
870
23.3M
                state->ptr = ptr;
871
23.3M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.3M
                if (ret) {
873
17.8M
                    if (state->repeat)
874
16.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
17.8M
                    RETURN_ON_ERROR(ret);
876
17.8M
                    RETURN_SUCCESS;
877
17.8M
                }
878
5.50M
                if (state->repeat)
879
3.46k
                    MARK_POP_KEEP(ctx->lastmark);
880
5.50M
                LASTMARK_RESTORE();
881
5.50M
            }
882
7.32M
            if (state->repeat)
883
4.89M
                MARK_POP_DISCARD(ctx->lastmark);
884
7.32M
            RETURN_FAILURE;
885
886
212M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
212M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
212M
                   pattern[1], pattern[2]));
898
899
212M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
198k
                RETURN_FAILURE; /* cannot match */
901
902
212M
            state->ptr = ptr;
903
904
212M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
212M
            RETURN_ON_ERROR(ret);
906
212M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
212M
            ctx->count = ret;
908
212M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
212M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
150M
                RETURN_FAILURE;
917
918
61.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.28M
                ptr == state->end &&
920
18.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.1k
            {
922
                /* tail is empty.  we're finished */
923
18.1k
                state->ptr = ptr;
924
18.1k
                RETURN_SUCCESS;
925
18.1k
            }
926
927
61.4M
            LASTMARK_SAVE();
928
61.4M
            if (state->repeat)
929
27.3M
                MARK_PUSH(ctx->lastmark);
930
931
61.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.91M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.91M
                for (;;) {
936
12.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
9.44M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
5.69M
                        ptr--;
939
5.69M
                        ctx->count--;
940
5.69M
                    }
941
6.91M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.16M
                        break;
943
3.74M
                    state->ptr = ptr;
944
3.74M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.74M
                            pattern+pattern[0]);
946
3.74M
                    if (ret) {
947
3.74M
                        if (state->repeat)
948
3.71M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.74M
                        RETURN_ON_ERROR(ret);
950
3.74M
                        RETURN_SUCCESS;
951
3.74M
                    }
952
219
                    if (state->repeat)
953
219
                        MARK_POP_KEEP(ctx->lastmark);
954
219
                    LASTMARK_RESTORE();
955
956
219
                    ptr--;
957
219
                    ctx->count--;
958
219
                }
959
3.16M
                if (state->repeat)
960
3.15M
                    MARK_POP_DISCARD(ctx->lastmark);
961
54.5M
            } else {
962
                /* general case */
963
104M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
88.6M
                    state->ptr = ptr;
965
88.6M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
88.6M
                            pattern+pattern[0]);
967
88.6M
                    if (ret) {
968
38.9M
                        if (state->repeat)
969
20.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
38.9M
                        RETURN_ON_ERROR(ret);
971
38.9M
                        RETURN_SUCCESS;
972
38.9M
                    }
973
49.6M
                    if (state->repeat)
974
576k
                        MARK_POP_KEEP(ctx->lastmark);
975
49.6M
                    LASTMARK_RESTORE();
976
977
49.6M
                    ptr--;
978
49.6M
                    ctx->count--;
979
49.6M
                }
980
15.5M
                if (state->repeat)
981
322k
                    MARK_POP_DISCARD(ctx->lastmark);
982
15.5M
            }
983
18.7M
            RETURN_FAILURE;
984
985
708k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
708k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
708k
                   pattern[1], pattern[2]));
997
998
708k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
708k
            state->ptr = ptr;
1002
1003
708k
            if (pattern[1] == 0)
1004
708k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
708k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
708k
            } else {
1028
                /* general case */
1029
708k
                LASTMARK_SAVE();
1030
708k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
15.9M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
15.9M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
15.9M
                    state->ptr = ptr;
1036
15.9M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
15.9M
                            pattern+pattern[0]);
1038
15.9M
                    if (ret) {
1039
708k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
708k
                        RETURN_ON_ERROR(ret);
1042
708k
                        RETURN_SUCCESS;
1043
708k
                    }
1044
15.2M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
15.2M
                    LASTMARK_RESTORE();
1047
1048
15.2M
                    state->ptr = ptr;
1049
15.2M
                    ret = SRE(count)(state, pattern+3, 1);
1050
15.2M
                    RETURN_ON_ERROR(ret);
1051
15.2M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
15.2M
                    if (ret == 0)
1053
0
                        break;
1054
15.2M
                    assert(ret == 1);
1055
15.2M
                    ptr++;
1056
15.2M
                    ctx->count++;
1057
15.2M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
59.0M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
59.0M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
59.0M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
59.0M
            ctx->u.rep = repeat_pool_malloc(state);
1127
59.0M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
59.0M
            ctx->u.rep->count = -1;
1131
59.0M
            ctx->u.rep->pattern = pattern;
1132
59.0M
            ctx->u.rep->prev = state->repeat;
1133
59.0M
            ctx->u.rep->last_ptr = NULL;
1134
59.0M
            state->repeat = ctx->u.rep;
1135
1136
59.0M
            state->ptr = ptr;
1137
59.0M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
59.0M
            state->repeat = ctx->u.rep->prev;
1139
59.0M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
59.0M
            if (ret) {
1142
14.2M
                RETURN_ON_ERROR(ret);
1143
14.2M
                RETURN_SUCCESS;
1144
14.2M
            }
1145
44.8M
            RETURN_FAILURE;
1146
1147
86.3M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
86.3M
            ctx->u.rep = state->repeat;
1155
86.3M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
86.3M
            state->ptr = ptr;
1159
1160
86.3M
            ctx->count = ctx->u.rep->count+1;
1161
1162
86.3M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
86.3M
                   ptr, ctx->count));
1164
1165
86.3M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
86.3M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.51M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
82.8M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
82.8M
                ctx->u.rep->count = ctx->count;
1185
82.8M
                LASTMARK_SAVE();
1186
82.8M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
82.8M
                LAST_PTR_PUSH();
1189
82.8M
                ctx->u.rep->last_ptr = state->ptr;
1190
82.8M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
82.8M
                        ctx->u.rep->pattern+3);
1192
82.8M
                LAST_PTR_POP();
1193
82.8M
                if (ret) {
1194
27.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
27.0M
                    RETURN_ON_ERROR(ret);
1196
27.0M
                    RETURN_SUCCESS;
1197
27.0M
                }
1198
55.8M
                MARK_POP(ctx->lastmark);
1199
55.8M
                LASTMARK_RESTORE();
1200
55.8M
                ctx->u.rep->count = ctx->count-1;
1201
55.8M
                state->ptr = ptr;
1202
55.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
59.3M
            state->repeat = ctx->u.rep->prev;
1207
59.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
59.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
59.3M
            RETURN_ON_SUCCESS(ret);
1211
45.1M
            state->ptr = ptr;
1212
45.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
10.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
10.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
10.8M
                   ptr, pattern[1]));
1565
10.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
10.8M
            state->ptr = ptr - pattern[1];
1568
10.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
10.8M
            RETURN_ON_FAILURE(ret);
1570
6.71M
            pattern += pattern[0];
1571
6.71M
            DISPATCH;
1572
1573
8.27M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
8.27M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
8.27M
                   ptr, pattern[1]));
1578
8.27M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
8.27M
                state->ptr = ptr - pattern[1];
1580
8.27M
                LASTMARK_SAVE();
1581
8.27M
                if (state->repeat)
1582
8.27M
                    MARK_PUSH(ctx->lastmark);
1583
1584
16.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
16.5M
                if (ret) {
1586
3.21k
                    if (state->repeat)
1587
3.21k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.21k
                    RETURN_ON_ERROR(ret);
1589
3.21k
                    RETURN_FAILURE;
1590
3.21k
                }
1591
8.26M
                if (state->repeat)
1592
8.26M
                    MARK_POP(ctx->lastmark);
1593
8.26M
                LASTMARK_RESTORE();
1594
8.26M
            }
1595
8.26M
            pattern += pattern[0];
1596
8.26M
            DISPATCH;
1597
1598
8.26M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
562M
exit:
1620
562M
    ctx_pos = ctx->last_ctx_pos;
1621
562M
    jump = ctx->jump;
1622
562M
    DATA_POP_DISCARD(ctx);
1623
562M
    if (ctx_pos == -1) {
1624
210M
        state->sigcount = sigcount;
1625
210M
        return ret;
1626
210M
    }
1627
351M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
351M
    switch (jump) {
1630
82.8M
        case JUMP_MAX_UNTIL_2:
1631
82.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
82.8M
            goto jump_max_until_2;
1633
59.3M
        case JUMP_MAX_UNTIL_3:
1634
59.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
59.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.3M
        case JUMP_BRANCH:
1643
23.3M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.3M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
59.0M
        case JUMP_REPEAT:
1658
59.0M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
59.0M
            goto jump_repeat;
1660
3.74M
        case JUMP_REPEAT_ONE_1:
1661
3.74M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.74M
            goto jump_repeat_one_1;
1663
88.6M
        case JUMP_REPEAT_ONE_2:
1664
88.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
88.6M
            goto jump_repeat_one_2;
1666
15.9M
        case JUMP_MIN_REPEAT_ONE:
1667
15.9M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
15.9M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
10.8M
        case JUMP_ASSERT:
1673
10.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
10.8M
            goto jump_assert;
1675
8.27M
        case JUMP_ASSERT_NOT:
1676
8.27M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
8.27M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
351M
    }
1683
1684
0
    return ret; /* should never get here */
1685
351M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
101M
{
601
101M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
101M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
101M
    Py_ssize_t ret = 0;
604
101M
    int jump;
605
101M
    unsigned int sigcount = state->sigcount;
606
607
101M
    SRE(match_context)* ctx;
608
101M
    SRE(match_context)* nextctx;
609
101M
    INIT_TRACE(state);
610
611
101M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
101M
    DATA_ALLOC(SRE(match_context), ctx);
614
101M
    ctx->last_ctx_pos = -1;
615
101M
    ctx->jump = JUMP_NONE;
616
101M
    ctx->toplevel = toplevel;
617
101M
    ctx_pos = alloc_pos;
618
619
101M
#if USE_COMPUTED_GOTOS
620
101M
#include "sre_targets.h"
621
101M
#endif
622
623
408M
entrance:
624
625
408M
    ;  // Fashion statement.
626
408M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
408M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
11.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.03k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.03k
                   end - ptr, (size_t) pattern[3]));
634
3.03k
            RETURN_FAILURE;
635
3.03k
        }
636
11.7M
        pattern += pattern[1] + 1;
637
11.7M
    }
638
639
408M
#if USE_COMPUTED_GOTOS
640
408M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
408M
    {
647
648
408M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
147M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
147M
                   ptr, pattern[0]));
653
147M
            {
654
147M
                int i = pattern[0];
655
147M
                if (i & 1)
656
24.3M
                    state->lastindex = i/2 + 1;
657
147M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
144M
                    int j = state->lastmark + 1;
663
147M
                    while (j < i)
664
2.33M
                        state->mark[j++] = NULL;
665
144M
                    state->lastmark = i;
666
144M
                }
667
147M
                state->mark[i] = ptr;
668
147M
            }
669
147M
            pattern++;
670
147M
            DISPATCH;
671
672
147M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
28.9M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
28.9M
                   ptr, *pattern));
677
28.9M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
10.9M
                RETURN_FAILURE;
679
17.9M
            pattern++;
680
17.9M
            ptr++;
681
17.9M
            DISPATCH;
682
683
17.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
37.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
37.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
37.5M
            if (ctx->toplevel &&
698
10.9M
                ((state->match_all && ptr != state->end) ||
699
10.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
37.5M
            state->ptr = ptr;
704
37.5M
            RETURN_SUCCESS;
705
706
18.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
18.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
18.1M
            if (!SRE(at)(state, ptr, *pattern))
711
18.1M
                RETURN_FAILURE;
712
36.7k
            pattern++;
713
36.7k
            DISPATCH;
714
715
36.7k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
78.2M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
78.2M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
78.2M
            if (ptr >= end ||
749
78.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
21.3M
                RETURN_FAILURE;
751
56.8M
            pattern += pattern[0];
752
56.8M
            ptr++;
753
56.8M
            DISPATCH;
754
755
56.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.85M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.85M
                   pattern, ptr, pattern[0]));
758
1.85M
            if (ptr >= end ||
759
1.85M
                sre_lower_ascii(*ptr) != *pattern)
760
17.3k
                RETURN_FAILURE;
761
1.83M
            pattern++;
762
1.83M
            ptr++;
763
1.83M
            DISPATCH;
764
765
1.83M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
28.2M
        TARGET(SRE_OP_JUMP):
845
28.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
28.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
28.2M
                   ptr, pattern[0]));
850
28.2M
            pattern += pattern[0];
851
28.2M
            DISPATCH;
852
853
37.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
37.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
37.8M
            LASTMARK_SAVE();
858
37.8M
            if (state->repeat)
859
32.7M
                MARK_PUSH(ctx->lastmark);
860
82.5M
            for (; pattern[0]; pattern += pattern[0]) {
861
72.4M
                if (pattern[1] == SRE_OP_LITERAL &&
862
36.7M
                    (ptr >= end ||
863
36.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
26.4M
                    continue;
865
45.9M
                if (pattern[1] == SRE_OP_IN &&
866
27.9M
                    (ptr >= end ||
867
27.9M
                     !SRE(charset)(state, pattern + 3,
868
27.9M
                                   (SRE_CODE) *ptr)))
869
17.3M
                    continue;
870
28.6M
                state->ptr = ptr;
871
28.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
28.6M
                if (ret) {
873
27.7M
                    if (state->repeat)
874
23.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
27.7M
                    RETURN_ON_ERROR(ret);
876
27.7M
                    RETURN_SUCCESS;
877
27.7M
                }
878
896k
                if (state->repeat)
879
14.8k
                    MARK_POP_KEEP(ctx->lastmark);
880
896k
                LASTMARK_RESTORE();
881
896k
            }
882
10.1M
            if (state->repeat)
883
9.42M
                MARK_POP_DISCARD(ctx->lastmark);
884
10.1M
            RETURN_FAILURE;
885
886
154M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
154M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
154M
                   pattern[1], pattern[2]));
898
899
154M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
19.3k
                RETURN_FAILURE; /* cannot match */
901
902
154M
            state->ptr = ptr;
903
904
154M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
154M
            RETURN_ON_ERROR(ret);
906
154M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
154M
            ctx->count = ret;
908
154M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
154M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
80.3M
                RETURN_FAILURE;
917
918
74.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
958k
                ptr == state->end &&
920
3.77k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.77k
            {
922
                /* tail is empty.  we're finished */
923
3.77k
                state->ptr = ptr;
924
3.77k
                RETURN_SUCCESS;
925
3.77k
            }
926
927
74.1M
            LASTMARK_SAVE();
928
74.1M
            if (state->repeat)
929
51.6M
                MARK_PUSH(ctx->lastmark);
930
931
74.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
20.0M
                ctx->u.chr = pattern[pattern[0]+1];
935
20.0M
                for (;;) {
936
45.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
32.1M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
25.7M
                        ptr--;
939
25.7M
                        ctx->count--;
940
25.7M
                    }
941
20.0M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
13.5M
                        break;
943
6.44M
                    state->ptr = ptr;
944
6.44M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.44M
                            pattern+pattern[0]);
946
6.44M
                    if (ret) {
947
6.44M
                        if (state->repeat)
948
6.44M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.44M
                        RETURN_ON_ERROR(ret);
950
6.44M
                        RETURN_SUCCESS;
951
6.44M
                    }
952
288
                    if (state->repeat)
953
288
                        MARK_POP_KEEP(ctx->lastmark);
954
288
                    LASTMARK_RESTORE();
955
956
288
                    ptr--;
957
288
                    ctx->count--;
958
288
                }
959
13.5M
                if (state->repeat)
960
13.5M
                    MARK_POP_DISCARD(ctx->lastmark);
961
54.0M
            } else {
962
                /* general case */
963
73.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
71.3M
                    state->ptr = ptr;
965
71.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
71.3M
                            pattern+pattern[0]);
967
71.3M
                    if (ret) {
968
51.8M
                        if (state->repeat)
969
31.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
51.8M
                        RETURN_ON_ERROR(ret);
971
51.8M
                        RETURN_SUCCESS;
972
51.8M
                    }
973
19.5M
                    if (state->repeat)
974
187k
                        MARK_POP_KEEP(ctx->lastmark);
975
19.5M
                    LASTMARK_RESTORE();
976
977
19.5M
                    ptr--;
978
19.5M
                    ctx->count--;
979
19.5M
                }
980
2.25M
                if (state->repeat)
981
107k
                    MARK_POP_DISCARD(ctx->lastmark);
982
2.25M
            }
983
15.8M
            RETURN_FAILURE;
984
985
15.2k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
15.2k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
15.2k
                   pattern[1], pattern[2]));
997
998
15.2k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
15.2k
            state->ptr = ptr;
1002
1003
15.2k
            if (pattern[1] == 0)
1004
15.2k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
15.2k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
15.2k
            } else {
1028
                /* general case */
1029
15.2k
                LASTMARK_SAVE();
1030
15.2k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
2.15M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
2.15M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
2.15M
                    state->ptr = ptr;
1036
2.15M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
2.15M
                            pattern+pattern[0]);
1038
2.15M
                    if (ret) {
1039
15.2k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
15.2k
                        RETURN_ON_ERROR(ret);
1042
15.2k
                        RETURN_SUCCESS;
1043
15.2k
                    }
1044
2.13M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
2.13M
                    LASTMARK_RESTORE();
1047
1048
2.13M
                    state->ptr = ptr;
1049
2.13M
                    ret = SRE(count)(state, pattern+3, 1);
1050
2.13M
                    RETURN_ON_ERROR(ret);
1051
2.13M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
2.13M
                    if (ret == 0)
1053
0
                        break;
1054
2.13M
                    assert(ret == 1);
1055
2.13M
                    ptr++;
1056
2.13M
                    ctx->count++;
1057
2.13M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
45.0M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
45.0M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
45.0M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
45.0M
            ctx->u.rep = repeat_pool_malloc(state);
1127
45.0M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
45.0M
            ctx->u.rep->count = -1;
1131
45.0M
            ctx->u.rep->pattern = pattern;
1132
45.0M
            ctx->u.rep->prev = state->repeat;
1133
45.0M
            ctx->u.rep->last_ptr = NULL;
1134
45.0M
            state->repeat = ctx->u.rep;
1135
1136
45.0M
            state->ptr = ptr;
1137
45.0M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
45.0M
            state->repeat = ctx->u.rep->prev;
1139
45.0M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
45.0M
            if (ret) {
1142
28.5M
                RETURN_ON_ERROR(ret);
1143
28.5M
                RETURN_SUCCESS;
1144
28.5M
            }
1145
16.4M
            RETURN_FAILURE;
1146
1147
82.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
82.0M
            ctx->u.rep = state->repeat;
1155
82.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
82.0M
            state->ptr = ptr;
1159
1160
82.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
82.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
82.0M
                   ptr, ctx->count));
1164
1165
82.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
82.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.99M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
77.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
77.0M
                ctx->u.rep->count = ctx->count;
1185
77.0M
                LASTMARK_SAVE();
1186
77.0M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
77.0M
                LAST_PTR_PUSH();
1189
77.0M
                ctx->u.rep->last_ptr = state->ptr;
1190
77.0M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
77.0M
                        ctx->u.rep->pattern+3);
1192
77.0M
                LAST_PTR_POP();
1193
77.0M
                if (ret) {
1194
36.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
36.9M
                    RETURN_ON_ERROR(ret);
1196
36.9M
                    RETURN_SUCCESS;
1197
36.9M
                }
1198
40.0M
                MARK_POP(ctx->lastmark);
1199
40.0M
                LASTMARK_RESTORE();
1200
40.0M
                ctx->u.rep->count = ctx->count-1;
1201
40.0M
                state->ptr = ptr;
1202
40.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
45.0M
            state->repeat = ctx->u.rep->prev;
1207
45.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
45.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
45.0M
            RETURN_ON_SUCCESS(ret);
1211
16.4M
            state->ptr = ptr;
1212
16.4M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
19.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
19.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
19.8M
                   ptr, pattern[1]));
1565
19.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
19.8M
            state->ptr = ptr - pattern[1];
1568
19.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
19.8M
            RETURN_ON_FAILURE(ret);
1570
18.6M
            pattern += pattern[0];
1571
18.6M
            DISPATCH;
1572
1573
18.6M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
11.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
11.2M
                   ptr, pattern[1]));
1578
11.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
11.2M
                state->ptr = ptr - pattern[1];
1580
11.2M
                LASTMARK_SAVE();
1581
11.2M
                if (state->repeat)
1582
11.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
22.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
22.4M
                if (ret) {
1586
14.4k
                    if (state->repeat)
1587
14.4k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.4k
                    RETURN_ON_ERROR(ret);
1589
14.4k
                    RETURN_FAILURE;
1590
14.4k
                }
1591
11.2M
                if (state->repeat)
1592
11.2M
                    MARK_POP(ctx->lastmark);
1593
11.2M
                LASTMARK_RESTORE();
1594
11.2M
            }
1595
11.2M
            pattern += pattern[0];
1596
11.2M
            DISPATCH;
1597
1598
11.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
408M
exit:
1620
408M
    ctx_pos = ctx->last_ctx_pos;
1621
408M
    jump = ctx->jump;
1622
408M
    DATA_POP_DISCARD(ctx);
1623
408M
    if (ctx_pos == -1) {
1624
101M
        state->sigcount = sigcount;
1625
101M
        return ret;
1626
101M
    }
1627
306M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
306M
    switch (jump) {
1630
77.0M
        case JUMP_MAX_UNTIL_2:
1631
77.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
77.0M
            goto jump_max_until_2;
1633
45.0M
        case JUMP_MAX_UNTIL_3:
1634
45.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
45.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
28.6M
        case JUMP_BRANCH:
1643
28.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
28.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
45.0M
        case JUMP_REPEAT:
1658
45.0M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
45.0M
            goto jump_repeat;
1660
6.44M
        case JUMP_REPEAT_ONE_1:
1661
6.44M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.44M
            goto jump_repeat_one_1;
1663
71.3M
        case JUMP_REPEAT_ONE_2:
1664
71.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
71.3M
            goto jump_repeat_one_2;
1666
2.15M
        case JUMP_MIN_REPEAT_ONE:
1667
2.15M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
2.15M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
19.8M
        case JUMP_ASSERT:
1673
19.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
19.8M
            goto jump_assert;
1675
11.2M
        case JUMP_ASSERT_NOT:
1676
11.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
11.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
306M
    }
1683
1684
0
    return ret; /* should never get here */
1685
306M
}
1686
1687
/* Capturing groups must be reset between two successive SRE(match) calls
   made from a search loop: sets state->lastmark and state->lastindex back
   to -1.  Expands to a single statement and expects a variable named
   `state` to be in scope at the point of use. */
1688
#define RESET_CAPTURE_GROUP() \
1689
319M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan forward from state->start looking for the first position at which
 * `pattern` matches.
 *
 * If the pattern begins with an SRE_OP_INFO block, the data in that block
 * selects one of three fast paths:
 *   - prefix_len == 1: scan for a single literal character;
 *   - prefix_len  > 1: scan for a literal prefix, using the overlap table
 *                      to decide how far to fall back after a mismatch;
 *   - charset:         scan for a character accepted by SRE(charset).
 * Otherwise SRE(match) is simply tried at every position from ptr to end.
 *
 * Before each attempt state->start and state->ptr are set to the candidate
 * position; after a failed attempt the capture groups are reset with
 * RESET_CAPTURE_GROUP().
 *
 * Returns 0 if nothing matched, 1 directly when SRE_INFO_LITERAL is set
 * and the prefix was found, and otherwise the first non-zero status
 * returned by SRE(match), unchanged.
 * NOTE(review): presumably negative statuses are error codes that callers
 * must check -- confirm against SRE(match) and the callers in sre.c.
 */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
101M
{
1694
101M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
101M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
101M
    Py_ssize_t status = 0;
1697
101M
    Py_ssize_t prefix_len = 0;
1698
101M
    Py_ssize_t prefix_skip = 0;
1699
101M
    SRE_CODE* prefix = NULL;
1700
101M
    SRE_CODE* charset = NULL;
1701
101M
    SRE_CODE* overlap = NULL;
1702
101M
    int flags = 0;
1703
101M
    INIT_TRACE(state);
1704
1705
101M
    if (ptr > end)
1706
0
        return 0;
1707
1708
101M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
101M
        flags = pattern[2];
1713
1714
101M
        /* pattern[3] is the minimum match width: with fewer characters
           than that left in the input, no match is possible */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.44M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.44M
                   end - ptr, (size_t) pattern[3]));
1717
5.44M
            return 0;
1718
5.44M
        }
1719
96.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
10.4M
            end -= pattern[3] - 1;
1723
10.4M
            if (end <= ptr)
1724
0
                end = ptr;
1725
10.4M
        }
1726
1727
96.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
10.4M
            prefix_len = pattern[5];
1731
10.4M
            prefix_skip = pattern[6];
1732
10.4M
            prefix = pattern + 7;
1733
10.4M
            /* the overlap data follows the prefix data; the pointer is
               biased by -1 so that overlap[1] is its first entry */
            overlap = prefix + prefix_len - 1;
1734
85.6M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
76.3M
            charset = pattern + 5;
1738
1739
96.0M
        /* step over the whole INFO block using its skip field */
        pattern += 1 + pattern[1];
1740
96.0M
    }
1741
1742
96.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
96.0M
           prefix, prefix_len, prefix_skip));
1744
96.0M
    TRACE(("charset = %p\n", charset));
1745
1746
96.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
9.71M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.39M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.39M
#endif
1753
5.39M
        /* scan up to the real end of the input again, discarding the
           minimum-width adjustment applied to `end` above */
        end = (SRE_CHAR *)state->end;
1754
5.39M
        state->must_advance = 0;
1755
10.1M
        while (ptr < end) {
1756
100M
            while (*ptr != c) {
1757
91.3M
                if (++ptr >= end)
1758
1.15M
                    return 0;
1759
91.3M
            }
1760
8.97M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
8.97M
            state->start = ptr;
1762
8.97M
            state->ptr = ptr + prefix_skip;
1763
8.97M
            if (flags & SRE_INFO_LITERAL)
1764
3.96k
                return 1; /* we got all of it */
1765
8.96M
            /* resume matching after the part of the prefix already verified.
               NOTE(review): the factor 2 presumably reflects two code words
               per skipped prefix character -- confirm against the compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
8.96M
            if (status != 0)
1767
8.48M
                return status;
1768
480k
            ++ptr;
1769
480k
            RESET_CAPTURE_GROUP();
1770
480k
        }
1771
68.4k
        return 0;
1772
5.39M
    }
1773
1774
86.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
698k
        Py_ssize_t i = 0;
1778
1779
698k
        end = (SRE_CHAR *)state->end;
1780
698k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.55M
        for (i = 0; i < prefix_len; i++)
1784
1.03M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
518k
#endif
1787
1.24M
        while (ptr < end) {
1788
1.24M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.35M
            /* find the first prefix character; ptr is left pointing one
               past it */
            while (*ptr++ != c) {
1790
5.11M
                if (ptr >= end)
1791
317
                    return 0;
1792
5.11M
            }
1793
1.24M
            if (ptr >= end)
1794
61
                return 0;
1795
1796
1.24M
            /* i counts how many prefix characters have been matched so far */
            i = 1;
1797
1.24M
            state->must_advance = 0;
1798
1.24M
            do {
1799
1.24M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.18M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.18M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.18M
                    /* ptr is on the last prefix character here */
                    state->start = ptr - (prefix_len - 1);
1808
1.18M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.18M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.18M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.18M
                    if (status != 0)
1813
697k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
484k
                    if (++ptr >= end)
1816
48
                        return 0;
1817
484k
                    RESET_CAPTURE_GROUP();
1818
484k
                }
1819
549k
                /* fall back to the prefix length given by the overlap table;
                   0 means restart the scan for the first character */
                i = overlap[i];
1820
549k
            } while (i != 0);
1821
1.24M
        }
1822
0
        return 0;
1823
698k
    }
1824
1825
85.6M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
76.3M
        end = (SRE_CHAR *)state->end;
1828
76.3M
        state->must_advance = 0;
1829
78.6M
        for (;;) {
1830
341M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
262M
                ptr++;
1832
78.6M
            if (ptr >= end)
1833
3.99M
                return 0;
1834
74.6M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
74.6M
            state->start = ptr;
1836
74.6M
            state->ptr = ptr;
1837
74.6M
            status = SRE(match)(state, pattern, 0);
1838
74.6M
            if (status != 0)
1839
72.3M
                break;
1840
2.36M
            ptr++;
1841
2.36M
            RESET_CAPTURE_GROUP();
1842
2.36M
        }
1843
76.3M
    } else {
1844
        /* general case */
1845
9.37M
        assert(ptr <= end);
1846
9.37M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
9.37M
        state->start = state->ptr = ptr;
1848
9.37M
        /* only this first attempt passes 1 as the last argument; every
           retry below passes 0.
           NOTE(review): presumably this lets SRE(match) honour
           state->must_advance for the first position only -- confirm */
        status = SRE(match)(state, pattern, 1);
1849
9.37M
        state->must_advance = 0;
1850
9.37M
        /* a pattern anchored at the beginning that failed at the first
           position is not retried at later positions: give up at once */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.58M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
63
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.58M
        {
1854
4.58M
            state->start = state->ptr = ptr = end;
1855
4.58M
            return 0;
1856
4.58M
        }
1857
321M
        while (status == 0 && ptr < end) {
1858
316M
            ptr++;
1859
316M
            RESET_CAPTURE_GROUP();
1860
316M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
316M
            state->start = state->ptr = ptr;
1862
316M
            status = SRE(match)(state, pattern, 0);
1863
316M
        }
1864
4.79M
    }
1865
1866
77.1M
    return status;
1867
85.6M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
48.1M
{
1694
48.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.1M
    Py_ssize_t status = 0;
1697
48.1M
    Py_ssize_t prefix_len = 0;
1698
48.1M
    Py_ssize_t prefix_skip = 0;
1699
48.1M
    SRE_CODE* prefix = NULL;
1700
48.1M
    SRE_CODE* charset = NULL;
1701
48.1M
    SRE_CODE* overlap = NULL;
1702
48.1M
    int flags = 0;
1703
48.1M
    INIT_TRACE(state);
1704
1705
48.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.1M
        flags = pattern[2];
1713
1714
48.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.28M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.28M
                   end - ptr, (size_t) pattern[3]));
1717
5.28M
            return 0;
1718
5.28M
        }
1719
42.8M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.72M
            end -= pattern[3] - 1;
1723
3.72M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.72M
        }
1726
1727
42.8M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.72M
            prefix_len = pattern[5];
1731
3.72M
            prefix_skip = pattern[6];
1732
3.72M
            prefix = pattern + 7;
1733
3.72M
            overlap = prefix + prefix_len - 1;
1734
39.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
31.9M
            charset = pattern + 5;
1738
1739
42.8M
        pattern += 1 + pattern[1];
1740
42.8M
    }
1741
1742
42.8M
    TRACE(("prefix = %p %zd %zd\n",
1743
42.8M
           prefix, prefix_len, prefix_skip));
1744
42.8M
    TRACE(("charset = %p\n", charset));
1745
1746
42.8M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.68M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.68M
#if SIZEOF_SRE_CHAR < 4
1750
3.68M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.68M
#endif
1753
3.68M
        end = (SRE_CHAR *)state->end;
1754
3.68M
        state->must_advance = 0;
1755
3.91M
        while (ptr < end) {
1756
25.6M
            while (*ptr != c) {
1757
22.8M
                if (++ptr >= end)
1758
1.08M
                    return 0;
1759
22.8M
            }
1760
2.76M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.76M
            state->start = ptr;
1762
2.76M
            state->ptr = ptr + prefix_skip;
1763
2.76M
            if (flags & SRE_INFO_LITERAL)
1764
377
                return 1; /* we got all of it */
1765
2.76M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.76M
            if (status != 0)
1767
2.53M
                return status;
1768
230k
            ++ptr;
1769
230k
            RESET_CAPTURE_GROUP();
1770
230k
        }
1771
64.3k
        return 0;
1772
3.68M
    }
1773
1774
39.1M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
34.9k
        Py_ssize_t i = 0;
1778
1779
34.9k
        end = (SRE_CHAR *)state->end;
1780
34.9k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
34.9k
#if SIZEOF_SRE_CHAR < 4
1783
104k
        for (i = 0; i < prefix_len; i++)
1784
69.8k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
34.9k
#endif
1787
129k
        while (ptr < end) {
1788
129k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.19M
            while (*ptr++ != c) {
1790
1.06M
                if (ptr >= end)
1791
61
                    return 0;
1792
1.06M
            }
1793
129k
            if (ptr >= end)
1794
29
                return 0;
1795
1796
129k
            i = 1;
1797
129k
            state->must_advance = 0;
1798
129k
            do {
1799
129k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
106k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
106k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
106k
                    state->start = ptr - (prefix_len - 1);
1808
106k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
106k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
106k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
106k
                    if (status != 0)
1813
34.8k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
71.7k
                    if (++ptr >= end)
1816
24
                        return 0;
1817
71.7k
                    RESET_CAPTURE_GROUP();
1818
71.7k
                }
1819
94.7k
                i = overlap[i];
1820
94.7k
            } while (i != 0);
1821
129k
        }
1822
0
        return 0;
1823
34.9k
    }
1824
1825
39.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
31.9M
        end = (SRE_CHAR *)state->end;
1828
31.9M
        state->must_advance = 0;
1829
33.3M
        for (;;) {
1830
94.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
60.8M
                ptr++;
1832
33.3M
            if (ptr >= end)
1833
2.84M
                return 0;
1834
30.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
30.5M
            state->start = ptr;
1836
30.5M
            state->ptr = ptr;
1837
30.5M
            status = SRE(match)(state, pattern, 0);
1838
30.5M
            if (status != 0)
1839
29.1M
                break;
1840
1.42M
            ptr++;
1841
1.42M
            RESET_CAPTURE_GROUP();
1842
1.42M
        }
1843
31.9M
    } else {
1844
        /* general case */
1845
7.13M
        assert(ptr <= end);
1846
7.13M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
7.13M
        state->start = state->ptr = ptr;
1848
7.13M
        status = SRE(match)(state, pattern, 1);
1849
7.13M
        state->must_advance = 0;
1850
7.13M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.73M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
17
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
3.73M
        {
1854
3.73M
            state->start = state->ptr = ptr = end;
1855
3.73M
            return 0;
1856
3.73M
        }
1857
89.7M
        while (status == 0 && ptr < end) {
1858
86.3M
            ptr++;
1859
86.3M
            RESET_CAPTURE_GROUP();
1860
86.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
86.3M
            state->start = state->ptr = ptr;
1862
86.3M
            status = SRE(match)(state, pattern, 0);
1863
86.3M
        }
1864
3.40M
    }
1865
1866
32.5M
    return status;
1867
39.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
45.4M
{
1694
45.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
45.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
45.4M
    Py_ssize_t status = 0;
1697
45.4M
    Py_ssize_t prefix_len = 0;
1698
45.4M
    Py_ssize_t prefix_skip = 0;
1699
45.4M
    SRE_CODE* prefix = NULL;
1700
45.4M
    SRE_CODE* charset = NULL;
1701
45.4M
    SRE_CODE* overlap = NULL;
1702
45.4M
    int flags = 0;
1703
45.4M
    INIT_TRACE(state);
1704
1705
45.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
45.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
45.4M
        flags = pattern[2];
1713
1714
45.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
150k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
150k
                   end - ptr, (size_t) pattern[3]));
1717
150k
            return 0;
1718
150k
        }
1719
45.2M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.19M
            end -= pattern[3] - 1;
1723
2.19M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.19M
        }
1726
1727
45.2M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.19M
            prefix_len = pattern[5];
1731
2.19M
            prefix_skip = pattern[6];
1732
2.19M
            prefix = pattern + 7;
1733
2.19M
            overlap = prefix + prefix_len - 1;
1734
43.0M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
41.0M
            charset = pattern + 5;
1738
1739
45.2M
        pattern += 1 + pattern[1];
1740
45.2M
    }
1741
1742
45.2M
    TRACE(("prefix = %p %zd %zd\n",
1743
45.2M
           prefix, prefix_len, prefix_skip));
1744
45.2M
    TRACE(("charset = %p\n", charset));
1745
1746
45.2M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.71M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.71M
#if SIZEOF_SRE_CHAR < 4
1750
1.71M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.71M
#endif
1753
1.71M
        end = (SRE_CHAR *)state->end;
1754
1.71M
        state->must_advance = 0;
1755
1.82M
        while (ptr < end) {
1756
45.3M
            while (*ptr != c) {
1757
43.5M
                if (++ptr >= end)
1758
62.9k
                    return 0;
1759
43.5M
            }
1760
1.75M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.75M
            state->start = ptr;
1762
1.75M
            state->ptr = ptr + prefix_skip;
1763
1.75M
            if (flags & SRE_INFO_LITERAL)
1764
2.41k
                return 1; /* we got all of it */
1765
1.75M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.75M
            if (status != 0)
1767
1.64M
                return status;
1768
111k
            ++ptr;
1769
111k
            RESET_CAPTURE_GROUP();
1770
111k
        }
1771
3.37k
        return 0;
1772
1.71M
    }
1773
1774
43.5M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
483k
        Py_ssize_t i = 0;
1778
1779
483k
        end = (SRE_CHAR *)state->end;
1780
483k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
483k
#if SIZEOF_SRE_CHAR < 4
1783
1.45M
        for (i = 0; i < prefix_len; i++)
1784
966k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
483k
#endif
1787
715k
        while (ptr < end) {
1788
715k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.04M
            while (*ptr++ != c) {
1790
1.33M
                if (ptr >= end)
1791
116
                    return 0;
1792
1.33M
            }
1793
715k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
715k
            i = 1;
1797
715k
            state->must_advance = 0;
1798
716k
            do {
1799
716k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
693k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
693k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
693k
                    state->start = ptr - (prefix_len - 1);
1808
693k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
693k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
693k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
693k
                    if (status != 0)
1813
483k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
210k
                    if (++ptr >= end)
1816
13
                        return 0;
1817
210k
                    RESET_CAPTURE_GROUP();
1818
210k
                }
1819
232k
                i = overlap[i];
1820
232k
            } while (i != 0);
1821
715k
        }
1822
0
        return 0;
1823
483k
    }
1824
1825
43.0M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
41.0M
        end = (SRE_CHAR *)state->end;
1828
41.0M
        state->must_advance = 0;
1829
41.4M
        for (;;) {
1830
180M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
139M
                ptr++;
1832
41.4M
            if (ptr >= end)
1833
1.10M
                return 0;
1834
40.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
40.3M
            state->start = ptr;
1836
40.3M
            state->ptr = ptr;
1837
40.3M
            status = SRE(match)(state, pattern, 0);
1838
40.3M
            if (status != 0)
1839
39.9M
                break;
1840
424k
            ptr++;
1841
424k
            RESET_CAPTURE_GROUP();
1842
424k
        }
1843
41.0M
    } else {
1844
        /* general case */
1845
2.02M
        assert(ptr <= end);
1846
2.02M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.02M
        state->start = state->ptr = ptr;
1848
2.02M
        status = SRE(match)(state, pattern, 1);
1849
2.02M
        state->must_advance = 0;
1850
2.02M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
828k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
19
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
828k
        {
1854
828k
            state->start = state->ptr = ptr = end;
1855
828k
            return 0;
1856
828k
        }
1857
150M
        while (status == 0 && ptr < end) {
1858
149M
            ptr++;
1859
149M
            RESET_CAPTURE_GROUP();
1860
149M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
149M
            state->start = state->ptr = ptr;
1862
149M
            status = SRE(match)(state, pattern, 0);
1863
149M
        }
1864
1.19M
    }
1865
1866
41.1M
    return status;
1867
43.0M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.98M
{
1694
7.98M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.98M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.98M
    Py_ssize_t status = 0;
1697
7.98M
    Py_ssize_t prefix_len = 0;
1698
7.98M
    Py_ssize_t prefix_skip = 0;
1699
7.98M
    SRE_CODE* prefix = NULL;
1700
7.98M
    SRE_CODE* charset = NULL;
1701
7.98M
    SRE_CODE* overlap = NULL;
1702
7.98M
    int flags = 0;
1703
7.98M
    INIT_TRACE(state);
1704
1705
7.98M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.98M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.98M
        flags = pattern[2];
1713
1714
7.98M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.41k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.41k
                   end - ptr, (size_t) pattern[3]));
1717
6.41k
            return 0;
1718
6.41k
        }
1719
7.97M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
4.49M
            end -= pattern[3] - 1;
1723
4.49M
            if (end <= ptr)
1724
0
                end = ptr;
1725
4.49M
        }
1726
1727
7.97M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
4.49M
            prefix_len = pattern[5];
1731
4.49M
            prefix_skip = pattern[6];
1732
4.49M
            prefix = pattern + 7;
1733
4.49M
            overlap = prefix + prefix_len - 1;
1734
4.49M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.26M
            charset = pattern + 5;
1738
1739
7.97M
        pattern += 1 + pattern[1];
1740
7.97M
    }
1741
1742
7.97M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.97M
           prefix, prefix_len, prefix_skip));
1744
7.97M
    TRACE(("charset = %p\n", charset));
1745
1746
7.97M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
4.31M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
4.31M
        end = (SRE_CHAR *)state->end;
1754
4.31M
        state->must_advance = 0;
1755
4.45M
        while (ptr < end) {
1756
29.4M
            while (*ptr != c) {
1757
24.9M
                if (++ptr >= end)
1758
3.59k
                    return 0;
1759
24.9M
            }
1760
4.44M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
4.44M
            state->start = ptr;
1762
4.44M
            state->ptr = ptr + prefix_skip;
1763
4.44M
            if (flags & SRE_INFO_LITERAL)
1764
1.17k
                return 1; /* we got all of it */
1765
4.44M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
4.44M
            if (status != 0)
1767
4.31M
                return status;
1768
137k
            ++ptr;
1769
137k
            RESET_CAPTURE_GROUP();
1770
137k
        }
1771
718
        return 0;
1772
4.31M
    }
1773
1774
3.66M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
179k
        Py_ssize_t i = 0;
1778
1779
179k
        end = (SRE_CHAR *)state->end;
1780
179k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
401k
        while (ptr < end) {
1788
401k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.11M
            while (*ptr++ != c) {
1790
2.71M
                if (ptr >= end)
1791
140
                    return 0;
1792
2.71M
            }
1793
401k
            if (ptr >= end)
1794
17
                return 0;
1795
1796
401k
            i = 1;
1797
401k
            state->must_advance = 0;
1798
401k
            do {
1799
401k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
381k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
381k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
381k
                    state->start = ptr - (prefix_len - 1);
1808
381k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
381k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
381k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
381k
                    if (status != 0)
1813
179k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
201k
                    if (++ptr >= end)
1816
11
                        return 0;
1817
201k
                    RESET_CAPTURE_GROUP();
1818
201k
                }
1819
222k
                i = overlap[i];
1820
222k
            } while (i != 0);
1821
401k
        }
1822
0
        return 0;
1823
179k
    }
1824
1825
3.48M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.26M
        end = (SRE_CHAR *)state->end;
1828
3.26M
        state->must_advance = 0;
1829
3.78M
        for (;;) {
1830
66.4M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
62.7M
                ptr++;
1832
3.78M
            if (ptr >= end)
1833
45.5k
                return 0;
1834
3.73M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.73M
            state->start = ptr;
1836
3.73M
            state->ptr = ptr;
1837
3.73M
            status = SRE(match)(state, pattern, 0);
1838
3.73M
            if (status != 0)
1839
3.22M
                break;
1840
515k
            ptr++;
1841
515k
            RESET_CAPTURE_GROUP();
1842
515k
        }
1843
3.26M
    } else {
1844
        /* general case */
1845
214k
        assert(ptr <= end);
1846
214k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
214k
        state->start = state->ptr = ptr;
1848
214k
        status = SRE(match)(state, pattern, 1);
1849
214k
        state->must_advance = 0;
1850
214k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
17.6k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
27
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
17.5k
        {
1854
17.5k
            state->start = state->ptr = ptr = end;
1855
17.5k
            return 0;
1856
17.5k
        }
1857
81.4M
        while (status == 0 && ptr < end) {
1858
81.2M
            ptr++;
1859
81.2M
            RESET_CAPTURE_GROUP();
1860
81.2M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
81.2M
            state->start = state->ptr = ptr;
1862
81.2M
            status = SRE(match)(state, pattern, 0);
1863
81.2M
        }
1864
197k
    }
1865
1866
3.41M
    return status;
1867
3.48M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/