Coverage Report

Created: 2026-01-09 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
93.0M
{
18
    /* check if pointer is at given position */
19
20
93.0M
    Py_ssize_t thisp, thatp;
21
22
93.0M
    switch (at) {
23
24
11.1M
    case SRE_AT_BEGINNING:
25
11.1M
    case SRE_AT_BEGINNING_STRING:
26
11.1M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
78.5M
    case SRE_AT_END:
33
78.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.14M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
78.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.33M
    case SRE_AT_END_STRING:
42
3.33M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
93.0M
    }
87
88
0
    return 0;
89
93.0M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
34.1M
{
18
    /* check if pointer is at given position */
19
20
34.1M
    Py_ssize_t thisp, thatp;
21
22
34.1M
    switch (at) {
23
24
10.1M
    case SRE_AT_BEGINNING:
25
10.1M
    case SRE_AT_BEGINNING_STRING:
26
10.1M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
22.7M
    case SRE_AT_END:
33
22.7M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
423k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
22.7M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.30M
    case SRE_AT_END_STRING:
42
1.30M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
34.1M
    }
87
88
0
    return 0;
89
34.1M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
38.4M
{
18
    /* check if pointer is at given position */
19
20
38.4M
    Py_ssize_t thisp, thatp;
21
22
38.4M
    switch (at) {
23
24
1.02M
    case SRE_AT_BEGINNING:
25
1.02M
    case SRE_AT_BEGINNING_STRING:
26
1.02M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
36.5M
    case SRE_AT_END:
33
36.5M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
710k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
36.5M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
920k
    case SRE_AT_END_STRING:
42
920k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
38.4M
    }
87
88
0
    return 0;
89
38.4M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
20.4M
{
18
    /* check if pointer is at given position */
19
20
20.4M
    Py_ssize_t thisp, thatp;
21
22
20.4M
    switch (at) {
23
24
18.6k
    case SRE_AT_BEGINNING:
25
18.6k
    case SRE_AT_BEGINNING_STRING:
26
18.6k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
19.3M
    case SRE_AT_END:
33
19.3M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
7.19k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
19.3M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.11M
    case SRE_AT_END_STRING:
42
1.11M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
20.4M
    }
87
88
0
    return 0;
89
20.4M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.54G
{
94
    /* check if character is a member of the given set */
95
96
1.54G
    int ok = 1;
97
98
3.56G
    for (;;) {
99
3.56G
        switch (*set++) {
100
101
1.05G
        case SRE_OP_FAILURE:
102
1.05G
            return !ok;
103
104
1.30G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.30G
            if (ch == set[0])
107
8.60M
                return ok;
108
1.29G
            set++;
109
1.29G
            break;
110
111
97.6M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
97.6M
            if (sre_category(set[0], (int) ch))
114
49.8M
                return ok;
115
47.8M
            set++;
116
47.8M
            break;
117
118
479M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
479M
            if (ch < 256 &&
121
457M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
192M
                return ok;
123
287M
            set += 256/SRE_CODE_BITS;
124
287M
            break;
125
126
377M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
377M
            if (set[0] <= ch && ch <= set[1])
129
238M
                return ok;
130
138M
            set += 2;
131
138M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
251M
        case SRE_OP_NEGATE:
148
251M
            ok = !ok;
149
251M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.56G
        }
175
3.56G
    }
176
1.54G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
459M
{
94
    /* check if character is a member of the given set */
95
96
459M
    int ok = 1;
97
98
974M
    for (;;) {
99
974M
        switch (*set++) {
100
101
270M
        case SRE_OP_FAILURE:
102
270M
            return !ok;
103
104
335M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
335M
            if (ch == set[0])
107
5.51M
                return ok;
108
330M
            set++;
109
330M
            break;
110
111
39.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
39.0M
            if (sre_category(set[0], (int) ch))
114
20.7M
                return ok;
115
18.2M
            set++;
116
18.2M
            break;
117
118
99.1M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
99.1M
            if (ch < 256 &&
121
99.1M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
50.5M
                return ok;
123
48.6M
            set += 256/SRE_CODE_BITS;
124
48.6M
            break;
125
126
187M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
187M
            if (set[0] <= ch && ch <= set[1])
129
112M
                return ok;
130
74.6M
            set += 2;
131
74.6M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
42.3M
        case SRE_OP_NEGATE:
148
42.3M
            ok = !ok;
149
42.3M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
974M
        }
175
974M
    }
176
459M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
688M
{
94
    /* check if character is a member of the given set */
95
96
688M
    int ok = 1;
97
98
1.67G
    for (;;) {
99
1.67G
        switch (*set++) {
100
101
508M
        case SRE_OP_FAILURE:
102
508M
            return !ok;
103
104
690M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
690M
            if (ch == set[0])
107
1.69M
                return ok;
108
689M
            set++;
109
689M
            break;
110
111
38.6M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
38.6M
            if (sre_category(set[0], (int) ch))
114
16.5M
                return ok;
115
22.0M
            set++;
116
22.0M
            break;
117
118
167M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
167M
            if (ch < 256 &&
121
157M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
52.5M
                return ok;
123
115M
            set += 256/SRE_CODE_BITS;
124
115M
            break;
125
126
163M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
163M
            if (set[0] <= ch && ch <= set[1])
129
108M
                return ok;
130
54.4M
            set += 2;
131
54.4M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
105M
        case SRE_OP_NEGATE:
148
105M
            ok = !ok;
149
105M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.67G
        }
175
1.67G
    }
176
688M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
396M
{
94
    /* check if character is a member of the given set */
95
96
396M
    int ok = 1;
97
98
920M
    for (;;) {
99
920M
        switch (*set++) {
100
101
275M
        case SRE_OP_FAILURE:
102
275M
            return !ok;
103
104
280M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
280M
            if (ch == set[0])
107
1.38M
                return ok;
108
279M
            set++;
109
279M
            break;
110
111
19.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
19.9M
            if (sre_category(set[0], (int) ch))
114
12.4M
                return ok;
115
7.54M
            set++;
116
7.54M
            break;
117
118
212M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
212M
            if (ch < 256 &&
121
200M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
89.2M
                return ok;
123
123M
            set += 256/SRE_CODE_BITS;
124
123M
            break;
125
126
26.8M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
26.8M
            if (set[0] <= ch && ch <= set[1])
129
17.0M
                return ok;
130
9.79M
            set += 2;
131
9.79M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
103M
        case SRE_OP_NEGATE:
148
103M
            ok = !ok;
149
103M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
920M
        }
175
920M
    }
176
396M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
646M
{
195
646M
    SRE_CODE chr;
196
646M
    SRE_CHAR c;
197
646M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
646M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
646M
    Py_ssize_t i;
200
646M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
646M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
81.3M
        end = ptr + maxcount;
205
206
646M
    switch (pattern[0]) {
207
208
513M
    case SRE_OP_IN:
209
        /* repeated set */
210
513M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
881M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
367M
            ptr++;
213
513M
        break;
214
215
41.3M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
41.3M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
108M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
67.3M
            ptr++;
220
41.3M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
89.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
89.4M
        chr = pattern[1];
232
89.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
89.4M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
78.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
78.2M
        else
238
78.2M
#endif
239
94.5M
        while (ptr < end && *ptr == c)
240
5.10M
            ptr++;
241
89.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.54M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.54M
        chr = pattern[1];
270
2.54M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.54M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.40M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.40M
        else
276
1.40M
#endif
277
44.4M
        while (ptr < end && *ptr != c)
278
41.9M
            ptr++;
279
2.54M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
646M
    }
319
320
646M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
646M
           ptr - (SRE_CHAR*) state->ptr));
322
646M
    return ptr - (SRE_CHAR*) state->ptr;
323
646M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
242M
{
195
242M
    SRE_CODE chr;
196
242M
    SRE_CHAR c;
197
242M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
242M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
242M
    Py_ssize_t i;
200
242M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
242M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
24.2M
        end = ptr + maxcount;
205
206
242M
    switch (pattern[0]) {
207
208
166M
    case SRE_OP_IN:
209
        /* repeated set */
210
166M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
290M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
124M
            ptr++;
213
166M
        break;
214
215
12.8M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
12.8M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
29.7M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.8M
            ptr++;
220
12.8M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
62.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
62.2M
        chr = pattern[1];
232
62.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
62.2M
        c = (SRE_CHAR) chr;
234
62.2M
#if SIZEOF_SRE_CHAR < 4
235
62.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
62.2M
        else
238
62.2M
#endif
239
64.3M
        while (ptr < end && *ptr == c)
240
2.05M
            ptr++;
241
62.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
671k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
671k
        chr = pattern[1];
270
671k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
671k
        c = (SRE_CHAR) chr;
272
671k
#if SIZEOF_SRE_CHAR < 4
273
671k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
671k
        else
276
671k
#endif
277
9.41M
        while (ptr < end && *ptr != c)
278
8.74M
            ptr++;
279
671k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
242M
    }
319
320
242M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
242M
           ptr - (SRE_CHAR*) state->ptr));
322
242M
    return ptr - (SRE_CHAR*) state->ptr;
323
242M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
269M
{
195
269M
    SRE_CODE chr;
196
269M
    SRE_CHAR c;
197
269M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
269M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
269M
    Py_ssize_t i;
200
269M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
269M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
35.5M
        end = ptr + maxcount;
205
206
269M
    switch (pattern[0]) {
207
208
231M
    case SRE_OP_IN:
209
        /* repeated set */
210
231M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
358M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
126M
            ptr++;
213
231M
        break;
214
215
21.1M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
21.1M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
54.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
33.5M
            ptr++;
220
21.1M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
15.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
15.9M
        chr = pattern[1];
232
15.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
15.9M
        c = (SRE_CHAR) chr;
234
15.9M
#if SIZEOF_SRE_CHAR < 4
235
15.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
15.9M
        else
238
15.9M
#endif
239
18.0M
        while (ptr < end && *ptr == c)
240
2.08M
            ptr++;
241
15.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
735k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
735k
        chr = pattern[1];
270
735k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
735k
        c = (SRE_CHAR) chr;
272
735k
#if SIZEOF_SRE_CHAR < 4
273
735k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
735k
        else
276
735k
#endif
277
10.9M
        while (ptr < end && *ptr != c)
278
10.2M
            ptr++;
279
735k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
269M
    }
319
320
269M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
269M
           ptr - (SRE_CHAR*) state->ptr));
322
269M
    return ptr - (SRE_CHAR*) state->ptr;
323
269M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
134M
{
195
134M
    SRE_CODE chr;
196
134M
    SRE_CHAR c;
197
134M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
134M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
134M
    Py_ssize_t i;
200
134M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
134M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
21.4M
        end = ptr + maxcount;
205
206
134M
    switch (pattern[0]) {
207
208
115M
    case SRE_OP_IN:
209
        /* repeated set */
210
115M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
231M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
116M
            ptr++;
213
115M
        break;
214
215
7.26M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
7.26M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
24.1M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.9M
            ptr++;
220
7.26M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.2M
        chr = pattern[1];
232
11.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.2M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.1M
        while (ptr < end && *ptr == c)
240
965k
            ptr++;
241
11.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.14M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.14M
        chr = pattern[1];
270
1.14M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.14M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
24.0M
        while (ptr < end && *ptr != c)
278
22.9M
            ptr++;
279
1.14M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
134M
    }
319
320
134M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
134M
           ptr - (SRE_CHAR*) state->ptr));
322
134M
    return ptr - (SRE_CHAR*) state->ptr;
323
134M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
527M
    do { \
355
527M
        ctx->lastmark = state->lastmark; \
356
527M
        ctx->lastindex = state->lastindex; \
357
527M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
326M
    do { \
360
326M
        state->lastmark = ctx->lastmark; \
361
326M
        state->lastindex = ctx->lastindex; \
362
326M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
190M
    do { \
366
190M
        TRACE(("push last_ptr: %zd", \
367
190M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
190M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
190M
    } while (0)
370
#define LAST_PTR_POP()  \
371
190M
    do { \
372
190M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
190M
        TRACE(("pop last_ptr: %zd", \
374
190M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
190M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
880M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
558M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.14G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
114M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
27.4M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.43G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.43G
do { \
390
1.43G
    alloc_pos = state->data_stack_base; \
391
1.43G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.43G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.43G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
161M
        int j = data_stack_grow(state, sizeof(type)); \
395
161M
        if (j < 0) return j; \
396
161M
        if (ctx_pos != -1) \
397
161M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
161M
    } \
399
1.43G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.43G
    state->data_stack_base += sizeof(type); \
401
1.43G
} while (0)
402
403
1.54G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.54G
do { \
405
1.54G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.54G
    ptr = (type*)(state->data_stack+pos); \
407
1.54G
} while (0)
408
409
468M
#define DATA_STACK_PUSH(state, data, size) \
410
468M
do { \
411
468M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
468M
           data, state->data_stack_base, size)); \
413
468M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
85.0k
        int j = data_stack_grow(state, size); \
415
85.0k
        if (j < 0) return j; \
416
85.0k
        if (ctx_pos != -1) \
417
85.0k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
85.0k
    } \
419
468M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
468M
    state->data_stack_base += size; \
421
468M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
298M
#define DATA_STACK_POP(state, data, size, discard) \
427
298M
do { \
428
298M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
298M
           data, state->data_stack_base-size, size)); \
430
298M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
298M
    if (discard) \
432
298M
        state->data_stack_base -= size; \
433
298M
} while (0)
434
435
1.61G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.61G
do { \
437
1.61G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.61G
           state->data_stack_base-size, size)); \
439
1.61G
    state->data_stack_base -= size; \
440
1.61G
} while(0)
441
442
#define DATA_PUSH(x) \
443
190M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
190M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.43G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.43G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.54G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
376M
    do if (lastmark >= 0) { \
473
277M
        MARK_TRACE("push", (lastmark)); \
474
277M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
277M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
376M
    } while (0)
477
#define MARK_POP(lastmark) \
478
120M
    do if (lastmark >= 0) { \
479
105M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
105M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
105M
        MARK_TRACE("pop", (lastmark)); \
482
120M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.27M
    do if (lastmark >= 0) { \
485
2.02M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.02M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.02M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.27M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
256M
    do if (lastmark >= 0) { \
491
172M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
172M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
172M
        MARK_TRACE("pop discard", (lastmark)); \
494
256M
    } while (0)
495
496
544M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
190M
#define JUMP_MAX_UNTIL_2     2
499
114M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
113M
#define JUMP_REPEAT          7
504
12.6M
#define JUMP_REPEAT_ONE_1    8
505
217M
#define JUMP_REPEAT_ONE_2    9
506
42.0M
#define JUMP_MIN_REPEAT_ONE  10
507
153M
#define JUMP_BRANCH          11
508
27.4M
#define JUMP_ASSERT          12
509
22.1M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
894M
    ctx->pattern = pattern; \
516
894M
    ctx->ptr = ptr; \
517
894M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
894M
    nextctx->pattern = nextpattern; \
519
894M
    nextctx->toplevel = toplevel_; \
520
894M
    nextctx->jump = jumpvalue; \
521
894M
    nextctx->last_ctx_pos = ctx_pos; \
522
894M
    pattern = nextpattern; \
523
894M
    ctx_pos = alloc_pos; \
524
894M
    ctx = nextctx; \
525
894M
    goto entrance; \
526
894M
    jumplabel: \
527
894M
    pattern = ctx->pattern; \
528
894M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
845M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
49.6M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.47G
    do {                                                           \
553
2.47G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.47G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.47G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.54G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.47G
        do {                               \
588
2.47G
            MAYBE_CHECK_SIGNALS;           \
589
2.47G
            goto *sre_targets[*pattern++]; \
590
2.47G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
544M
{
601
544M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
544M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
544M
    Py_ssize_t ret = 0;
604
544M
    int jump;
605
544M
    unsigned int sigcount = state->sigcount;
606
607
544M
    SRE(match_context)* ctx;
608
544M
    SRE(match_context)* nextctx;
609
544M
    INIT_TRACE(state);
610
611
544M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
544M
    DATA_ALLOC(SRE(match_context), ctx);
614
544M
    ctx->last_ctx_pos = -1;
615
544M
    ctx->jump = JUMP_NONE;
616
544M
    ctx->toplevel = toplevel;
617
544M
    ctx_pos = alloc_pos;
618
619
544M
#if USE_COMPUTED_GOTOS
620
544M
#include "sre_targets.h"
621
544M
#endif
622
623
1.43G
entrance:
624
625
1.43G
    ;  // Fashion statement.
626
1.43G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.43G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
64.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.02M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.02M
                   end - ptr, (size_t) pattern[3]));
634
5.02M
            RETURN_FAILURE;
635
5.02M
        }
636
59.6M
        pattern += pattern[1] + 1;
637
59.6M
    }
638
639
1.43G
#if USE_COMPUTED_GOTOS
640
1.43G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.43G
    {
647
648
1.43G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
633M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
633M
                   ptr, pattern[0]));
653
633M
            {
654
633M
                int i = pattern[0];
655
633M
                if (i & 1)
656
108M
                    state->lastindex = i/2 + 1;
657
633M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
626M
                    int j = state->lastmark + 1;
663
643M
                    while (j < i)
664
16.8M
                        state->mark[j++] = NULL;
665
626M
                    state->lastmark = i;
666
626M
                }
667
633M
                state->mark[i] = ptr;
668
633M
            }
669
633M
            pattern++;
670
633M
            DISPATCH;
671
672
633M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
138M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
138M
                   ptr, *pattern));
677
138M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
59.1M
                RETURN_FAILURE;
679
79.7M
            pattern++;
680
79.7M
            ptr++;
681
79.7M
            DISPATCH;
682
683
79.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
157M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
157M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
157M
            if (ctx->toplevel &&
698
45.6M
                ((state->match_all && ptr != state->end) ||
699
45.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
157M
            state->ptr = ptr;
704
157M
            RETURN_SUCCESS;
705
706
93.0M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
93.0M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
93.0M
            if (!SRE(at)(state, ptr, *pattern))
711
74.0M
                RETURN_FAILURE;
712
19.0M
            pattern++;
713
19.0M
            DISPATCH;
714
715
19.0M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
271M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
271M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
271M
            if (ptr >= end ||
749
270M
                !SRE(charset)(state, pattern + 1, *ptr))
750
87.6M
                RETURN_FAILURE;
751
183M
            pattern += pattern[0];
752
183M
            ptr++;
753
183M
            DISPATCH;
754
755
183M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.76M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.76M
                   pattern, ptr, pattern[0]));
758
6.76M
            if (ptr >= end ||
759
6.76M
                sre_lower_ascii(*ptr) != *pattern)
760
74.6k
                RETURN_FAILURE;
761
6.68M
            pattern++;
762
6.68M
            ptr++;
763
6.68M
            DISPATCH;
764
765
6.68M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
73.9M
        TARGET(SRE_OP_JUMP):
845
73.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
73.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
73.9M
                   ptr, pattern[0]));
850
73.9M
            pattern += pattern[0];
851
73.9M
            DISPATCH;
852
853
113M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
113M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
113M
            LASTMARK_SAVE();
858
113M
            if (state->repeat)
859
58.1M
                MARK_PUSH(ctx->lastmark);
860
283M
            for (; pattern[0]; pattern += pattern[0]) {
861
241M
                if (pattern[1] == SRE_OP_LITERAL &&
862
114M
                    (ptr >= end ||
863
114M
                     (SRE_CODE) *ptr != pattern[2]))
864
59.8M
                    continue;
865
181M
                if (pattern[1] == SRE_OP_IN &&
866
51.0M
                    (ptr >= end ||
867
50.9M
                     !SRE(charset)(state, pattern + 3,
868
50.9M
                                   (SRE_CODE) *ptr)))
869
28.1M
                    continue;
870
153M
                state->ptr = ptr;
871
153M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
153M
                if (ret) {
873
71.5M
                    if (state->repeat)
874
47.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
71.5M
                    RETURN_ON_ERROR(ret);
876
71.5M
                    RETURN_SUCCESS;
877
71.5M
                }
878
82.0M
                if (state->repeat)
879
15.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
82.0M
                LASTMARK_RESTORE();
881
82.0M
            }
882
42.0M
            if (state->repeat)
883
11.0M
                MARK_POP_DISCARD(ctx->lastmark);
884
42.0M
            RETURN_FAILURE;
885
886
610M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
610M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
610M
                   pattern[1], pattern[2]));
898
899
610M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.23M
                RETURN_FAILURE; /* cannot match */
901
902
609M
            state->ptr = ptr;
903
904
609M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
609M
            RETURN_ON_ERROR(ret);
906
609M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
609M
            ctx->count = ret;
908
609M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
609M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
413M
                RETURN_FAILURE;
917
918
196M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.00M
                ptr == state->end &&
920
78.2k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
78.2k
            {
922
                /* tail is empty.  we're finished */
923
78.2k
                state->ptr = ptr;
924
78.2k
                RETURN_SUCCESS;
925
78.2k
            }
926
927
196M
            LASTMARK_SAVE();
928
196M
            if (state->repeat)
929
105M
                MARK_PUSH(ctx->lastmark);
930
931
196M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
25.9M
                ctx->u.chr = pattern[pattern[0]+1];
935
25.9M
                for (;;) {
936
62.5M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
49.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
36.6M
                        ptr--;
939
36.6M
                        ctx->count--;
940
36.6M
                    }
941
25.9M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
13.2M
                        break;
943
12.6M
                    state->ptr = ptr;
944
12.6M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.6M
                            pattern+pattern[0]);
946
12.6M
                    if (ret) {
947
12.6M
                        if (state->repeat)
948
11.8M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.6M
                        RETURN_ON_ERROR(ret);
950
12.6M
                        RETURN_SUCCESS;
951
12.6M
                    }
952
624
                    if (state->repeat)
953
624
                        MARK_POP_KEEP(ctx->lastmark);
954
624
                    LASTMARK_RESTORE();
955
956
624
                    ptr--;
957
624
                    ctx->count--;
958
624
                }
959
13.2M
                if (state->repeat)
960
11.8M
                    MARK_POP_DISCARD(ctx->lastmark);
961
170M
            } else {
962
                /* general case */
963
256M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
217M
                    state->ptr = ptr;
965
217M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
217M
                            pattern+pattern[0]);
967
217M
                    if (ret) {
968
131M
                        if (state->repeat)
969
80.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
131M
                        RETURN_ON_ERROR(ret);
971
131M
                        RETURN_SUCCESS;
972
131M
                    }
973
86.5M
                    if (state->repeat)
974
2.25M
                        MARK_POP_KEEP(ctx->lastmark);
975
86.5M
                    LASTMARK_RESTORE();
976
977
86.5M
                    ptr--;
978
86.5M
                    ctx->count--;
979
86.5M
                }
980
39.0M
                if (state->repeat)
981
1.38M
                    MARK_POP_DISCARD(ctx->lastmark);
982
39.0M
            }
983
52.2M
            RETURN_FAILURE;
984
985
4.80M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
4.80M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
4.80M
                   pattern[1], pattern[2]));
997
998
4.80M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
4.80M
            state->ptr = ptr;
1002
1003
4.80M
            if (pattern[1] == 0)
1004
4.80M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
4.80M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
4.80M
            } else {
1028
                /* general case */
1029
4.80M
                LASTMARK_SAVE();
1030
4.80M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
42.0M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
42.0M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
42.0M
                    state->ptr = ptr;
1036
42.0M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
42.0M
                            pattern+pattern[0]);
1038
42.0M
                    if (ret) {
1039
4.80M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
4.80M
                        RETURN_ON_ERROR(ret);
1042
4.80M
                        RETURN_SUCCESS;
1043
4.80M
                    }
1044
37.2M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
37.2M
                    LASTMARK_RESTORE();
1047
1048
37.2M
                    state->ptr = ptr;
1049
37.2M
                    ret = SRE(count)(state, pattern+3, 1);
1050
37.2M
                    RETURN_ON_ERROR(ret);
1051
37.2M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
37.2M
                    if (ret == 0)
1053
0
                        break;
1054
37.2M
                    assert(ret == 1);
1055
37.2M
                    ptr++;
1056
37.2M
                    ctx->count++;
1057
37.2M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
113M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
113M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
113M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
113M
            ctx->u.rep = repeat_pool_malloc(state);
1127
113M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
113M
            ctx->u.rep->count = -1;
1131
113M
            ctx->u.rep->pattern = pattern;
1132
113M
            ctx->u.rep->prev = state->repeat;
1133
113M
            ctx->u.rep->last_ptr = NULL;
1134
113M
            state->repeat = ctx->u.rep;
1135
1136
113M
            state->ptr = ptr;
1137
113M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
113M
            state->repeat = ctx->u.rep->prev;
1139
113M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
113M
            if (ret) {
1142
43.9M
                RETURN_ON_ERROR(ret);
1143
43.9M
                RETURN_SUCCESS;
1144
43.9M
            }
1145
69.7M
            RETURN_FAILURE;
1146
1147
207M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
207M
            ctx->u.rep = state->repeat;
1155
207M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
207M
            state->ptr = ptr;
1159
1160
207M
            ctx->count = ctx->u.rep->count+1;
1161
1162
207M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
207M
                   ptr, ctx->count));
1164
1165
207M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
207M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
16.5M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
190M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
190M
                ctx->u.rep->count = ctx->count;
1185
190M
                LASTMARK_SAVE();
1186
190M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
190M
                LAST_PTR_PUSH();
1189
190M
                ctx->u.rep->last_ptr = state->ptr;
1190
190M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
190M
                        ctx->u.rep->pattern+3);
1192
190M
                LAST_PTR_POP();
1193
190M
                if (ret) {
1194
92.6M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
92.6M
                    RETURN_ON_ERROR(ret);
1196
92.6M
                    RETURN_SUCCESS;
1197
92.6M
                }
1198
98.0M
                MARK_POP(ctx->lastmark);
1199
98.0M
                LASTMARK_RESTORE();
1200
98.0M
                ctx->u.rep->count = ctx->count-1;
1201
98.0M
                state->ptr = ptr;
1202
98.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
114M
            state->repeat = ctx->u.rep->prev;
1207
114M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
114M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
114M
            RETURN_ON_SUCCESS(ret);
1211
70.6M
            state->ptr = ptr;
1212
70.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
27.4M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
27.4M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
27.4M
                   ptr, pattern[1]));
1565
27.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
27.4M
            state->ptr = ptr - pattern[1];
1568
27.4M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
27.4M
            RETURN_ON_FAILURE(ret);
1570
21.9M
            pattern += pattern[0];
1571
21.9M
            DISPATCH;
1572
1573
22.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
22.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
22.1M
                   ptr, pattern[1]));
1578
22.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
22.1M
                state->ptr = ptr - pattern[1];
1580
22.1M
                LASTMARK_SAVE();
1581
22.1M
                if (state->repeat)
1582
22.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
44.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
44.3M
                if (ret) {
1586
10.0k
                    if (state->repeat)
1587
10.0k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
10.0k
                    RETURN_ON_ERROR(ret);
1589
10.0k
                    RETURN_FAILURE;
1590
10.0k
                }
1591
22.1M
                if (state->repeat)
1592
22.1M
                    MARK_POP(ctx->lastmark);
1593
22.1M
                LASTMARK_RESTORE();
1594
22.1M
            }
1595
22.1M
            pattern += pattern[0];
1596
22.1M
            DISPATCH;
1597
1598
22.1M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.43G
exit:
1620
1.43G
    ctx_pos = ctx->last_ctx_pos;
1621
1.43G
    jump = ctx->jump;
1622
1.43G
    DATA_POP_DISCARD(ctx);
1623
1.43G
    if (ctx_pos == -1) {
1624
544M
        state->sigcount = sigcount;
1625
544M
        return ret;
1626
544M
    }
1627
894M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
894M
    switch (jump) {
1630
190M
        case JUMP_MAX_UNTIL_2:
1631
190M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
190M
            goto jump_max_until_2;
1633
114M
        case JUMP_MAX_UNTIL_3:
1634
114M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
114M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
153M
        case JUMP_BRANCH:
1643
153M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
153M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
113M
        case JUMP_REPEAT:
1658
113M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
113M
            goto jump_repeat;
1660
12.6M
        case JUMP_REPEAT_ONE_1:
1661
12.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.6M
            goto jump_repeat_one_1;
1663
217M
        case JUMP_REPEAT_ONE_2:
1664
217M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
217M
            goto jump_repeat_one_2;
1666
42.0M
        case JUMP_MIN_REPEAT_ONE:
1667
42.0M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
42.0M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
27.4M
        case JUMP_ASSERT:
1673
27.4M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
27.4M
            goto jump_assert;
1675
22.1M
        case JUMP_ASSERT_NOT:
1676
22.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
22.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
894M
    }
1683
1684
0
    return ret; /* should never get here */
1685
894M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
217M
{
601
217M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
217M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
217M
    Py_ssize_t ret = 0;
604
217M
    int jump;
605
217M
    unsigned int sigcount = state->sigcount;
606
607
217M
    SRE(match_context)* ctx;
608
217M
    SRE(match_context)* nextctx;
609
217M
    INIT_TRACE(state);
610
611
217M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
217M
    DATA_ALLOC(SRE(match_context), ctx);
614
217M
    ctx->last_ctx_pos = -1;
615
217M
    ctx->jump = JUMP_NONE;
616
217M
    ctx->toplevel = toplevel;
617
217M
    ctx_pos = alloc_pos;
618
619
217M
#if USE_COMPUTED_GOTOS
620
217M
#include "sre_targets.h"
621
217M
#endif
622
623
520M
entrance:
624
625
520M
    ;  // Fashion statement.
626
520M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
520M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
38.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.86M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.86M
                   end - ptr, (size_t) pattern[3]));
634
4.86M
            RETURN_FAILURE;
635
4.86M
        }
636
33.9M
        pattern += pattern[1] + 1;
637
33.9M
    }
638
639
515M
#if USE_COMPUTED_GOTOS
640
515M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
515M
    {
647
648
515M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
238M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
238M
                   ptr, pattern[0]));
653
238M
            {
654
238M
                int i = pattern[0];
655
238M
                if (i & 1)
656
44.3M
                    state->lastindex = i/2 + 1;
657
238M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
235M
                    int j = state->lastmark + 1;
663
247M
                    while (j < i)
664
11.9M
                        state->mark[j++] = NULL;
665
235M
                    state->lastmark = i;
666
235M
                }
667
238M
                state->mark[i] = ptr;
668
238M
            }
669
238M
            pattern++;
670
238M
            DISPATCH;
671
672
238M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
82.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
82.3M
                   ptr, *pattern));
677
82.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
34.9M
                RETURN_FAILURE;
679
47.4M
            pattern++;
680
47.4M
            ptr++;
681
47.4M
            DISPATCH;
682
683
47.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
64.0M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
64.0M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
64.0M
            if (ctx->toplevel &&
698
26.1M
                ((state->match_all && ptr != state->end) ||
699
26.1M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
64.0M
            state->ptr = ptr;
704
64.0M
            RETURN_SUCCESS;
705
706
34.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
34.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
34.1M
            if (!SRE(at)(state, ptr, *pattern))
711
16.9M
                RETURN_FAILURE;
712
17.1M
            pattern++;
713
17.1M
            DISPATCH;
714
715
17.1M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
75.0M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
75.0M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
75.0M
            if (ptr >= end ||
749
74.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
18.1M
                RETURN_FAILURE;
751
56.9M
            pattern += pattern[0];
752
56.9M
            ptr++;
753
56.9M
            DISPATCH;
754
755
56.9M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
637k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
637k
                   pattern, ptr, pattern[0]));
758
637k
            if (ptr >= end ||
759
637k
                sre_lower_ascii(*ptr) != *pattern)
760
22.5k
                RETURN_FAILURE;
761
614k
            pattern++;
762
614k
            ptr++;
763
614k
            DISPATCH;
764
765
614k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
31.7M
        TARGET(SRE_OP_JUMP):
845
31.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
31.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
31.7M
                   ptr, pattern[0]));
850
31.7M
            pattern += pattern[0];
851
31.7M
            DISPATCH;
852
853
58.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
58.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
58.2M
            LASTMARK_SAVE();
858
58.2M
            if (state->repeat)
859
12.4M
                MARK_PUSH(ctx->lastmark);
860
164M
            for (; pattern[0]; pattern += pattern[0]) {
861
137M
                if (pattern[1] == SRE_OP_LITERAL &&
862
64.6M
                    (ptr >= end ||
863
64.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
25.8M
                    continue;
865
111M
                if (pattern[1] == SRE_OP_IN &&
866
12.1M
                    (ptr >= end ||
867
12.1M
                     !SRE(charset)(state, pattern + 3,
868
12.1M
                                   (SRE_CODE) *ptr)))
869
6.32M
                    continue;
870
104M
                state->ptr = ptr;
871
104M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
104M
                if (ret) {
873
30.4M
                    if (state->repeat)
874
11.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
30.4M
                    RETURN_ON_ERROR(ret);
876
30.4M
                    RETURN_SUCCESS;
877
30.4M
                }
878
74.4M
                if (state->repeat)
879
6.16k
                    MARK_POP_KEEP(ctx->lastmark);
880
74.4M
                LASTMARK_RESTORE();
881
74.4M
            }
882
27.8M
            if (state->repeat)
883
718k
                MARK_POP_DISCARD(ctx->lastmark);
884
27.8M
            RETURN_FAILURE;
885
886
233M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
233M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
233M
                   pattern[1], pattern[2]));
898
899
233M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.00M
                RETURN_FAILURE; /* cannot match */
901
902
232M
            state->ptr = ptr;
903
904
232M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
232M
            RETURN_ON_ERROR(ret);
906
232M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
232M
            ctx->count = ret;
908
232M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
232M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
169M
                RETURN_FAILURE;
917
918
63.6M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
669k
                ptr == state->end &&
920
57.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
57.1k
            {
922
                /* tail is empty.  we're finished */
923
57.1k
                state->ptr = ptr;
924
57.1k
                RETURN_SUCCESS;
925
57.1k
            }
926
927
63.5M
            LASTMARK_SAVE();
928
63.5M
            if (state->repeat)
929
35.6M
                MARK_PUSH(ctx->lastmark);
930
931
63.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.97M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.97M
                for (;;) {
936
16.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.2M
                        ptr--;
939
10.2M
                        ctx->count--;
940
10.2M
                    }
941
5.97M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.45M
                        break;
943
3.52M
                    state->ptr = ptr;
944
3.52M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.52M
                            pattern+pattern[0]);
946
3.52M
                    if (ret) {
947
3.52M
                        if (state->repeat)
948
2.67M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.52M
                        RETURN_ON_ERROR(ret);
950
3.52M
                        RETURN_SUCCESS;
951
3.52M
                    }
952
140
                    if (state->repeat)
953
140
                        MARK_POP_KEEP(ctx->lastmark);
954
140
                    LASTMARK_RESTORE();
955
956
140
                    ptr--;
957
140
                    ctx->count--;
958
140
                }
959
2.45M
                if (state->repeat)
960
1.07M
                    MARK_POP_DISCARD(ctx->lastmark);
961
57.5M
            } else {
962
                /* general case */
963
78.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
68.7M
                    state->ptr = ptr;
965
68.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
68.7M
                            pattern+pattern[0]);
967
68.7M
                    if (ret) {
968
47.3M
                        if (state->repeat)
969
31.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
47.3M
                        RETURN_ON_ERROR(ret);
971
47.3M
                        RETURN_SUCCESS;
972
47.3M
                    }
973
21.4M
                    if (state->repeat)
974
1.30M
                        MARK_POP_KEEP(ctx->lastmark);
975
21.4M
                    LASTMARK_RESTORE();
976
977
21.4M
                    ptr--;
978
21.4M
                    ctx->count--;
979
21.4M
                }
980
10.2M
                if (state->repeat)
981
800k
                    MARK_POP_DISCARD(ctx->lastmark);
982
10.2M
            }
983
12.7M
            RETURN_FAILURE;
984
985
3.93M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.93M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.93M
                   pattern[1], pattern[2]));
997
998
3.93M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.93M
            state->ptr = ptr;
1002
1003
3.93M
            if (pattern[1] == 0)
1004
3.93M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.93M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.93M
            } else {
1028
                /* general case */
1029
3.93M
                LASTMARK_SAVE();
1030
3.93M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
13.0M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
13.0M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
13.0M
                    state->ptr = ptr;
1036
13.0M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
13.0M
                            pattern+pattern[0]);
1038
13.0M
                    if (ret) {
1039
3.93M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.93M
                        RETURN_ON_ERROR(ret);
1042
3.93M
                        RETURN_SUCCESS;
1043
3.93M
                    }
1044
9.16M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
9.16M
                    LASTMARK_RESTORE();
1047
1048
9.16M
                    state->ptr = ptr;
1049
9.16M
                    ret = SRE(count)(state, pattern+3, 1);
1050
9.16M
                    RETURN_ON_ERROR(ret);
1051
9.16M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
9.16M
                    if (ret == 0)
1053
0
                        break;
1054
9.16M
                    assert(ret == 1);
1055
9.16M
                    ptr++;
1056
9.16M
                    ctx->count++;
1057
9.16M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
            /* compare lengths: ptr + min could point past end (UB) */
1078
0
            if ((Py_ssize_t) pattern[1] > end - ptr) {
1079
0
                RETURN_FAILURE; /* fewer than <min> chars left: cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
26.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
26.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
26.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
26.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
26.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
26.5M
            ctx->u.rep->count = -1;
1131
26.5M
            ctx->u.rep->pattern = pattern;
1132
26.5M
            ctx->u.rep->prev = state->repeat;
1133
26.5M
            ctx->u.rep->last_ptr = NULL;
1134
26.5M
            state->repeat = ctx->u.rep;
1135
1136
26.5M
            state->ptr = ptr;
1137
26.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
26.5M
            state->repeat = ctx->u.rep->prev;
1139
26.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
26.5M
            if (ret) {
1142
11.4M
                RETURN_ON_ERROR(ret);
1143
11.4M
                RETURN_SUCCESS;
1144
11.4M
            }
1145
15.1M
            RETURN_FAILURE;
1146
1147
58.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
58.0M
            ctx->u.rep = state->repeat;
1155
58.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
58.0M
            state->ptr = ptr;
1159
1160
58.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
58.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
58.0M
                   ptr, ctx->count));
1164
1165
58.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
58.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
8.69M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
49.3M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
49.3M
                ctx->u.rep->count = ctx->count;
1185
49.3M
                LASTMARK_SAVE();
1186
49.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
49.3M
                LAST_PTR_PUSH();
1189
49.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
49.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
49.3M
                        ctx->u.rep->pattern+3);
1192
49.3M
                LAST_PTR_POP();
1193
49.3M
                if (ret) {
1194
30.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
30.9M
                    RETURN_ON_ERROR(ret);
1196
30.9M
                    RETURN_SUCCESS;
1197
30.9M
                }
1198
18.4M
                MARK_POP(ctx->lastmark);
1199
18.4M
                LASTMARK_RESTORE();
1200
18.4M
                ctx->u.rep->count = ctx->count-1;
1201
18.4M
                state->ptr = ptr;
1202
18.4M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
27.1M
            state->repeat = ctx->u.rep->prev;
1207
27.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
27.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
27.1M
            RETURN_ON_SUCCESS(ret);
1211
15.7M
            state->ptr = ptr;
1212
15.7M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.38M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.38M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.38M
                   ptr, pattern[1]));
1565
3.38M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.38M
            state->ptr = ptr - pattern[1];
1568
3.38M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.38M
            RETURN_ON_FAILURE(ret);
1570
3.14M
            pattern += pattern[0];
1571
3.14M
            DISPATCH;
1572
1573
5.46M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.46M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.46M
                   ptr, pattern[1]));
1578
5.46M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.46M
                state->ptr = ptr - pattern[1];
1580
5.46M
                LASTMARK_SAVE();
1581
5.46M
                if (state->repeat)
1582
5.46M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.9M
                if (ret) {
1586
1.44k
                    if (state->repeat)
1587
1.44k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.44k
                    RETURN_ON_ERROR(ret);
1589
1.44k
                    RETURN_FAILURE;
1590
1.44k
                }
1591
5.46M
                if (state->repeat)
1592
5.46M
                    MARK_POP(ctx->lastmark);
1593
5.46M
                LASTMARK_RESTORE();
1594
5.46M
            }
1595
5.46M
            pattern += pattern[0];
1596
5.46M
            DISPATCH;
1597
1598
5.46M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
520M
exit:
1620
520M
    ctx_pos = ctx->last_ctx_pos;
1621
520M
    jump = ctx->jump;
1622
520M
    DATA_POP_DISCARD(ctx);
1623
520M
    if (ctx_pos == -1) {
1624
217M
        state->sigcount = sigcount;
1625
217M
        return ret;
1626
217M
    }
1627
302M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
302M
    switch (jump) {
1630
49.3M
        case JUMP_MAX_UNTIL_2:
1631
49.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
49.3M
            goto jump_max_until_2;
1633
27.1M
        case JUMP_MAX_UNTIL_3:
1634
27.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
27.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
104M
        case JUMP_BRANCH:
1643
104M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
104M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
26.5M
        case JUMP_REPEAT:
1658
26.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
26.5M
            goto jump_repeat;
1660
3.52M
        case JUMP_REPEAT_ONE_1:
1661
3.52M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.52M
            goto jump_repeat_one_1;
1663
68.7M
        case JUMP_REPEAT_ONE_2:
1664
68.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
68.7M
            goto jump_repeat_one_2;
1666
13.0M
        case JUMP_MIN_REPEAT_ONE:
1667
13.0M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
13.0M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.38M
        case JUMP_ASSERT:
1673
3.38M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.38M
            goto jump_assert;
1675
5.46M
        case JUMP_ASSERT_NOT:
1676
5.46M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.46M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
302M
    }
1683
1684
0
    return ret; /* should never get here */
1685
302M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
249M
{
601
249M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
249M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
249M
    Py_ssize_t ret = 0;
604
249M
    int jump;
605
249M
    unsigned int sigcount = state->sigcount;
606
607
249M
    SRE(match_context)* ctx;
608
249M
    SRE(match_context)* nextctx;
609
249M
    INIT_TRACE(state);
610
611
249M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
249M
    DATA_ALLOC(SRE(match_context), ctx);
614
249M
    ctx->last_ctx_pos = -1;
615
249M
    ctx->jump = JUMP_NONE;
616
249M
    ctx->toplevel = toplevel;
617
249M
    ctx_pos = alloc_pos;
618
619
249M
#if USE_COMPUTED_GOTOS
620
249M
#include "sre_targets.h"
621
249M
#endif
622
623
556M
entrance:
624
625
556M
    ;  // Fashion statement.
626
556M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
556M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
15.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
155k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
155k
                   end - ptr, (size_t) pattern[3]));
634
155k
            RETURN_FAILURE;
635
155k
        }
636
14.9M
        pattern += pattern[1] + 1;
637
14.9M
    }
638
639
556M
#if USE_COMPUTED_GOTOS
640
556M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
556M
    {
647
648
556M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
271M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
271M
                   ptr, pattern[0]));
653
271M
            {
654
271M
                int i = pattern[0];
655
271M
                if (i & 1)
656
37.8M
                    state->lastindex = i/2 + 1;
657
271M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
270M
                    int j = state->lastmark + 1;
663
273M
                    while (j < i)
664
2.99M
                        state->mark[j++] = NULL;
665
270M
                    state->lastmark = i;
666
270M
                }
667
271M
                state->mark[i] = ptr;
668
271M
            }
669
271M
            pattern++;
670
271M
            DISPATCH;
671
672
271M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
29.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
29.7M
                   ptr, *pattern));
677
29.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
13.8M
                RETURN_FAILURE;
679
15.8M
            pattern++;
680
15.8M
            ptr++;
681
15.8M
            DISPATCH;
682
683
15.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
64.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
64.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
64.9M
            if (ctx->toplevel &&
698
10.3M
                ((state->match_all && ptr != state->end) ||
699
10.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
64.9M
            state->ptr = ptr;
704
64.9M
            RETURN_SUCCESS;
705
706
38.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
38.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
38.4M
            if (!SRE(at)(state, ptr, *pattern))
711
36.5M
                RETURN_FAILURE;
712
1.88M
            pattern++;
713
1.88M
            DISPATCH;
714
715
1.88M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
120M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
120M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
120M
            if (ptr >= end ||
749
119M
                !SRE(charset)(state, pattern + 1, *ptr))
750
44.0M
                RETURN_FAILURE;
751
76.2M
            pattern += pattern[0];
752
76.2M
            ptr++;
753
76.2M
            DISPATCH;
754
755
76.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.76M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.76M
                   pattern, ptr, pattern[0]));
758
3.76M
            if (ptr >= end ||
759
3.76M
                sre_lower_ascii(*ptr) != *pattern)
760
20.4k
                RETURN_FAILURE;
761
3.74M
            pattern++;
762
3.74M
            ptr++;
763
3.74M
            DISPATCH;
764
765
3.74M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
17.7M
        TARGET(SRE_OP_JUMP):
845
17.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
17.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
17.7M
                   ptr, pattern[0]));
850
17.7M
            pattern += pattern[0];
851
17.7M
            DISPATCH;
852
853
23.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
23.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
23.8M
            LASTMARK_SAVE();
858
23.8M
            if (state->repeat)
859
18.0M
                MARK_PUSH(ctx->lastmark);
860
52.3M
            for (; pattern[0]; pattern += pattern[0]) {
861
45.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
20.6M
                    (ptr >= end ||
863
20.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
13.2M
                    continue;
865
32.4M
                if (pattern[1] == SRE_OP_IN &&
866
15.6M
                    (ptr >= end ||
867
15.6M
                     !SRE(charset)(state, pattern + 3,
868
15.6M
                                   (SRE_CODE) *ptr)))
869
8.74M
                    continue;
870
23.6M
                state->ptr = ptr;
871
23.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.6M
                if (ret) {
873
17.2M
                    if (state->repeat)
874
14.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
17.2M
                    RETURN_ON_ERROR(ret);
876
17.2M
                    RETURN_SUCCESS;
877
17.2M
                }
878
6.40M
                if (state->repeat)
879
3.25k
                    MARK_POP_KEEP(ctx->lastmark);
880
6.40M
                LASTMARK_RESTORE();
881
6.40M
            }
882
6.61M
            if (state->repeat)
883
3.44M
                MARK_POP_DISCARD(ctx->lastmark);
884
6.61M
            RETURN_FAILURE;
885
886
249M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
249M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
249M
                   pattern[1], pattern[2]));
898
899
249M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
215k
                RETURN_FAILURE; /* cannot match */
901
902
248M
            state->ptr = ptr;
903
904
248M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
248M
            RETURN_ON_ERROR(ret);
906
248M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
248M
            ctx->count = ret;
908
248M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
248M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
184M
                RETURN_FAILURE;
917
918
64.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.95M
                ptr == state->end &&
920
17.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
17.7k
            {
922
                /* tail is empty.  we're finished */
923
17.7k
                state->ptr = ptr;
924
17.7k
                RETURN_SUCCESS;
925
17.7k
            }
926
927
64.2M
            LASTMARK_SAVE();
928
64.2M
            if (state->repeat)
929
25.2M
                MARK_PUSH(ctx->lastmark);
930
931
64.2M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.41M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.41M
                for (;;) {
936
9.20M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
7.29M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
3.79M
                        ptr--;
939
3.79M
                        ctx->count--;
940
3.79M
                    }
941
5.41M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.90M
                        break;
943
3.50M
                    state->ptr = ptr;
944
3.50M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.50M
                            pattern+pattern[0]);
946
3.50M
                    if (ret) {
947
3.50M
                        if (state->repeat)
948
3.47M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.50M
                        RETURN_ON_ERROR(ret);
950
3.50M
                        RETURN_SUCCESS;
951
3.50M
                    }
952
231
                    if (state->repeat)
953
231
                        MARK_POP_KEEP(ctx->lastmark);
954
231
                    LASTMARK_RESTORE();
955
956
231
                    ptr--;
957
231
                    ctx->count--;
958
231
                }
959
1.90M
                if (state->repeat)
960
1.89M
                    MARK_POP_DISCARD(ctx->lastmark);
961
58.8M
            } else {
962
                /* general case */
963
100M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
79.2M
                    state->ptr = ptr;
965
79.2M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
79.2M
                            pattern+pattern[0]);
967
79.2M
                    if (ret) {
968
37.4M
                        if (state->repeat)
969
19.4M
                            MARK_POP_DISCARD(ctx->lastmark);
970
37.4M
                        RETURN_ON_ERROR(ret);
971
37.4M
                        RETURN_SUCCESS;
972
37.4M
                    }
973
41.7M
                    if (state->repeat)
974
733k
                        MARK_POP_KEEP(ctx->lastmark);
975
41.7M
                    LASTMARK_RESTORE();
976
977
41.7M
                    ptr--;
978
41.7M
                    ctx->count--;
979
41.7M
                }
980
21.3M
                if (state->repeat)
981
442k
                    MARK_POP_DISCARD(ctx->lastmark);
982
21.3M
            }
983
23.2M
            RETURN_FAILURE;
984
985
858k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
858k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
858k
                   pattern[1], pattern[2]));
997
998
858k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
858k
            state->ptr = ptr;
1002
1003
858k
            if (pattern[1] == 0)
1004
858k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
858k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
858k
            } else {
1028
                /* general case */
1029
858k
                LASTMARK_SAVE();
1030
858k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
21.7M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
21.7M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
21.7M
                    state->ptr = ptr;
1036
21.7M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
21.7M
                            pattern+pattern[0]);
1038
21.7M
                    if (ret) {
1039
858k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
858k
                        RETURN_ON_ERROR(ret);
1042
858k
                        RETURN_SUCCESS;
1043
858k
                    }
1044
20.8M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
20.8M
                    LASTMARK_RESTORE();
1047
1048
20.8M
                    state->ptr = ptr;
1049
20.8M
                    ret = SRE(count)(state, pattern+3, 1);
1050
20.8M
                    RETURN_ON_ERROR(ret);
1051
20.8M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
20.8M
                    if (ret == 0)
1053
0
                        break;
1054
20.8M
                    assert(ret == 1);
1055
20.8M
                    ptr++;
1056
20.8M
                    ctx->count++;
1057
20.8M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
46.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
46.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
46.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
46.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
46.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
46.5M
            ctx->u.rep->count = -1;
1131
46.5M
            ctx->u.rep->pattern = pattern;
1132
46.5M
            ctx->u.rep->prev = state->repeat;
1133
46.5M
            ctx->u.rep->last_ptr = NULL;
1134
46.5M
            state->repeat = ctx->u.rep;
1135
1136
46.5M
            state->ptr = ptr;
1137
46.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
46.5M
            state->repeat = ctx->u.rep->prev;
1139
46.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
46.5M
            if (ret) {
1142
11.1M
                RETURN_ON_ERROR(ret);
1143
11.1M
                RETURN_SUCCESS;
1144
11.1M
            }
1145
35.3M
            RETURN_FAILURE;
1146
1147
72.9M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
72.9M
            ctx->u.rep = state->repeat;
1155
72.9M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
72.9M
            state->ptr = ptr;
1159
1160
72.9M
            ctx->count = ctx->u.rep->count+1;
1161
1162
72.9M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
72.9M
                   ptr, ctx->count));
1164
1165
72.9M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
72.9M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.24M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
69.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
69.7M
                ctx->u.rep->count = ctx->count;
1185
69.7M
                LASTMARK_SAVE();
1186
69.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
69.7M
                LAST_PTR_PUSH();
1189
69.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
69.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
69.7M
                        ctx->u.rep->pattern+3);
1192
69.7M
                LAST_PTR_POP();
1193
69.7M
                if (ret) {
1194
26.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
26.1M
                    RETURN_ON_ERROR(ret);
1196
26.1M
                    RETURN_SUCCESS;
1197
26.1M
                }
1198
43.5M
                MARK_POP(ctx->lastmark);
1199
43.5M
                LASTMARK_RESTORE();
1200
43.5M
                ctx->u.rep->count = ctx->count-1;
1201
43.5M
                state->ptr = ptr;
1202
43.5M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
46.8M
            state->repeat = ctx->u.rep->prev;
1207
46.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
46.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
46.8M
            RETURN_ON_SUCCESS(ret);
1211
35.6M
            state->ptr = ptr;
1212
35.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
8.60M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
8.60M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
8.60M
                   ptr, pattern[1]));
1565
8.60M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
8.60M
            state->ptr = ptr - pattern[1];
1568
8.60M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
8.60M
            RETURN_ON_FAILURE(ret);
1570
5.15M
            pattern += pattern[0];
1571
5.15M
            DISPATCH;
1572
1573
7.13M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.13M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.13M
                   ptr, pattern[1]));
1578
7.13M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.13M
                state->ptr = ptr - pattern[1];
1580
7.13M
                LASTMARK_SAVE();
1581
7.13M
                if (state->repeat)
1582
7.13M
                    MARK_PUSH(ctx->lastmark);
1583
1584
14.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
14.2M
                if (ret) {
1586
2.98k
                    if (state->repeat)
1587
2.98k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.98k
                    RETURN_ON_ERROR(ret);
1589
2.98k
                    RETURN_FAILURE;
1590
2.98k
                }
1591
7.12M
                if (state->repeat)
1592
7.12M
                    MARK_POP(ctx->lastmark);
1593
7.12M
                LASTMARK_RESTORE();
1594
7.12M
            }
1595
7.12M
            pattern += pattern[0];
1596
7.12M
            DISPATCH;
1597
1598
7.12M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
556M
exit:
1620
556M
    ctx_pos = ctx->last_ctx_pos;
1621
556M
    jump = ctx->jump;
1622
556M
    DATA_POP_DISCARD(ctx);
1623
556M
    if (ctx_pos == -1) {
1624
249M
        state->sigcount = sigcount;
1625
249M
        return ret;
1626
249M
    }
1627
306M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
306M
    switch (jump) {
1630
69.7M
        case JUMP_MAX_UNTIL_2:
1631
69.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
69.7M
            goto jump_max_until_2;
1633
46.8M
        case JUMP_MAX_UNTIL_3:
1634
46.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
46.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.6M
        case JUMP_BRANCH:
1643
23.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
46.5M
        case JUMP_REPEAT:
1658
46.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
46.5M
            goto jump_repeat;
1660
3.50M
        case JUMP_REPEAT_ONE_1:
1661
3.50M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.50M
            goto jump_repeat_one_1;
1663
79.2M
        case JUMP_REPEAT_ONE_2:
1664
79.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
79.2M
            goto jump_repeat_one_2;
1666
21.7M
        case JUMP_MIN_REPEAT_ONE:
1667
21.7M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
21.7M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
8.60M
        case JUMP_ASSERT:
1673
8.60M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
8.60M
            goto jump_assert;
1675
7.13M
        case JUMP_ASSERT_NOT:
1676
7.13M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.13M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
306M
    }
1683
1684
0
    return ret; /* should never get here */
1685
306M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
77.1M
{
601
77.1M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
77.1M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
77.1M
    Py_ssize_t ret = 0;
604
77.1M
    int jump;
605
77.1M
    unsigned int sigcount = state->sigcount;
606
607
77.1M
    SRE(match_context)* ctx;
608
77.1M
    SRE(match_context)* nextctx;
609
77.1M
    INIT_TRACE(state);
610
611
77.1M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
77.1M
    DATA_ALLOC(SRE(match_context), ctx);
614
77.1M
    ctx->last_ctx_pos = -1;
615
77.1M
    ctx->jump = JUMP_NONE;
616
77.1M
    ctx->toplevel = toplevel;
617
77.1M
    ctx_pos = alloc_pos;
618
619
77.1M
#if USE_COMPUTED_GOTOS
620
77.1M
#include "sre_targets.h"
621
77.1M
#endif
622
623
362M
entrance:
624
625
362M
    ;  // Fashion statement.
626
362M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
362M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.37k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.37k
                   end - ptr, (size_t) pattern[3]));
634
4.37k
            RETURN_FAILURE;
635
4.37k
        }
636
10.7M
        pattern += pattern[1] + 1;
637
10.7M
    }
638
639
362M
#if USE_COMPUTED_GOTOS
640
362M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
362M
    {
647
648
362M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
123M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
123M
                   ptr, pattern[0]));
653
123M
            {
654
123M
                int i = pattern[0];
655
123M
                if (i & 1)
656
25.9M
                    state->lastindex = i/2 + 1;
657
123M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
121M
                    int j = state->lastmark + 1;
663
123M
                    while (j < i)
664
2.00M
                        state->mark[j++] = NULL;
665
121M
                    state->lastmark = i;
666
121M
                }
667
123M
                state->mark[i] = ptr;
668
123M
            }
669
123M
            pattern++;
670
123M
            DISPATCH;
671
672
123M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
26.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
26.7M
                   ptr, *pattern));
677
26.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
10.2M
                RETURN_FAILURE;
679
16.4M
            pattern++;
680
16.4M
            ptr++;
681
16.4M
            DISPATCH;
682
683
16.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
28.8M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
28.8M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
28.8M
            if (ctx->toplevel &&
698
9.09M
                ((state->match_all && ptr != state->end) ||
699
9.09M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
28.8M
            state->ptr = ptr;
704
28.8M
            RETURN_SUCCESS;
705
706
20.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
20.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
20.4M
            if (!SRE(at)(state, ptr, *pattern))
711
20.4M
                RETURN_FAILURE;
712
31.3k
            pattern++;
713
31.3k
            DISPATCH;
714
715
31.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
75.8M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
75.8M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
75.8M
            if (ptr >= end ||
749
75.8M
                !SRE(charset)(state, pattern + 1, *ptr))
750
25.4M
                RETURN_FAILURE;
751
50.3M
            pattern += pattern[0];
752
50.3M
            ptr++;
753
50.3M
            DISPATCH;
754
755
50.3M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.35M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.35M
                   pattern, ptr, pattern[0]));
758
2.35M
            if (ptr >= end ||
759
2.35M
                sre_lower_ascii(*ptr) != *pattern)
760
31.6k
                RETURN_FAILURE;
761
2.32M
            pattern++;
762
2.32M
            ptr++;
763
2.32M
            DISPATCH;
764
765
2.32M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
24.4M
        TARGET(SRE_OP_JUMP):
845
24.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
24.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
24.4M
                   ptr, pattern[0]));
850
24.4M
            pattern += pattern[0];
851
24.4M
            DISPATCH;
852
853
31.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
31.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
31.4M
            LASTMARK_SAVE();
858
31.4M
            if (state->repeat)
859
27.6M
                MARK_PUSH(ctx->lastmark);
860
66.4M
            for (; pattern[0]; pattern += pattern[0]) {
861
58.8M
                if (pattern[1] == SRE_OP_LITERAL &&
862
29.4M
                    (ptr >= end ||
863
29.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
20.7M
                    continue;
865
38.0M
                if (pattern[1] == SRE_OP_IN &&
866
23.2M
                    (ptr >= end ||
867
23.2M
                     !SRE(charset)(state, pattern + 3,
868
23.2M
                                   (SRE_CODE) *ptr)))
869
13.1M
                    continue;
870
24.9M
                state->ptr = ptr;
871
24.9M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
24.9M
                if (ret) {
873
23.8M
                    if (state->repeat)
874
20.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
23.8M
                    RETURN_ON_ERROR(ret);
876
23.8M
                    RETURN_SUCCESS;
877
23.8M
                }
878
1.10M
                if (state->repeat)
879
5.84k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.10M
                LASTMARK_RESTORE();
881
1.10M
            }
882
7.61M
            if (state->repeat)
883
6.86M
                MARK_POP_DISCARD(ctx->lastmark);
884
7.61M
            RETURN_FAILURE;
885
886
127M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
127M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
127M
                   pattern[1], pattern[2]));
898
899
127M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
19.4k
                RETURN_FAILURE; /* cannot match */
901
902
127M
            state->ptr = ptr;
903
904
127M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
127M
            RETURN_ON_ERROR(ret);
906
127M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
127M
            ctx->count = ret;
908
127M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
127M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
59.2M
                RETURN_FAILURE;
917
918
68.3M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.38M
                ptr == state->end &&
920
3.31k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.31k
            {
922
                /* tail is empty.  we're finished */
923
3.31k
                state->ptr = ptr;
924
3.31k
                RETURN_SUCCESS;
925
3.31k
            }
926
927
68.3M
            LASTMARK_SAVE();
928
68.3M
            if (state->repeat)
929
44.8M
                MARK_PUSH(ctx->lastmark);
930
931
68.3M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
14.5M
                ctx->u.chr = pattern[pattern[0]+1];
935
14.5M
                for (;;) {
936
37.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
28.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
22.6M
                        ptr--;
939
22.6M
                        ctx->count--;
940
22.6M
                    }
941
14.5M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
8.87M
                        break;
943
5.66M
                    state->ptr = ptr;
944
5.66M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.66M
                            pattern+pattern[0]);
946
5.66M
                    if (ret) {
947
5.66M
                        if (state->repeat)
948
5.65M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.66M
                        RETURN_ON_ERROR(ret);
950
5.66M
                        RETURN_SUCCESS;
951
5.66M
                    }
952
253
                    if (state->repeat)
953
253
                        MARK_POP_KEEP(ctx->lastmark);
954
253
                    LASTMARK_RESTORE();
955
956
253
                    ptr--;
957
253
                    ctx->count--;
958
253
                }
959
8.87M
                if (state->repeat)
960
8.87M
                    MARK_POP_DISCARD(ctx->lastmark);
961
53.8M
            } else {
962
                /* general case */
963
77.1M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
69.7M
                    state->ptr = ptr;
965
69.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
69.7M
                            pattern+pattern[0]);
967
69.7M
                    if (ret) {
968
46.3M
                        if (state->repeat)
969
30.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
46.3M
                        RETURN_ON_ERROR(ret);
971
46.3M
                        RETURN_SUCCESS;
972
46.3M
                    }
973
23.3M
                    if (state->repeat)
974
220k
                        MARK_POP_KEEP(ctx->lastmark);
975
23.3M
                    LASTMARK_RESTORE();
976
977
23.3M
                    ptr--;
978
23.3M
                    ctx->count--;
979
23.3M
                }
980
7.42M
                if (state->repeat)
981
140k
                    MARK_POP_DISCARD(ctx->lastmark);
982
7.42M
            }
983
16.2M
            RETURN_FAILURE;
984
985
12.0k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
12.0k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
12.0k
                   pattern[1], pattern[2]));
997
998
12.0k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
12.0k
            state->ptr = ptr;
1002
1003
12.0k
            if (pattern[1] == 0)
1004
12.0k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
12.0k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
12.0k
            } else {
1028
                /* general case */
1029
12.0k
                LASTMARK_SAVE();
1030
12.0k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
7.27M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
7.27M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
7.27M
                    state->ptr = ptr;
1036
7.27M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
7.27M
                            pattern+pattern[0]);
1038
7.27M
                    if (ret) {
1039
12.0k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
12.0k
                        RETURN_ON_ERROR(ret);
1042
12.0k
                        RETURN_SUCCESS;
1043
12.0k
                    }
1044
7.26M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
7.26M
                    LASTMARK_RESTORE();
1047
1048
7.26M
                    state->ptr = ptr;
1049
7.26M
                    ret = SRE(count)(state, pattern+3, 1);
1050
7.26M
                    RETURN_ON_ERROR(ret);
1051
7.26M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
7.26M
                    if (ret == 0)
1053
0
                        break;
1054
7.26M
                    assert(ret == 1);
1055
7.26M
                    ptr++;
1056
7.26M
                    ctx->count++;
1057
7.26M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
40.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
40.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
40.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
40.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
40.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
40.6M
            ctx->u.rep->count = -1;
1131
40.6M
            ctx->u.rep->pattern = pattern;
1132
40.6M
            ctx->u.rep->prev = state->repeat;
1133
40.6M
            ctx->u.rep->last_ptr = NULL;
1134
40.6M
            state->repeat = ctx->u.rep;
1135
1136
40.6M
            state->ptr = ptr;
1137
40.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
40.6M
            state->repeat = ctx->u.rep->prev;
1139
40.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
40.6M
            if (ret) {
1142
21.3M
                RETURN_ON_ERROR(ret);
1143
21.3M
                RETURN_SUCCESS;
1144
21.3M
            }
1145
19.2M
            RETURN_FAILURE;
1146
1147
76.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
76.2M
            ctx->u.rep = state->repeat;
1155
76.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
76.2M
            state->ptr = ptr;
1159
1160
76.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
76.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
76.2M
                   ptr, ctx->count));
1164
1165
76.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
76.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.63M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
71.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
71.6M
                ctx->u.rep->count = ctx->count;
1185
71.6M
                LASTMARK_SAVE();
1186
71.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
71.6M
                LAST_PTR_PUSH();
1189
71.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
71.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
71.6M
                        ctx->u.rep->pattern+3);
1192
71.6M
                LAST_PTR_POP();
1193
71.6M
                if (ret) {
1194
35.5M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
35.5M
                    RETURN_ON_ERROR(ret);
1196
35.5M
                    RETURN_SUCCESS;
1197
35.5M
                }
1198
36.0M
                MARK_POP(ctx->lastmark);
1199
36.0M
                LASTMARK_RESTORE();
1200
36.0M
                ctx->u.rep->count = ctx->count-1;
1201
36.0M
                state->ptr = ptr;
1202
36.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
40.6M
            state->repeat = ctx->u.rep->prev;
1207
40.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
40.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
40.6M
            RETURN_ON_SUCCESS(ret);
1211
19.3M
            state->ptr = ptr;
1212
19.3M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
15.4M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
15.4M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
15.4M
                   ptr, pattern[1]));
1565
15.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
15.4M
            state->ptr = ptr - pattern[1];
1568
15.4M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
15.4M
            RETURN_ON_FAILURE(ret);
1570
13.6M
            pattern += pattern[0];
1571
13.6M
            DISPATCH;
1572
1573
13.6M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
9.57M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
9.57M
                   ptr, pattern[1]));
1578
9.57M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
9.57M
                state->ptr = ptr - pattern[1];
1580
9.57M
                LASTMARK_SAVE();
1581
9.57M
                if (state->repeat)
1582
9.57M
                    MARK_PUSH(ctx->lastmark);
1583
1584
19.1M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
19.1M
                if (ret) {
1586
5.56k
                    if (state->repeat)
1587
5.56k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.56k
                    RETURN_ON_ERROR(ret);
1589
5.56k
                    RETURN_FAILURE;
1590
5.56k
                }
1591
9.57M
                if (state->repeat)
1592
9.57M
                    MARK_POP(ctx->lastmark);
1593
9.57M
                LASTMARK_RESTORE();
1594
9.57M
            }
1595
9.57M
            pattern += pattern[0];
1596
9.57M
            DISPATCH;
1597
1598
9.57M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
362M
exit:
1620
362M
    ctx_pos = ctx->last_ctx_pos;
1621
362M
    jump = ctx->jump;
1622
362M
    DATA_POP_DISCARD(ctx);
1623
362M
    if (ctx_pos == -1) {
1624
77.1M
        state->sigcount = sigcount;
1625
77.1M
        return ret;
1626
77.1M
    }
1627
285M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
285M
    switch (jump) {
1630
71.6M
        case JUMP_MAX_UNTIL_2:
1631
71.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
71.6M
            goto jump_max_until_2;
1633
40.6M
        case JUMP_MAX_UNTIL_3:
1634
40.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
40.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
24.9M
        case JUMP_BRANCH:
1643
24.9M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
24.9M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
40.6M
        case JUMP_REPEAT:
1658
40.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
40.6M
            goto jump_repeat;
1660
5.66M
        case JUMP_REPEAT_ONE_1:
1661
5.66M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.66M
            goto jump_repeat_one_1;
1663
69.7M
        case JUMP_REPEAT_ONE_2:
1664
69.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
69.7M
            goto jump_repeat_one_2;
1666
7.27M
        case JUMP_MIN_REPEAT_ONE:
1667
7.27M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
7.27M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
15.4M
        case JUMP_ASSERT:
1673
15.4M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
15.4M
            goto jump_assert;
1675
9.57M
        case JUMP_ASSERT_NOT:
1676
9.57M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
9.57M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
285M
    }
1683
1684
0
    return ret; /* should never get here */
1685
285M
}
1686
1687
/* Capture-group state must be reset between two successive SRE(match) calls
   made from a loop: this sets state->lastmark and state->lastindex back to
   -1.  Expects a variable named `state` to be in scope at the point of use. */
1688
#define RESET_CAPTURE_GROUP() \
1689
383M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan forward from state->start for a position where `pattern` matches.
 *
 * If the pattern begins with an INFO block, its flags select one of four
 * strategies: single-literal prefix scan, multi-character prefix scan driven
 * by the overlap table, leading-charset scan, or the general case that simply
 * retries SRE(match) at each successive position.
 *
 * Before each attempt state->start and state->ptr are set for the candidate
 * position.
 *
 * Returns 0 when no candidate position matches (or the remaining input is
 * shorter than the pattern's minimum width), 1 when a prefix hit is found and
 * SRE_INFO_LITERAL is set, and otherwise the first nonzero status returned by
 * SRE(match), passed through unchanged.
 */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
107M
{
1694
107M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
107M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
107M
    Py_ssize_t status = 0;
1697
107M
    Py_ssize_t prefix_len = 0;
1698
107M
    Py_ssize_t prefix_skip = 0;
1699
107M
    SRE_CODE* prefix = NULL;
1700
107M
    SRE_CODE* charset = NULL;
1701
107M
    SRE_CODE* overlap = NULL;
1702
107M
    int flags = 0;
1703
107M
    INIT_TRACE(state);
1704
1705
107M
    /* start position lies past the end: nothing to scan */
    if (ptr > end)
1706
0
        return 0;
1707
1708
107M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
107M
        flags = pattern[2];
1713
1714
107M
        /* fewer characters remain than the pattern's minimum width (<3=min>) */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.29M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.29M
                   end - ptr, (size_t) pattern[3]));
1717
6.29M
            return 0;
1718
6.29M
        }
1719
101M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
9.15M
            end -= pattern[3] - 1;
1723
9.15M
            if (end <= ptr)
1724
0
                end = ptr;
1725
9.15M
        }
1726
1727
101M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
9.15M
            prefix_len = pattern[5];
1731
9.15M
            prefix_skip = pattern[6];
1732
9.15M
            prefix = pattern + 7;
1733
9.15M
            /* biased by -1: the scan below only reads overlap[i] with i >= 1,
               so overlap[1] is the first entry stored after the prefix data */
            overlap = prefix + prefix_len - 1;
1734
92.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
82.1M
            charset = pattern + 5;
1738
1739
101M
        /* step past the INFO block using its skip field (<1=skip>) */
        pattern += 1 + pattern[1];
1740
101M
    }
1741
1742
101M
    TRACE(("prefix = %p %zd %zd\n",
1743
101M
           prefix, prefix_len, prefix_skip));
1744
101M
    TRACE(("charset = %p\n", charset));
1745
1746
101M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
8.30M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.31M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.31M
#endif
1753
5.31M
        /* scan up to the real end again, dropping the min-width adjustment
           applied to `end` above */
        end = (SRE_CHAR *)state->end;
1754
5.31M
        state->must_advance = 0;
1755
9.26M
        while (ptr < end) {
1756
99.1M
            while (*ptr != c) {
1757
90.9M
                if (++ptr >= end)
1758
1.00M
                    return 0;
1759
90.9M
            }
1760
8.18M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
8.18M
            state->start = ptr;
1762
8.18M
            state->ptr = ptr + prefix_skip;
1763
8.18M
            if (flags & SRE_INFO_LITERAL)
1764
4.82k
                return 1; /* we got all of it */
1765
8.17M
            /* resume matching after the skipped prefix.
               NOTE(review): presumably each skipped prefix character takes
               two code words in the pattern (hence 2*prefix_skip) -- confirm
               against the pattern compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
8.17M
            if (status != 0)
1767
7.21M
                return status;
1768
958k
            ++ptr;
1769
958k
            RESET_CAPTURE_GROUP();
1770
958k
        }
1771
79.6k
        return 0;
1772
5.31M
    }
1773
1774
93.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
856k
        Py_ssize_t i = 0;
1778
1779
856k
        /* scan up to the real end again, dropping the min-width adjustment
           applied to `end` above */
        end = (SRE_CHAR *)state->end;
1780
856k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.77M
        for (i = 0; i < prefix_len; i++)
1784
1.18M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
592k
#endif
1787
1.47M
        while (ptr < end) {
1788
1.47M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
9.99M
            /* find the next occurrence of prefix[0]; the post-increment
               leaves ptr one past the character that matched */
            while (*ptr++ != c) {
1790
8.52M
                if (ptr >= end)
1791
297
                    return 0;
1792
8.52M
            }
1793
1.47M
            if (ptr >= end)
1794
58
                return 0;
1795
1796
1.47M
            /* prefix[0] has matched; i is the next prefix index to compare */
            i = 1;
1797
1.47M
            state->must_advance = 0;
1798
1.47M
            do {
1799
1.47M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.40M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        /* jumps to the while (i != 0) test; i is nonzero
                           after the increment above, so the loop goes on to
                           compare the next prefix character */
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.40M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.40M
                    /* ptr is on the last prefix character here */
                    state->start = ptr - (prefix_len - 1);
1808
1.40M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.40M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.40M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.40M
                    if (status != 0)
1813
856k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
548k
                    if (++ptr >= end)
1816
62
                        return 0;
1817
548k
                    RESET_CAPTURE_GROUP();
1818
548k
                }
1819
618k
                /* fall back through the overlap table; a value of 0 ends this
                   loop and the outer loop rescans for prefix[0] */
                i = overlap[i];
1820
618k
            } while (i != 0);
1821
1.47M
        }
1822
0
        return 0;
1823
856k
    }
1824
1825
92.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
82.1M
        /* scan up to the real end again, dropping the min-width adjustment
           applied to `end` above */
        end = (SRE_CHAR *)state->end;
1828
82.1M
        state->must_advance = 0;
1829
84.4M
        for (;;) {
1830
354M
            /* skip characters that are not in the leading charset */
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
269M
                ptr++;
1832
84.4M
            if (ptr >= end)
1833
3.96M
                return 0;
1834
80.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
80.4M
            state->start = ptr;
1836
80.4M
            state->ptr = ptr;
1837
80.4M
            status = SRE(match)(state, pattern, 0);
1838
80.4M
            if (status != 0)
1839
78.1M
                break;
1840
2.30M
            ptr++;
1841
2.30M
            RESET_CAPTURE_GROUP();
1842
2.30M
        }
1843
82.1M
    } else {
1844
        /* general case */
1845
10.0M
        assert(ptr <= end);
1846
10.0M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
10.0M
        state->start = state->ptr = ptr;
1848
10.0M
        /* only this first attempt passes 1 as the last argument; the retries
           in the loop below pass 0 */
        status = SRE(match)(state, pattern, 1);
1849
10.0M
        state->must_advance = 0;
1850
10.0M
        /* the pattern opens with a beginning-of-string anchor and the first
           attempt failed: later start positions are not tried */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
5.10M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
53
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
5.10M
        {
1854
5.10M
            state->start = state->ptr = ptr = end;
1855
5.10M
            return 0;
1856
5.10M
        }
1857
384M
        /* retry at each following position until a nonzero status or `end`
           (which still carries the min-width adjustment in this branch) */
        while (status == 0 && ptr < end) {
1858
379M
            ptr++;
1859
379M
            RESET_CAPTURE_GROUP();
1860
379M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
379M
            state->start = state->ptr = ptr;
1862
379M
            status = SRE(match)(state, pattern, 0);
1863
379M
        }
1864
4.97M
    }
1865
1866
83.1M
    /* reached from the charset and general branches only; the prefix
       branches return directly */
    return status;
1867
92.1M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
49.4M
{
1694
49.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
49.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
49.4M
    Py_ssize_t status = 0;
1697
49.4M
    Py_ssize_t prefix_len = 0;
1698
49.4M
    Py_ssize_t prefix_skip = 0;
1699
49.4M
    SRE_CODE* prefix = NULL;
1700
49.4M
    SRE_CODE* charset = NULL;
1701
49.4M
    SRE_CODE* overlap = NULL;
1702
49.4M
    int flags = 0;
1703
49.4M
    INIT_TRACE(state);
1704
1705
49.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
49.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
49.4M
        flags = pattern[2];
1713
1714
49.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.16M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.16M
                   end - ptr, (size_t) pattern[3]));
1717
6.16M
            return 0;
1718
6.16M
        }
1719
43.2M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.89M
            end -= pattern[3] - 1;
1723
2.89M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.89M
        }
1726
1727
43.2M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.89M
            prefix_len = pattern[5];
1731
2.89M
            prefix_skip = pattern[6];
1732
2.89M
            prefix = pattern + 7;
1733
2.89M
            overlap = prefix + prefix_len - 1;
1734
40.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
32.7M
            charset = pattern + 5;
1738
1739
43.2M
        pattern += 1 + pattern[1];
1740
43.2M
    }
1741
1742
43.2M
    TRACE(("prefix = %p %zd %zd\n",
1743
43.2M
           prefix, prefix_len, prefix_skip));
1744
43.2M
    TRACE(("charset = %p\n", charset));
1745
1746
43.2M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.83M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.83M
#if SIZEOF_SRE_CHAR < 4
1750
2.83M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.83M
#endif
1753
2.83M
        end = (SRE_CHAR *)state->end;
1754
2.83M
        state->must_advance = 0;
1755
3.10M
        while (ptr < end) {
1756
24.9M
            while (*ptr != c) {
1757
22.8M
                if (++ptr >= end)
1758
924k
                    return 0;
1759
22.8M
            }
1760
2.10M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.10M
            state->start = ptr;
1762
2.10M
            state->ptr = ptr + prefix_skip;
1763
2.10M
            if (flags & SRE_INFO_LITERAL)
1764
500
                return 1; /* we got all of it */
1765
2.09M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.09M
            if (status != 0)
1767
1.83M
                return status;
1768
264k
            ++ptr;
1769
264k
            RESET_CAPTURE_GROUP();
1770
264k
        }
1771
75.9k
        return 0;
1772
2.83M
    }
1773
1774
40.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
60.4k
        Py_ssize_t i = 0;
1778
1779
60.4k
        end = (SRE_CHAR *)state->end;
1780
60.4k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
60.4k
#if SIZEOF_SRE_CHAR < 4
1783
181k
        for (i = 0; i < prefix_len; i++)
1784
120k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
60.4k
#endif
1787
145k
        while (ptr < end) {
1788
145k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.08M
            while (*ptr++ != c) {
1790
938k
                if (ptr >= end)
1791
57
                    return 0;
1792
938k
            }
1793
145k
            if (ptr >= end)
1794
22
                return 0;
1795
1796
145k
            i = 1;
1797
145k
            state->must_advance = 0;
1798
145k
            do {
1799
145k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
130k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
130k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
130k
                    state->start = ptr - (prefix_len - 1);
1808
130k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
130k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
130k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
130k
                    if (status != 0)
1813
60.2k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
70.1k
                    if (++ptr >= end)
1816
25
                        return 0;
1817
70.1k
                    RESET_CAPTURE_GROUP();
1818
70.1k
                }
1819
85.5k
                i = overlap[i];
1820
85.5k
            } while (i != 0);
1821
145k
        }
1822
0
        return 0;
1823
60.4k
    }
1824
1825
40.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
32.7M
        end = (SRE_CHAR *)state->end;
1828
32.7M
        state->must_advance = 0;
1829
34.0M
        for (;;) {
1830
92.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
58.1M
                ptr++;
1832
34.0M
            if (ptr >= end)
1833
2.77M
                return 0;
1834
31.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
31.2M
            state->start = ptr;
1836
31.2M
            state->ptr = ptr;
1837
31.2M
            status = SRE(match)(state, pattern, 0);
1838
31.2M
            if (status != 0)
1839
29.9M
                break;
1840
1.28M
            ptr++;
1841
1.28M
            RESET_CAPTURE_GROUP();
1842
1.28M
        }
1843
32.7M
    } else {
1844
        /* general case */
1845
7.62M
        assert(ptr <= end);
1846
7.62M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
7.62M
        state->start = state->ptr = ptr;
1848
7.62M
        status = SRE(match)(state, pattern, 1);
1849
7.62M
        state->must_advance = 0;
1850
7.62M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.09M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
18
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.09M
        {
1854
4.09M
            state->start = state->ptr = ptr = end;
1855
4.09M
            return 0;
1856
4.09M
        }
1857
141M
        while (status == 0 && ptr < end) {
1858
137M
            ptr++;
1859
137M
            RESET_CAPTURE_GROUP();
1860
137M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
137M
            state->start = state->ptr = ptr;
1862
137M
            status = SRE(match)(state, pattern, 0);
1863
137M
        }
1864
3.53M
    }
1865
1866
33.5M
    return status;
1867
40.3M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
51.9M
{
1694
51.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
51.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
51.9M
    Py_ssize_t status = 0;
1697
51.9M
    Py_ssize_t prefix_len = 0;
1698
51.9M
    Py_ssize_t prefix_skip = 0;
1699
51.9M
    SRE_CODE* prefix = NULL;
1700
51.9M
    SRE_CODE* charset = NULL;
1701
51.9M
    SRE_CODE* overlap = NULL;
1702
51.9M
    int flags = 0;
1703
51.9M
    INIT_TRACE(state);
1704
1705
51.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
51.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
51.9M
        flags = pattern[2];
1713
1714
51.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
118k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
118k
                   end - ptr, (size_t) pattern[3]));
1717
118k
            return 0;
1718
118k
        }
1719
51.8M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.00M
            end -= pattern[3] - 1;
1723
3.00M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.00M
        }
1726
1727
51.8M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.00M
            prefix_len = pattern[5];
1731
3.00M
            prefix_skip = pattern[6];
1732
3.00M
            prefix = pattern + 7;
1733
3.00M
            overlap = prefix + prefix_len - 1;
1734
48.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
46.5M
            charset = pattern + 5;
1738
1739
51.8M
        pattern += 1 + pattern[1];
1740
51.8M
    }
1741
1742
51.8M
    TRACE(("prefix = %p %zd %zd\n",
1743
51.8M
           prefix, prefix_len, prefix_skip));
1744
51.8M
    TRACE(("charset = %p\n", charset));
1745
1746
51.8M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.47M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.47M
#if SIZEOF_SRE_CHAR < 4
1750
2.47M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.47M
#endif
1753
2.47M
        end = (SRE_CHAR *)state->end;
1754
2.47M
        state->must_advance = 0;
1755
3.07M
        while (ptr < end) {
1756
48.0M
            while (*ptr != c) {
1757
45.0M
                if (++ptr >= end)
1758
72.5k
                    return 0;
1759
45.0M
            }
1760
3.00M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.00M
            state->start = ptr;
1762
3.00M
            state->ptr = ptr + prefix_skip;
1763
3.00M
            if (flags & SRE_INFO_LITERAL)
1764
3.25k
                return 1; /* we got all of it */
1765
2.99M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.99M
            if (status != 0)
1767
2.39M
                return status;
1768
601k
            ++ptr;
1769
601k
            RESET_CAPTURE_GROUP();
1770
601k
        }
1771
2.73k
        return 0;
1772
2.47M
    }
1773
1774
49.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
532k
        Py_ssize_t i = 0;
1778
1779
532k
        end = (SRE_CHAR *)state->end;
1780
532k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
532k
#if SIZEOF_SRE_CHAR < 4
1783
1.59M
        for (i = 0; i < prefix_len; i++)
1784
1.06M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
532k
#endif
1787
804k
        while (ptr < end) {
1788
804k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.88M
            while (*ptr++ != c) {
1790
2.07M
                if (ptr >= end)
1791
99
                    return 0;
1792
2.07M
            }
1793
804k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
804k
            i = 1;
1797
804k
            state->must_advance = 0;
1798
804k
            do {
1799
804k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
782k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
782k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
782k
                    state->start = ptr - (prefix_len - 1);
1808
782k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
782k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
782k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
782k
                    if (status != 0)
1813
532k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
250k
                    if (++ptr >= end)
1816
23
                        return 0;
1817
250k
                    RESET_CAPTURE_GROUP();
1818
250k
                }
1819
272k
                i = overlap[i];
1820
272k
            } while (i != 0);
1821
804k
        }
1822
0
        return 0;
1823
532k
    }
1824
1825
48.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
46.5M
        end = (SRE_CHAR *)state->end;
1828
46.5M
        state->must_advance = 0;
1829
47.0M
        for (;;) {
1830
196M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
149M
                ptr++;
1832
47.0M
            if (ptr >= end)
1833
1.13M
                return 0;
1834
45.8M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
45.8M
            state->start = ptr;
1836
45.8M
            state->ptr = ptr;
1837
45.8M
            status = SRE(match)(state, pattern, 0);
1838
45.8M
            if (status != 0)
1839
45.4M
                break;
1840
421k
            ptr++;
1841
421k
            RESET_CAPTURE_GROUP();
1842
421k
        }
1843
46.5M
    } else {
1844
        /* general case */
1845
2.27M
        assert(ptr <= end);
1846
2.27M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.27M
        state->start = state->ptr = ptr;
1848
2.27M
        status = SRE(match)(state, pattern, 1);
1849
2.27M
        state->must_advance = 0;
1850
2.27M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
998k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
18
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
998k
        {
1854
998k
            state->start = state->ptr = ptr = end;
1855
998k
            return 0;
1856
998k
        }
1857
183M
        while (status == 0 && ptr < end) {
1858
182M
            ptr++;
1859
182M
            RESET_CAPTURE_GROUP();
1860
182M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
182M
            state->start = state->ptr = ptr;
1862
182M
            status = SRE(match)(state, pattern, 0);
1863
182M
        }
1864
1.27M
    }
1865
1866
46.7M
    return status;
1867
48.8M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.20M
{
1694
6.20M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.20M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.20M
    Py_ssize_t status = 0;
1697
6.20M
    Py_ssize_t prefix_len = 0;
1698
6.20M
    Py_ssize_t prefix_skip = 0;
1699
6.20M
    SRE_CODE* prefix = NULL;
1700
6.20M
    SRE_CODE* charset = NULL;
1701
6.20M
    SRE_CODE* overlap = NULL;
1702
6.20M
    int flags = 0;
1703
6.20M
    INIT_TRACE(state);
1704
1705
6.20M
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.20M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.20M
        flags = pattern[2];
1713
1714
6.20M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
16.2k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
16.2k
                   end - ptr, (size_t) pattern[3]));
1717
16.2k
            return 0;
1718
16.2k
        }
1719
6.19M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.25M
            end -= pattern[3] - 1;
1723
3.25M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.25M
        }
1726
1727
6.19M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.25M
            prefix_len = pattern[5];
1731
3.25M
            prefix_skip = pattern[6];
1732
3.25M
            prefix = pattern + 7;
1733
3.25M
            overlap = prefix + prefix_len - 1;
1734
3.25M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
2.75M
            charset = pattern + 5;
1738
1739
6.19M
        pattern += 1 + pattern[1];
1740
6.19M
    }
1741
1742
6.19M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.19M
           prefix, prefix_len, prefix_skip));
1744
6.19M
    TRACE(("charset = %p\n", charset));
1745
1746
6.19M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.99M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.99M
        end = (SRE_CHAR *)state->end;
1754
2.99M
        state->must_advance = 0;
1755
3.08M
        while (ptr < end) {
1756
26.1M
            while (*ptr != c) {
1757
23.1M
                if (++ptr >= end)
1758
3.99k
                    return 0;
1759
23.1M
            }
1760
3.07M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.07M
            state->start = ptr;
1762
3.07M
            state->ptr = ptr + prefix_skip;
1763
3.07M
            if (flags & SRE_INFO_LITERAL)
1764
1.07k
                return 1; /* we got all of it */
1765
3.07M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.07M
            if (status != 0)
1767
2.98M
                return status;
1768
93.0k
            ++ptr;
1769
93.0k
            RESET_CAPTURE_GROUP();
1770
93.0k
        }
1771
950
        return 0;
1772
2.99M
    }
1773
1774
3.20M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
263k
        Py_ssize_t i = 0;
1778
1779
263k
        end = (SRE_CHAR *)state->end;
1780
263k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
523k
        while (ptr < end) {
1788
523k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.02M
            while (*ptr++ != c) {
1790
5.50M
                if (ptr >= end)
1791
141
                    return 0;
1792
5.50M
            }
1793
523k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
523k
            i = 1;
1797
523k
            state->must_advance = 0;
1798
523k
            do {
1799
523k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
491k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
491k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
491k
                    state->start = ptr - (prefix_len - 1);
1808
491k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
491k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
491k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
491k
                    if (status != 0)
1813
263k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
228k
                    if (++ptr >= end)
1816
14
                        return 0;
1817
228k
                    RESET_CAPTURE_GROUP();
1818
228k
                }
1819
260k
                i = overlap[i];
1820
260k
            } while (i != 0);
1821
523k
        }
1822
0
        return 0;
1823
263k
    }
1824
1825
2.93M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
2.75M
        end = (SRE_CHAR *)state->end;
1828
2.75M
        state->must_advance = 0;
1829
3.34M
        for (;;) {
1830
65.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
61.8M
                ptr++;
1832
3.34M
            if (ptr >= end)
1833
48.6k
                return 0;
1834
3.30M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.30M
            state->start = ptr;
1836
3.30M
            state->ptr = ptr;
1837
3.30M
            status = SRE(match)(state, pattern, 0);
1838
3.30M
            if (status != 0)
1839
2.70M
                break;
1840
596k
            ptr++;
1841
596k
            RESET_CAPTURE_GROUP();
1842
596k
        }
1843
2.75M
    } else {
1844
        /* general case */
1845
185k
        assert(ptr <= end);
1846
185k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
185k
        state->start = state->ptr = ptr;
1848
185k
        status = SRE(match)(state, pattern, 1);
1849
185k
        state->must_advance = 0;
1850
185k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
14.9k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
17
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
14.9k
        {
1854
14.9k
            state->start = state->ptr = ptr = end;
1855
14.9k
            return 0;
1856
14.9k
        }
1857
59.5M
        while (status == 0 && ptr < end) {
1858
59.3M
            ptr++;
1859
59.3M
            RESET_CAPTURE_GROUP();
1860
59.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
59.3M
            state->start = state->ptr = ptr;
1862
59.3M
            status = SRE(match)(state, pattern, 0);
1863
59.3M
        }
1864
170k
    }
1865
1866
2.87M
    return status;
1867
2.93M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/