Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
22.3M
{
18
    /* check if pointer is at given position */
19
20
22.3M
    Py_ssize_t thisp, thatp;
21
22
22.3M
    switch (at) {
23
24
10.1M
    case SRE_AT_BEGINNING:
25
10.1M
    case SRE_AT_BEGINNING_STRING:
26
10.1M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.16M
    case SRE_AT_END:
33
7.16M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
23.2k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.16M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
5.05M
    case SRE_AT_END_STRING:
42
5.05M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
22.3M
    }
87
88
0
    return 0;
89
22.3M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
17.0M
{
18
    /* check if pointer is at given position */
19
20
17.0M
    Py_ssize_t thisp, thatp;
21
22
17.0M
    switch (at) {
23
24
8.69M
    case SRE_AT_BEGINNING:
25
8.69M
    case SRE_AT_BEGINNING_STRING:
26
8.69M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
5.78M
    case SRE_AT_END:
33
5.78M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
23.1k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
5.78M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.61M
    case SRE_AT_END_STRING:
42
2.61M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
17.0M
    }
87
88
0
    return 0;
89
17.0M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
4.14M
{
18
    /* check if pointer is at given position */
19
20
4.14M
    Py_ssize_t thisp, thatp;
21
22
4.14M
    switch (at) {
23
24
1.43M
    case SRE_AT_BEGINNING:
25
1.43M
    case SRE_AT_BEGINNING_STRING:
26
1.43M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
1.33M
    case SRE_AT_END:
33
1.33M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
55
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
1.33M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.36M
    case SRE_AT_END_STRING:
42
1.36M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
4.14M
    }
87
88
0
    return 0;
89
4.14M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
1.14M
{
18
    /* check if pointer is at given position */
19
20
1.14M
    Py_ssize_t thisp, thatp;
21
22
1.14M
    switch (at) {
23
24
17.3k
    case SRE_AT_BEGINNING:
25
17.3k
    case SRE_AT_BEGINNING_STRING:
26
17.3k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
45.3k
    case SRE_AT_END:
33
45.3k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
68
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
45.3k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.07M
    case SRE_AT_END_STRING:
42
1.07M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.14M
    }
87
88
0
    return 0;
89
1.14M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.56G
{
94
    /* check if character is a member of the given set */
95
96
1.56G
    int ok = 1;
97
98
3.53G
    for (;;) {
99
3.53G
        switch (*set++) {
100
101
1.01G
        case SRE_OP_FAILURE:
102
1.01G
            return !ok;
103
104
1.21G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.21G
            if (ch == set[0])
107
9.59M
                return ok;
108
1.20G
            set++;
109
1.20G
            break;
110
111
87.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
87.2M
            if (sre_category(set[0], (int) ch))
114
76.3M
                return ok;
115
10.9M
            set++;
116
10.9M
            break;
117
118
510M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
510M
            if (ch < 256 &&
121
488M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
187M
                return ok;
123
323M
            set += 256/SRE_CODE_BITS;
124
323M
            break;
125
126
422M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
422M
            if (set[0] <= ch && ch <= set[1])
129
276M
                return ok;
130
145M
            set += 2;
131
145M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
286M
        case SRE_OP_NEGATE:
148
286M
            ok = !ok;
149
286M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.53G
        }
175
3.53G
    }
176
1.56G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
517M
{
94
    /* check if character is a member of the given set */
95
96
517M
    int ok = 1;
97
98
1.08G
    for (;;) {
99
1.08G
        switch (*set++) {
100
101
292M
        case SRE_OP_FAILURE:
102
292M
            return !ok;
103
104
335M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
335M
            if (ch == set[0])
107
6.97M
                return ok;
108
328M
            set++;
109
328M
            break;
110
111
32.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.5M
            if (sre_category(set[0], (int) ch))
114
22.9M
                return ok;
115
9.66M
            set++;
116
9.66M
            break;
117
118
131M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
131M
            if (ch < 256 &&
121
131M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
54.0M
                return ok;
123
77.5M
            set += 256/SRE_CODE_BITS;
124
77.5M
            break;
125
126
220M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
220M
            if (set[0] <= ch && ch <= set[1])
129
141M
                return ok;
130
79.8M
            set += 2;
131
79.8M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
69.8M
        case SRE_OP_NEGATE:
148
69.8M
            ok = !ok;
149
69.8M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.08G
        }
175
1.08G
    }
176
517M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
700M
{
94
    /* check if character is a member of the given set */
95
96
700M
    int ok = 1;
97
98
1.65G
    for (;;) {
99
1.65G
        switch (*set++) {
100
101
483M
        case SRE_OP_FAILURE:
102
483M
            return !ok;
103
104
663M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
663M
            if (ch == set[0])
107
1.56M
                return ok;
108
661M
            set++;
109
661M
            break;
110
111
48.8M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
48.8M
            if (sre_category(set[0], (int) ch))
114
47.9M
                return ok;
115
976k
            set++;
116
976k
            break;
117
118
177M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
177M
            if (ch < 256 &&
121
167M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
53.4M
                return ok;
123
124M
            set += 256/SRE_CODE_BITS;
124
124M
            break;
125
126
169M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
169M
            if (set[0] <= ch && ch <= set[1])
129
114M
                return ok;
130
54.8M
            set += 2;
131
54.8M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
113M
        case SRE_OP_NEGATE:
148
113M
            ok = !ok;
149
113M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.65G
        }
175
1.65G
    }
176
700M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
343M
{
94
    /* check if character is a member of the given set */
95
96
343M
    int ok = 1;
97
98
798M
    for (;;) {
99
798M
        switch (*set++) {
100
101
235M
        case SRE_OP_FAILURE:
102
235M
            return !ok;
103
104
219M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
219M
            if (ch == set[0])
107
1.06M
                return ok;
108
218M
            set++;
109
218M
            break;
110
111
5.77M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
5.77M
            if (sre_category(set[0], (int) ch))
114
5.46M
                return ok;
115
307k
            set++;
116
307k
            break;
117
118
201M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
201M
            if (ch < 256 &&
121
189M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
79.9M
                return ok;
123
121M
            set += 256/SRE_CODE_BITS;
124
121M
            break;
125
126
32.6M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
32.6M
            if (set[0] <= ch && ch <= set[1])
129
21.5M
                return ok;
130
11.0M
            set += 2;
131
11.0M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
103M
        case SRE_OP_NEGATE:
148
103M
            ok = !ok;
149
103M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
798M
        }
175
798M
    }
176
343M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
473M
{
195
473M
    SRE_CODE chr;
196
473M
    SRE_CHAR c;
197
473M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
473M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
473M
    Py_ssize_t i;
200
473M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
473M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
50.8M
        end = ptr + maxcount;
205
206
473M
    switch (pattern[0]) {
207
208
418M
    case SRE_OP_IN:
209
        /* repeated set */
210
418M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
794M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
376M
            ptr++;
213
418M
        break;
214
215
2.61M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.61M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
38.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
36.0M
            ptr++;
220
2.61M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
51.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
51.2M
        chr = pattern[1];
232
51.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
51.2M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
39.8M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
39.8M
        else
238
39.8M
#endif
239
54.6M
        while (ptr < end && *ptr == c)
240
3.39M
            ptr++;
241
51.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.14M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.14M
        chr = pattern[1];
270
1.14M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.14M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
511k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
511k
        else
276
511k
#endif
277
53.2M
        while (ptr < end && *ptr != c)
278
52.1M
            ptr++;
279
1.14M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
473M
    }
319
320
473M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
473M
           ptr - (SRE_CHAR*) state->ptr));
322
473M
    return ptr - (SRE_CHAR*) state->ptr;
323
473M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
181M
{
195
181M
    SRE_CODE chr;
196
181M
    SRE_CHAR c;
197
181M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
181M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
181M
    Py_ssize_t i;
200
181M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
181M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
22.1M
        end = ptr + maxcount;
205
206
181M
    switch (pattern[0]) {
207
208
153M
    case SRE_OP_IN:
209
        /* repeated set */
210
153M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
299M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
146M
            ptr++;
213
153M
        break;
214
215
2.37M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.37M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
11.9M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
9.54M
            ptr++;
220
2.37M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
25.5M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
25.5M
        chr = pattern[1];
232
25.5M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
25.5M
        c = (SRE_CHAR) chr;
234
25.5M
#if SIZEOF_SRE_CHAR < 4
235
25.5M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
25.5M
        else
238
25.5M
#endif
239
25.9M
        while (ptr < end && *ptr == c)
240
345k
            ptr++;
241
25.5M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
291k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
291k
        chr = pattern[1];
270
291k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
291k
        c = (SRE_CHAR) chr;
272
291k
#if SIZEOF_SRE_CHAR < 4
273
291k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
291k
        else
276
291k
#endif
277
13.2M
        while (ptr < end && *ptr != c)
278
12.9M
            ptr++;
279
291k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
181M
    }
319
320
181M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
181M
           ptr - (SRE_CHAR*) state->ptr));
322
181M
    return ptr - (SRE_CHAR*) state->ptr;
323
181M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
199M
{
195
199M
    SRE_CODE chr;
196
199M
    SRE_CHAR c;
197
199M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
199M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
199M
    Py_ssize_t i;
200
199M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
199M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.0M
        end = ptr + maxcount;
205
206
199M
    switch (pattern[0]) {
207
208
184M
    case SRE_OP_IN:
209
        /* repeated set */
210
184M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
308M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
123M
            ptr++;
213
184M
        break;
214
215
232k
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
232k
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
12.2M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
12.0M
            ptr++;
220
232k
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
14.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
14.2M
        chr = pattern[1];
232
14.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
14.2M
        c = (SRE_CHAR) chr;
234
14.2M
#if SIZEOF_SRE_CHAR < 4
235
14.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
14.2M
        else
238
14.2M
#endif
239
16.4M
        while (ptr < end && *ptr == c)
240
2.25M
            ptr++;
241
14.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
220k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
220k
        chr = pattern[1];
270
220k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
220k
        c = (SRE_CHAR) chr;
272
220k
#if SIZEOF_SRE_CHAR < 4
273
220k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
220k
        else
276
220k
#endif
277
11.6M
        while (ptr < end && *ptr != c)
278
11.4M
            ptr++;
279
220k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
199M
    }
319
320
199M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
199M
           ptr - (SRE_CHAR*) state->ptr));
322
199M
    return ptr - (SRE_CHAR*) state->ptr;
323
199M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
92.6M
{
195
92.6M
    SRE_CODE chr;
196
92.6M
    SRE_CHAR c;
197
92.6M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
92.6M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
92.6M
    Py_ssize_t i;
200
92.6M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
92.6M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
13.6M
        end = ptr + maxcount;
205
206
92.6M
    switch (pattern[0]) {
207
208
80.5M
    case SRE_OP_IN:
209
        /* repeated set */
210
80.5M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
186M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
105M
            ptr++;
213
80.5M
        break;
214
215
8.42k
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
8.42k
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
14.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
14.4M
            ptr++;
220
8.42k
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.4M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.4M
        chr = pattern[1];
232
11.4M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.4M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.2M
        while (ptr < end && *ptr == c)
240
792k
            ptr++;
241
11.4M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
634k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
634k
        chr = pattern[1];
270
634k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
634k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
28.3M
        while (ptr < end && *ptr != c)
278
27.7M
            ptr++;
279
634k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
92.6M
    }
319
320
92.6M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
92.6M
           ptr - (SRE_CHAR*) state->ptr));
322
92.6M
    return ptr - (SRE_CHAR*) state->ptr;
323
92.6M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Snapshot state->lastmark and state->lastindex into the current match
   context.  Relies on `ctx` and `state` being in scope at the use site. */
#define LASTMARK_SAVE()     \
354
541M
    do { \
355
541M
        ctx->lastmark = state->lastmark; \
356
541M
        ctx->lastindex = state->lastindex; \
357
541M
    } while (0)
358
/* Inverse of LASTMARK_SAVE(): copy the snapshot held in `ctx` back into
   state->lastmark and state->lastindex. */
#define LASTMARK_RESTORE()  \
359
176M
    do { \
360
176M
        state->lastmark = ctx->lastmark; \
361
176M
        state->lastindex = ctx->lastindex; \
362
176M
    } while (0)
363
364
/* Push the current value of ctx->u.rep->last_ptr onto the data stack
   (via DATA_PUSH), tracing its index first.  Because DATA_PUSH can return
   from the enclosing function on a failed stack grow, so can this macro. */
#define LAST_PTR_PUSH()     \
365
193M
    do { \
366
193M
        TRACE(("push last_ptr: %zd", \
367
193M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
193M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
193M
    } while (0)
370
/* Pop the top of the data stack back into ctx->u.rep->last_ptr (via
   DATA_POP), then trace the restored index. */
#define LAST_PTR_POP()  \
371
193M
    do { \
372
193M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
193M
        TRACE(("pop last_ptr: %zd", \
374
193M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
193M
    } while (0)
376
377
0
/* Exit helpers for the enclosing matcher function.
 *
 * RETURN_ERROR(i)       return `i` directly from the enclosing function.
 * RETURN_FAILURE        set `ret` to 0 and jump to the `exit` label.
 * RETURN_SUCCESS        set `ret` to 1 and jump to the `exit` label.
 * RETURN_ON_ERROR(i)    RETURN_ERROR(i) when i < 0, otherwise fall through.
 * RETURN_ON_SUCCESS(i)  as RETURN_ON_ERROR, then RETURN_SUCCESS when i > 0.
 * RETURN_ON_FAILURE(i)  as RETURN_ON_ERROR, then RETURN_FAILURE when i == 0.
 *
 * The argument `i` is expanded unparenthesized and more than once, so it
 * should be a plain variable without side effects.  `ret` and the `exit`
 * label must exist in the function that uses these macros.
 */
#define RETURN_ERROR(i) do { return i; } while(0)
378
519M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
647M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
981M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
48.3M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
25.7M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.16G
/* Reserve sizeof(type) bytes on state->data_stack and point `ptr` at them.
 *
 * Expects `alloc_pos`, `ctx` and `ctx_pos` in the enclosing scope.
 * `alloc_pos` receives the offset of the new slot (the value of
 * state->data_stack_base before the reservation).  When the remaining room
 * is smaller than sizeof(type), data_stack_grow() is called; a negative
 * result is returned directly from the enclosing function.  After a
 * successful grow, `ctx` is re-derived from `ctx_pos` (unless ctx_pos is
 * -1) -- presumably because growing may move the stack buffer; confirm
 * against data_stack_grow().
 */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.16G
do { \
390
1.16G
    alloc_pos = state->data_stack_base; \
391
1.16G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.16G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.16G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
172M
        int j = data_stack_grow(state, sizeof(type)); \
395
172M
        if (j < 0) return j; \
396
172M
        if (ctx_pos != -1) \
397
172M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
172M
    } \
399
1.16G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.16G
    state->data_stack_base += sizeof(type); \
401
1.16G
} while (0)
402
403
1.16G
/* Set `ptr` to the address of the object of type `type` located at byte
   offset `pos` inside state->data_stack.  Nothing is allocated or copied;
   the stack base is left unchanged. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.16G
do { \
405
1.16G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.16G
    ptr = (type*)(state->data_stack+pos); \
407
1.16G
} while (0)
408
409
539M
/* Copy `size` bytes from `data` onto the top of state->data_stack and
 * advance state->data_stack_base by `size`.
 *
 * Expects `ctx` and `ctx_pos` in the enclosing scope.  When the remaining
 * room is smaller than `size`, data_stack_grow() is called; a negative
 * result is returned directly from the enclosing function.  After a
 * successful grow, `ctx` is re-derived from `ctx_pos` (unless ctx_pos is
 * -1) -- presumably because growing may move the stack buffer; confirm
 * against data_stack_grow().
 */
#define DATA_STACK_PUSH(state, data, size) \
410
539M
do { \
411
539M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
539M
           data, state->data_stack_base, size)); \
413
539M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
83.4k
        int j = data_stack_grow(state, size); \
415
83.4k
        if (j < 0) return j; \
416
83.4k
        if (ctx_pos != -1) \
417
83.4k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
83.4k
    } \
419
539M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
539M
    state->data_stack_base += size; \
421
539M
} while (0)
422
423
/* Copy the topmost `size` bytes of state->data_stack into `data`.  When
   `discard` is true the stack base is also lowered by `size`; otherwise
   the bytes stay on the stack and can be read again.  No check is made
   that `size` bytes are actually present. */
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
298M
#define DATA_STACK_POP(state, data, size, discard) \
427
298M
do { \
428
298M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
298M
           data, state->data_stack_base-size, size)); \
430
298M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
298M
    if (discard) \
432
298M
        state->data_stack_base -= size; \
433
298M
} while (0)
434
435
1.40G
/* Drop the topmost `size` bytes of state->data_stack by lowering
   state->data_stack_base; the bytes are not copied anywhere. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.40G
do { \
437
1.40G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.40G
           state->data_stack_base-size, size)); \
439
1.40G
    state->data_stack_base -= size; \
440
1.40G
} while(0)
441
442
/* Shorthands for the DATA_STACK_* macros that use the `state` variable of
 * the enclosing scope.
 *
 * DATA_PUSH(x)             push the object that pointer `x` points to.
 * DATA_POP(x)              pop into the object `x` points to and discard it.
 * DATA_POP_DISCARD(x)      discard sizeof(*x) bytes without copying.
 * DATA_ALLOC(t, p)         reserve room for one `t` and point `p` at it.
 * DATA_LOOKUP_AT(t, p, pos)  point `p` at the `t` stored at offset `pos`.
 *
 * For the first three, the byte count is taken from sizeof(*(x)), so `x`
 * must be a pointer to the object being transferred.
 */
#define DATA_PUSH(x) \
443
193M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
193M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.16G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.16G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.15G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into the subject string to an index: the byte distance
   from state->beginning divided by state->charsize.  A NULL pointer maps
   to -1.  Used in the TRACE output of this file. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* Debug aid: when VERBOSE is enabled and DO_TRACE is true, print `label`,
   the number of marks (lastmark + 1) and the index of each entry
   state->mark[0..lastmark], with an extra space before every even-numbered
   entry after the first so the marks read as pairs.  Without VERBOSE the
   macro expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save/restore helpers for the state->mark array.  Each one acts on the
 * first (lastmark + 1) entries, i.e. (lastmark + 1) * sizeof(void*) bytes,
 * and does nothing when lastmark is negative.
 *
 * MARK_PUSH          copy the entries onto the data stack.
 * MARK_POP           copy them back from the data stack and discard them.
 * MARK_POP_KEEP      copy them back but leave them on the data stack.
 * MARK_POP_DISCARD   discard them from the data stack without copying.
 *
 * `lastmark` is expanded unparenthesized in the comparison and the size
 * computation, so pass a simple expression.  MARK_PUSH inherits the
 * early-return-on-grow-failure behaviour of DATA_STACK_PUSH.
 */
#define MARK_PUSH(lastmark) \
472
452M
    do if (lastmark >= 0) { \
473
345M
        MARK_TRACE("push", (lastmark)); \
474
345M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
345M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
452M
    } while (0)
477
#define MARK_POP(lastmark) \
478
119M
    do if (lastmark >= 0) { \
479
103M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
103M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
103M
        MARK_TRACE("pop", (lastmark)); \
482
119M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.68M
    do if (lastmark >= 0) { \
485
1.06M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.06M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.06M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.68M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
332M
    do if (lastmark >= 0) { \
491
241M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
241M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
241M
        MARK_TRACE("pop discard", (lastmark)); \
494
332M
    } while (0)
495
496
483M
/* Values stored in the `jump` field of a match context.  JUMP_NONE marks
   the initial context created on entry to SRE(match); the others are
   passed as `jumpvalue` to DO_JUMPX/DO_JUMP/DO_JUMP0 and identify the
   call site that started the nested match.
   NOTE(review): the code that maps these values back to their jump
   labels is outside this part of the file -- confirm there. */
#define JUMP_NONE            0
497
145k
#define JUMP_MAX_UNTIL_1     1
498
193M
#define JUMP_MAX_UNTIL_2     2
499
48.3M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
48.1M
#define JUMP_REPEAT          7
504
12.3M
#define JUMP_REPEAT_ONE_1    8
505
158M
#define JUMP_REPEAT_ONE_2    9
506
832
#define JUMP_MIN_REPEAT_ONE  10
507
108M
#define JUMP_BRANCH          11
508
25.7M
#define JUMP_ASSERT          12
509
89.1M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start a nested match of `nextpattern` without a C-level recursive call.
 *
 * Steps, in order:
 *   1. save the current `pattern` and `ptr` in the current context;
 *   2. allocate a new context on the data stack (DATA_ALLOC may return
 *      from the enclosing function if the stack cannot grow, and may
 *      re-derive `ctx`);
 *   3. fill in the new context: pattern, toplevel flag, jump id and the
 *      data-stack position of the current context (last_ctx_pos);
 *   4. make the new context current and jump to the `entrance` label.
 * `jumplabel` is defined right after the goto; once control arrives
 * there, `pattern` and `ptr` are reloaded from `ctx`.
 * NOTE(review): presumably the exit path has switched `ctx` back to the
 * saving context before jumping to `jumplabel`; that code is not in view.
 *
 * The expansion is a bare statement sequence containing a label (it is not
 * wrapped in do/while), so it must be used where a statement list is
 * valid, not as the sole body of an unbraced `if`.
 */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
684M
    ctx->pattern = pattern; \
516
684M
    ctx->ptr = ptr; \
517
684M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
684M
    nextctx->pattern = nextpattern; \
519
684M
    nextctx->toplevel = toplevel_; \
520
684M
    nextctx->jump = jumpvalue; \
521
684M
    nextctx->last_ctx_pos = ctx_pos; \
522
684M
    pattern = nextpattern; \
523
684M
    ctx_pos = alloc_pos; \
524
684M
    ctx = nextctx; \
525
684M
    goto entrance; \
526
684M
    jumplabel: \
527
684M
    pattern = ctx->pattern; \
528
684M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX variant in which the nested context inherits the current
   context's toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
569M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX variant in which the nested context always has toplevel 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
114M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* One frame of matcher state, allocated on state->data_stack by
 * SRE(match) and by DO_JUMPX.
 *
 *   u.rep         repeat record; its last_ptr member is saved and restored
 *                 by LAST_PTR_PUSH()/LAST_PTR_POP().
 *   lastmark,
 *   lastindex     snapshot of state->lastmark / state->lastindex taken by
 *                 LASTMARK_SAVE() and written back by LASTMARK_RESTORE().
 *   pattern, ptr  pattern and subject positions saved by DO_JUMPX before a
 *                 nested match and reloaded at its jump label.
 *   toplevel      copied from the SRE(match) argument for the initial
 *                 frame, or supplied by DO_JUMPX; consulted by
 *                 SRE_OP_SUCCESS.
 *   jump          one of the JUMP_* values (JUMP_NONE for the initial
 *                 frame).
 *   last_ctx_pos  data-stack offset of the frame that started this one, or
 *                 -1 for the initial frame.
 *   count, u.chr  NOTE(review): their uses are outside this part of the
 *                 file -- confirm in the opcode handlers.
 */
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
/* Periodic signal check for the matcher loop.
 *
 * _MAYBE_CHECK_SIGNALS increments the local `sigcount` on every use and,
 * whenever its low 12 bits are all zero (once per 4096 increments), calls
 * PyErr_CheckSignals().  A non-zero result makes the enclosing function
 * return SRE_ERROR_INTERRUPTED through RETURN_ERROR.
 *
 * In Py_DEBUG builds MAYBE_CHECK_SIGNALS additionally counts down
 * state->fail_after_count while it is non-negative; when it is found to be
 * zero, the exception state->fail_after_exc is set with PyErr_SetNone()
 * and SRE_ERROR_INTERRUPTED is returned.  In other builds
 * MAYBE_CHECK_SIGNALS is just _MAYBE_CHECK_SIGNALS.
 */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.15G
    do {                                                           \
553
2.15G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.15G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.15G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Opcode dispatch strategy.
 *
 * USE_COMPUTED_GOTOS defaults to 1 when HAVE_COMPUTED_GOTOS is defined
 * (an existing definition is left alone), is a compile error when it was
 * requested without HAVE_COMPUTED_GOTOS, and is forced to 0 otherwise.
 *
 * With computed gotos, TARGET(OP) names the label TARGET_<OP> and DISPATCH
 * runs the periodic signal check, then jumps through the sre_targets
 * table indexed by the next opcode, advancing `pattern` past it.
 * Without them, TARGET(OP) is a `case` label and DISPATCH jumps to the
 * `dispatch` label, where the signal check and the switch on the next
 * opcode are performed.
 */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.23G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.15G
        do {                               \
588
2.15G
            MAYBE_CHECK_SIGNALS;           \
589
2.15G
            goto *sre_targets[*pattern++]; \
590
2.15G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
483M
{
601
483M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
483M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
483M
    Py_ssize_t ret = 0;
604
483M
    int jump;
605
483M
    unsigned int sigcount = state->sigcount;
606
607
483M
    SRE(match_context)* ctx;
608
483M
    SRE(match_context)* nextctx;
609
483M
    INIT_TRACE(state);
610
611
483M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
483M
    DATA_ALLOC(SRE(match_context), ctx);
614
483M
    ctx->last_ctx_pos = -1;
615
483M
    ctx->jump = JUMP_NONE;
616
483M
    ctx->toplevel = toplevel;
617
483M
    ctx_pos = alloc_pos;
618
619
483M
#if USE_COMPUTED_GOTOS
620
483M
#include "sre_targets.h"
621
483M
#endif
622
623
1.16G
entrance:
624
625
1.16G
    ;  // Fashion statement.
626
1.16G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.16G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
61.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.61M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.61M
                   end - ptr, (size_t) pattern[3]));
634
3.61M
            RETURN_FAILURE;
635
3.61M
        }
636
57.9M
        pattern += pattern[1] + 1;
637
57.9M
    }
638
639
1.16G
#if USE_COMPUTED_GOTOS
640
1.16G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.16G
    {
647
648
1.16G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
443M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
443M
                   ptr, pattern[0]));
653
443M
            {
654
443M
                int i = pattern[0];
655
443M
                if (i & 1)
656
68.8M
                    state->lastindex = i/2 + 1;
657
443M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
437M
                    int j = state->lastmark + 1;
663
451M
                    while (j < i)
664
14.0M
                        state->mark[j++] = NULL;
665
437M
                    state->lastmark = i;
666
437M
                }
667
443M
                state->mark[i] = ptr;
668
443M
            }
669
443M
            pattern++;
670
443M
            DISPATCH;
671
672
443M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
147M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
147M
                   ptr, *pattern));
677
147M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
65.7M
                RETURN_FAILURE;
679
81.5M
            pattern++;
680
81.5M
            ptr++;
681
81.5M
            DISPATCH;
682
683
81.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
167M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
167M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
167M
            if (ctx->toplevel &&
698
42.5M
                ((state->match_all && ptr != state->end) ||
699
42.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
167M
            state->ptr = ptr;
704
167M
            RETURN_SUCCESS;
705
706
22.3M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
22.3M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
22.3M
            if (!SRE(at)(state, ptr, *pattern))
711
5.44M
                RETURN_FAILURE;
712
16.9M
            pattern++;
713
16.9M
            DISPATCH;
714
715
16.9M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
346M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
346M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
346M
            if (ptr >= end ||
749
342M
                !SRE(charset)(state, pattern + 1, *ptr))
750
84.8M
                RETURN_FAILURE;
751
261M
            pattern += pattern[0];
752
261M
            ptr++;
753
261M
            DISPATCH;
754
755
261M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
7.65M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
7.65M
                   pattern, ptr, pattern[0]));
758
7.65M
            if (ptr >= end ||
759
7.65M
                sre_lower_ascii(*ptr) != *pattern)
760
47.7k
                RETURN_FAILURE;
761
7.60M
            pattern++;
762
7.60M
            ptr++;
763
7.60M
            DISPATCH;
764
765
7.60M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
77.1M
        TARGET(SRE_OP_JUMP):
845
77.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
77.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
77.1M
                   ptr, pattern[0]));
850
77.1M
            pattern += pattern[0];
851
77.1M
            DISPATCH;
852
853
98.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
98.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
98.2M
            LASTMARK_SAVE();
858
98.2M
            if (state->repeat)
859
59.0M
                MARK_PUSH(ctx->lastmark);
860
218M
            for (; pattern[0]; pattern += pattern[0]) {
861
194M
                if (pattern[1] == SRE_OP_LITERAL &&
862
113M
                    (ptr >= end ||
863
113M
                     (SRE_CODE) *ptr != pattern[2]))
864
57.9M
                    continue;
865
136M
                if (pattern[1] == SRE_OP_IN &&
866
51.9M
                    (ptr >= end ||
867
51.8M
                     !SRE(charset)(state, pattern + 3,
868
51.8M
                                   (SRE_CODE) *ptr)))
869
28.1M
                    continue;
870
108M
                state->ptr = ptr;
871
108M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
108M
                if (ret) {
873
74.7M
                    if (state->repeat)
874
49.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
74.7M
                    RETURN_ON_ERROR(ret);
876
74.7M
                    RETURN_SUCCESS;
877
74.7M
                }
878
33.7M
                if (state->repeat)
879
15.3k
                    MARK_POP_KEEP(ctx->lastmark);
880
33.7M
                LASTMARK_RESTORE();
881
33.7M
            }
882
23.5M
            if (state->repeat)
883
9.09M
                MARK_POP_DISCARD(ctx->lastmark);
884
23.5M
            RETURN_FAILURE;
885
886
476M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
476M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
476M
                   pattern[1], pattern[2]));
898
899
476M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
2.43M
                RETURN_FAILURE; /* cannot match */
901
902
473M
            state->ptr = ptr;
903
904
473M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
473M
            RETURN_ON_ERROR(ret);
906
473M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
473M
            ctx->count = ret;
908
473M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
473M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
313M
                RETURN_FAILURE;
917
918
160M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.26M
                ptr == state->end &&
920
87.2k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
87.2k
            {
922
                /* tail is empty.  we're finished */
923
87.2k
                state->ptr = ptr;
924
87.2k
                RETURN_SUCCESS;
925
87.2k
            }
926
927
160M
            LASTMARK_SAVE();
928
160M
            if (state->repeat)
929
110M
                MARK_PUSH(ctx->lastmark);
930
931
160M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
22.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
22.6M
                for (;;) {
936
61.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
50.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
38.3M
                        ptr--;
939
38.3M
                        ctx->count--;
940
38.3M
                    }
941
22.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
10.3M
                        break;
943
12.3M
                    state->ptr = ptr;
944
12.3M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.3M
                            pattern+pattern[0]);
946
12.3M
                    if (ret) {
947
12.3M
                        if (state->repeat)
948
11.1M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.3M
                        RETURN_ON_ERROR(ret);
950
12.3M
                        RETURN_SUCCESS;
951
12.3M
                    }
952
835
                    if (state->repeat)
953
819
                        MARK_POP_KEEP(ctx->lastmark);
954
835
                    LASTMARK_RESTORE();
955
956
835
                    ptr--;
957
835
                    ctx->count--;
958
835
                }
959
10.3M
                if (state->repeat)
960
9.11M
                    MARK_POP_DISCARD(ctx->lastmark);
961
137M
            } else {
962
                /* general case */
963
160M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
158M
                    state->ptr = ptr;
965
158M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
158M
                            pattern+pattern[0]);
967
158M
                    if (ret) {
968
135M
                        if (state->repeat)
969
88.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
135M
                        RETURN_ON_ERROR(ret);
971
135M
                        RETURN_SUCCESS;
972
135M
                    }
973
22.8M
                    if (state->repeat)
974
1.67M
                        MARK_POP_KEEP(ctx->lastmark);
975
22.8M
                    LASTMARK_RESTORE();
976
977
22.8M
                    ptr--;
978
22.8M
                    ctx->count--;
979
22.8M
                }
980
1.96M
                if (state->repeat)
981
1.40M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.96M
            }
983
12.2M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
48.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
48.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
48.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
48.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
48.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
48.1M
            ctx->u.rep->count = -1;
1131
48.1M
            ctx->u.rep->pattern = pattern;
1132
48.1M
            ctx->u.rep->prev = state->repeat;
1133
48.1M
            ctx->u.rep->last_ptr = NULL;
1134
48.1M
            state->repeat = ctx->u.rep;
1135
1136
48.1M
            state->ptr = ptr;
1137
48.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
48.1M
            state->repeat = ctx->u.rep->prev;
1139
48.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
48.1M
            if (ret) {
1142
47.9M
                RETURN_ON_ERROR(ret);
1143
47.9M
                RETURN_SUCCESS;
1144
47.9M
            }
1145
110k
            RETURN_FAILURE;
1146
1147
209M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
209M
            ctx->u.rep = state->repeat;
1155
209M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
209M
            state->ptr = ptr;
1159
1160
209M
            ctx->count = ctx->u.rep->count+1;
1161
1162
209M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
209M
                   ptr, ctx->count));
1164
1165
209M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
145k
                ctx->u.rep->count = ctx->count;
1168
145k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
145k
                        ctx->u.rep->pattern+3);
1170
145k
                if (ret) {
1171
129k
                    RETURN_ON_ERROR(ret);
1172
129k
                    RETURN_SUCCESS;
1173
129k
                }
1174
15.9k
                ctx->u.rep->count = ctx->count-1;
1175
15.9k
                state->ptr = ptr;
1176
15.9k
                RETURN_FAILURE;
1177
15.9k
            }
1178
1179
209M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.5M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
193M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
193M
                ctx->u.rep->count = ctx->count;
1185
193M
                LASTMARK_SAVE();
1186
193M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
193M
                LAST_PTR_PUSH();
1189
193M
                ctx->u.rep->last_ptr = state->ptr;
1190
193M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
193M
                        ctx->u.rep->pattern+3);
1192
193M
                LAST_PTR_POP();
1193
193M
                if (ret) {
1194
160M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
160M
                    RETURN_ON_ERROR(ret);
1196
160M
                    RETURN_SUCCESS;
1197
160M
                }
1198
32.8M
                MARK_POP(ctx->lastmark);
1199
32.8M
                LASTMARK_RESTORE();
1200
32.8M
                ctx->u.rep->count = ctx->count-1;
1201
32.8M
                state->ptr = ptr;
1202
32.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
48.3M
            state->repeat = ctx->u.rep->prev;
1207
48.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
48.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
48.3M
            RETURN_ON_SUCCESS(ret);
1211
392k
            state->ptr = ptr;
1212
392k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
25.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
25.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
25.7M
                   ptr, pattern[1]));
1565
25.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
25.7M
            state->ptr = ptr - pattern[1];
1568
25.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
25.7M
            RETURN_ON_FAILURE(ret);
1570
20.1M
            pattern += pattern[0];
1571
20.1M
            DISPATCH;
1572
1573
89.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
89.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
89.1M
                   ptr, pattern[1]));
1578
89.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
89.1M
                state->ptr = ptr - pattern[1];
1580
89.1M
                LASTMARK_SAVE();
1581
89.1M
                if (state->repeat)
1582
89.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
178M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
178M
                if (ret) {
1586
2.25M
                    if (state->repeat)
1587
2.25M
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.25M
                    RETURN_ON_ERROR(ret);
1589
2.25M
                    RETURN_FAILURE;
1590
2.25M
                }
1591
86.8M
                if (state->repeat)
1592
86.8M
                    MARK_POP(ctx->lastmark);
1593
86.8M
                LASTMARK_RESTORE();
1594
86.8M
            }
1595
86.8M
            pattern += pattern[0];
1596
86.8M
            DISPATCH;
1597
1598
86.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.16G
exit:
1620
1.16G
    ctx_pos = ctx->last_ctx_pos;
1621
1.16G
    jump = ctx->jump;
1622
1.16G
    DATA_POP_DISCARD(ctx);
1623
1.16G
    if (ctx_pos == -1) {
1624
483M
        state->sigcount = sigcount;
1625
483M
        return ret;
1626
483M
    }
1627
684M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
684M
    switch (jump) {
1630
193M
        case JUMP_MAX_UNTIL_2:
1631
193M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
193M
            goto jump_max_until_2;
1633
48.3M
        case JUMP_MAX_UNTIL_3:
1634
48.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
48.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
108M
        case JUMP_BRANCH:
1643
108M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
108M
            goto jump_branch;
1645
145k
        case JUMP_MAX_UNTIL_1:
1646
145k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
145k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
48.1M
        case JUMP_REPEAT:
1658
48.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
48.1M
            goto jump_repeat;
1660
12.3M
        case JUMP_REPEAT_ONE_1:
1661
12.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.3M
            goto jump_repeat_one_1;
1663
158M
        case JUMP_REPEAT_ONE_2:
1664
158M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
158M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
25.7M
        case JUMP_ASSERT:
1673
25.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
25.7M
            goto jump_assert;
1675
89.1M
        case JUMP_ASSERT_NOT:
1676
89.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
89.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
684M
    }
1683
1684
0
    return ret; /* should never get here */
1685
684M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
202M
{
601
202M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
202M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
202M
    Py_ssize_t ret = 0;
604
202M
    int jump;
605
202M
    unsigned int sigcount = state->sigcount;
606
607
202M
    SRE(match_context)* ctx;
608
202M
    SRE(match_context)* nextctx;
609
202M
    INIT_TRACE(state);
610
611
202M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
202M
    DATA_ALLOC(SRE(match_context), ctx);
614
202M
    ctx->last_ctx_pos = -1;
615
202M
    ctx->jump = JUMP_NONE;
616
202M
    ctx->toplevel = toplevel;
617
202M
    ctx_pos = alloc_pos;
618
619
202M
#if USE_COMPUTED_GOTOS
620
202M
#include "sre_targets.h"
621
202M
#endif
622
623
444M
entrance:
624
625
444M
    ;  // Fashion statement.
626
444M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
444M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
36.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.50M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.50M
                   end - ptr, (size_t) pattern[3]));
634
3.50M
            RETURN_FAILURE;
635
3.50M
        }
636
32.6M
        pattern += pattern[1] + 1;
637
32.6M
    }
638
639
440M
#if USE_COMPUTED_GOTOS
640
440M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
440M
    {
647
648
440M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
184M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
184M
                   ptr, pattern[0]));
653
184M
            {
654
184M
                int i = pattern[0];
655
184M
                if (i & 1)
656
32.5M
                    state->lastindex = i/2 + 1;
657
184M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
181M
                    int j = state->lastmark + 1;
663
190M
                    while (j < i)
664
8.92M
                        state->mark[j++] = NULL;
665
181M
                    state->lastmark = i;
666
181M
                }
667
184M
                state->mark[i] = ptr;
668
184M
            }
669
184M
            pattern++;
670
184M
            DISPATCH;
671
672
184M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
83.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
83.4M
                   ptr, *pattern));
677
83.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
34.9M
                RETURN_FAILURE;
679
48.4M
            pattern++;
680
48.4M
            ptr++;
681
48.4M
            DISPATCH;
682
683
48.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
73.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
73.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
73.5M
            if (ctx->toplevel &&
698
24.7M
                ((state->match_all && ptr != state->end) ||
699
24.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
73.5M
            state->ptr = ptr;
704
73.5M
            RETURN_SUCCESS;
705
706
17.0M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
17.0M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
17.0M
            if (!SRE(at)(state, ptr, *pattern))
711
2.89M
                RETURN_FAILURE;
712
14.2M
            pattern++;
713
14.2M
            DISPATCH;
714
715
14.2M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
101M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
101M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
101M
            if (ptr >= end ||
749
100M
                !SRE(charset)(state, pattern + 1, *ptr))
750
15.7M
                RETURN_FAILURE;
751
85.8M
            pattern += pattern[0];
752
85.8M
            ptr++;
753
85.8M
            DISPATCH;
754
755
85.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
562k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
562k
                   pattern, ptr, pattern[0]));
758
562k
            if (ptr >= end ||
759
562k
                sre_lower_ascii(*ptr) != *pattern)
760
5.11k
                RETURN_FAILURE;
761
557k
            pattern++;
762
557k
            ptr++;
763
557k
            DISPATCH;
764
765
557k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
34.9M
        TARGET(SRE_OP_JUMP):
845
34.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
34.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
34.9M
                   ptr, pattern[0]));
850
34.9M
            pattern += pattern[0];
851
34.9M
            DISPATCH;
852
853
45.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
45.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
45.4M
            LASTMARK_SAVE();
858
45.4M
            if (state->repeat)
859
14.9M
                MARK_PUSH(ctx->lastmark);
860
107M
            for (; pattern[0]; pattern += pattern[0]) {
861
95.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
65.9M
                    (ptr >= end ||
863
65.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
26.7M
                    continue;
865
68.9M
                if (pattern[1] == SRE_OP_IN &&
866
14.7M
                    (ptr >= end ||
867
14.6M
                     !SRE(charset)(state, pattern + 3,
868
14.6M
                                   (SRE_CODE) *ptr)))
869
7.26M
                    continue;
870
61.6M
                state->ptr = ptr;
871
61.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
61.6M
                if (ret) {
873
33.4M
                    if (state->repeat)
874
14.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
33.4M
                    RETURN_ON_ERROR(ret);
876
33.4M
                    RETURN_SUCCESS;
877
33.4M
                }
878
28.2M
                if (state->repeat)
879
5.67k
                    MARK_POP_KEEP(ctx->lastmark);
880
28.2M
                LASTMARK_RESTORE();
881
28.2M
            }
882
12.0M
            if (state->repeat)
883
655k
                MARK_POP_DISCARD(ctx->lastmark);
884
12.0M
            RETURN_FAILURE;
885
886
182M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
182M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
182M
                   pattern[1], pattern[2]));
898
899
182M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.08M
                RETURN_FAILURE; /* cannot match */
901
902
181M
            state->ptr = ptr;
903
904
181M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
181M
            RETURN_ON_ERROR(ret);
906
181M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
181M
            ctx->count = ret;
908
181M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
181M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
121M
                RETURN_FAILURE;
917
918
59.7M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
616k
                ptr == state->end &&
920
62.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
62.5k
            {
922
                /* tail is empty.  we're finished */
923
62.5k
                state->ptr = ptr;
924
62.5k
                RETURN_SUCCESS;
925
62.5k
            }
926
927
59.6M
            LASTMARK_SAVE();
928
59.6M
            if (state->repeat)
929
42.4M
                MARK_PUSH(ctx->lastmark);
930
931
59.6M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.06M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.06M
                for (;;) {
936
15.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
8.93M
                        ptr--;
939
8.93M
                        ctx->count--;
940
8.93M
                    }
941
6.06M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.97M
                        break;
943
4.09M
                    state->ptr = ptr;
944
4.09M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.09M
                            pattern+pattern[0]);
946
4.09M
                    if (ret) {
947
4.09M
                        if (state->repeat)
948
2.98M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.09M
                        RETURN_ON_ERROR(ret);
950
4.09M
                        RETURN_SUCCESS;
951
4.09M
                    }
952
225
                    if (state->repeat)
953
209
                        MARK_POP_KEEP(ctx->lastmark);
954
225
                    LASTMARK_RESTORE();
955
956
225
                    ptr--;
957
225
                    ctx->count--;
958
225
                }
959
1.97M
                if (state->repeat)
960
777k
                    MARK_POP_DISCARD(ctx->lastmark);
961
53.6M
            } else {
962
                /* general case */
963
62.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
60.3M
                    state->ptr = ptr;
965
60.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
60.3M
                            pattern+pattern[0]);
967
60.3M
                    if (ret) {
968
51.9M
                        if (state->repeat)
969
37.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
51.9M
                        RETURN_ON_ERROR(ret);
971
51.9M
                        RETURN_SUCCESS;
972
51.9M
                    }
973
8.44M
                    if (state->repeat)
974
1.33M
                        MARK_POP_KEEP(ctx->lastmark);
975
8.44M
                    LASTMARK_RESTORE();
976
977
8.44M
                    ptr--;
978
8.44M
                    ctx->count--;
979
8.44M
                }
980
1.69M
                if (state->repeat)
981
1.18M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.69M
            }
983
3.66M
            RETURN_FAILURE;
984
985
16
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
16
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
16
                   pattern[1], pattern[2]));
997
998
16
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
16
            state->ptr = ptr;
1002
1003
16
            if (pattern[1] == 0)
1004
16
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
16
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
16
            } else {
1028
                /* general case */
1029
16
                LASTMARK_SAVE();
1030
16
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
832
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
832
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
832
                    state->ptr = ptr;
1036
832
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
832
                            pattern+pattern[0]);
1038
832
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
832
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
832
                    LASTMARK_RESTORE();
1047
1048
832
                    state->ptr = ptr;
1049
832
                    ret = SRE(count)(state, pattern+3, 1);
1050
832
                    RETURN_ON_ERROR(ret);
1051
832
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
832
                    if (ret == 0)
1053
16
                        break;
1054
832
                    assert(ret == 1);
1055
816
                    ptr++;
1056
816
                    ctx->count++;
1057
816
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
16.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
16.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
16.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
16.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
16.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
16.8M
            ctx->u.rep->count = -1;
1131
16.8M
            ctx->u.rep->pattern = pattern;
1132
16.8M
            ctx->u.rep->prev = state->repeat;
1133
16.8M
            ctx->u.rep->last_ptr = NULL;
1134
16.8M
            state->repeat = ctx->u.rep;
1135
1136
16.8M
            state->ptr = ptr;
1137
16.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
16.8M
            state->repeat = ctx->u.rep->prev;
1139
16.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
16.8M
            if (ret) {
1142
16.7M
                RETURN_ON_ERROR(ret);
1143
16.7M
                RETURN_SUCCESS;
1144
16.7M
            }
1145
106k
            RETURN_FAILURE;
1146
1147
66.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
66.2M
            ctx->u.rep = state->repeat;
1155
66.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
66.2M
            state->ptr = ptr;
1159
1160
66.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
66.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
66.2M
                   ptr, ctx->count));
1164
1165
66.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
16.7k
                ctx->u.rep->count = ctx->count;
1168
16.7k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
16.7k
                        ctx->u.rep->pattern+3);
1170
16.7k
                if (ret) {
1171
3.61k
                    RETURN_ON_ERROR(ret);
1172
3.61k
                    RETURN_SUCCESS;
1173
3.61k
                }
1174
13.1k
                ctx->u.rep->count = ctx->count-1;
1175
13.1k
                state->ptr = ptr;
1176
13.1k
                RETURN_FAILURE;
1177
13.1k
            }
1178
1179
66.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.95M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
58.3M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
58.3M
                ctx->u.rep->count = ctx->count;
1185
58.3M
                LASTMARK_SAVE();
1186
58.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
58.3M
                LAST_PTR_PUSH();
1189
58.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
58.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
58.3M
                        ctx->u.rep->pattern+3);
1192
58.3M
                LAST_PTR_POP();
1193
58.3M
                if (ret) {
1194
49.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
49.2M
                    RETURN_ON_ERROR(ret);
1196
49.2M
                    RETURN_SUCCESS;
1197
49.2M
                }
1198
9.03M
                MARK_POP(ctx->lastmark);
1199
9.03M
                LASTMARK_RESTORE();
1200
9.03M
                ctx->u.rep->count = ctx->count-1;
1201
9.03M
                state->ptr = ptr;
1202
9.03M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
16.9M
            state->repeat = ctx->u.rep->prev;
1207
16.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
16.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
16.9M
            RETURN_ON_SUCCESS(ret);
1211
284k
            state->ptr = ptr;
1212
284k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to the tail using the skip offset as-is; unlike
1435
               POSSESSIVE_REPEAT, no extra +1 is applied to pass SUCCESS. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.75M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.75M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.75M
                   ptr, pattern[1]));
1565
3.75M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.75M
            state->ptr = ptr - pattern[1];
1568
3.75M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.75M
            RETURN_ON_FAILURE(ret);
1570
3.52M
            pattern += pattern[0];
1571
3.52M
            DISPATCH;
1572
1573
19.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
19.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
19.9M
                   ptr, pattern[1]));
1578
19.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
19.9M
                state->ptr = ptr - pattern[1];
1580
19.9M
                LASTMARK_SAVE();
1581
19.9M
                if (state->repeat)
1582
19.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
39.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
39.8M
                if (ret) {
1586
2.16M
                    if (state->repeat)
1587
2.16M
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.16M
                    RETURN_ON_ERROR(ret);
1589
2.16M
                    RETURN_FAILURE;
1590
2.16M
                }
1591
17.7M
                if (state->repeat)
1592
17.7M
                    MARK_POP(ctx->lastmark);
1593
17.7M
                LASTMARK_RESTORE();
1594
17.7M
            }
1595
17.7M
            pattern += pattern[0];
1596
17.7M
            DISPATCH;
1597
1598
17.7M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
444M
exit:
1620
444M
    ctx_pos = ctx->last_ctx_pos;
1621
444M
    jump = ctx->jump;
1622
444M
    DATA_POP_DISCARD(ctx);
1623
444M
    if (ctx_pos == -1) {
1624
202M
        state->sigcount = sigcount;
1625
202M
        return ret;
1626
202M
    }
1627
241M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
241M
    switch (jump) {
1630
58.3M
        case JUMP_MAX_UNTIL_2:
1631
58.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
58.3M
            goto jump_max_until_2;
1633
16.9M
        case JUMP_MAX_UNTIL_3:
1634
16.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
16.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
61.6M
        case JUMP_BRANCH:
1643
61.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
61.6M
            goto jump_branch;
1645
16.7k
        case JUMP_MAX_UNTIL_1:
1646
16.7k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
16.7k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
16.8M
        case JUMP_REPEAT:
1658
16.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
16.8M
            goto jump_repeat;
1660
4.09M
        case JUMP_REPEAT_ONE_1:
1661
4.09M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.09M
            goto jump_repeat_one_1;
1663
60.3M
        case JUMP_REPEAT_ONE_2:
1664
60.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
60.3M
            goto jump_repeat_one_2;
1666
832
        case JUMP_MIN_REPEAT_ONE:
1667
832
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
832
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.75M
        case JUMP_ASSERT:
1673
3.75M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.75M
            goto jump_assert;
1675
19.9M
        case JUMP_ASSERT_NOT:
1676
19.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
19.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
241M
    }
1683
1684
0
    return ret; /* should never get here */
1685
241M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
224M
{
601
224M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
224M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
224M
    Py_ssize_t ret = 0;
604
224M
    int jump;
605
224M
    unsigned int sigcount = state->sigcount;
606
607
224M
    SRE(match_context)* ctx;
608
224M
    SRE(match_context)* nextctx;
609
224M
    INIT_TRACE(state);
610
611
224M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
224M
    DATA_ALLOC(SRE(match_context), ctx);
614
224M
    ctx->last_ctx_pos = -1;
615
224M
    ctx->jump = JUMP_NONE;
616
224M
    ctx->toplevel = toplevel;
617
224M
    ctx_pos = alloc_pos;
618
619
224M
#if USE_COMPUTED_GOTOS
620
224M
#include "sre_targets.h"
621
224M
#endif
622
623
469M
entrance:
624
625
469M
    ;  // Fashion statement.
626
469M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
469M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
15.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
110k
                   end - ptr, (size_t) pattern[3]));
634
110k
            RETURN_FAILURE;
635
110k
        }
636
15.6M
        pattern += pattern[1] + 1;
637
15.6M
    }
638
639
469M
#if USE_COMPUTED_GOTOS
640
469M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
469M
    {
647
648
469M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
187M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
187M
                   ptr, pattern[0]));
653
187M
            {
654
187M
                int i = pattern[0];
655
187M
                if (i & 1)
656
17.4M
                    state->lastindex = i/2 + 1;
657
187M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
186M
                    int j = state->lastmark + 1;
663
189M
                    while (j < i)
664
3.22M
                        state->mark[j++] = NULL;
665
186M
                    state->lastmark = i;
666
186M
                }
667
187M
                state->mark[i] = ptr;
668
187M
            }
669
187M
            pattern++;
670
187M
            DISPATCH;
671
672
187M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
32.5M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
32.5M
                   ptr, *pattern));
677
32.5M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
16.7M
                RETURN_FAILURE;
679
15.8M
            pattern++;
680
15.8M
            ptr++;
681
15.8M
            DISPATCH;
682
683
15.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
68.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
68.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
68.9M
            if (ctx->toplevel &&
698
10.7M
                ((state->match_all && ptr != state->end) ||
699
10.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
68.9M
            state->ptr = ptr;
704
68.9M
            RETURN_SUCCESS;
705
706
4.14M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
4.14M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
4.14M
            if (!SRE(at)(state, ptr, *pattern))
711
1.43M
                RETURN_FAILURE;
712
2.70M
            pattern++;
713
2.70M
            DISPATCH;
714
715
2.70M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
179M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
179M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
179M
            if (ptr >= end ||
749
176M
                !SRE(charset)(state, pattern + 1, *ptr))
750
56.1M
                RETURN_FAILURE;
751
123M
            pattern += pattern[0];
752
123M
            ptr++;
753
123M
            DISPATCH;
754
755
123M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.73M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.73M
                   pattern, ptr, pattern[0]));
758
4.73M
            if (ptr >= end ||
759
4.73M
                sre_lower_ascii(*ptr) != *pattern)
760
25.6k
                RETURN_FAILURE;
761
4.70M
            pattern++;
762
4.70M
            ptr++;
763
4.70M
            DISPATCH;
764
765
4.70M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
17.5M
        TARGET(SRE_OP_JUMP):
845
17.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
17.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
17.5M
                   ptr, pattern[0]));
850
17.5M
            pattern += pattern[0];
851
17.5M
            DISPATCH;
852
853
22.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
22.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
22.8M
            LASTMARK_SAVE();
858
22.8M
            if (state->repeat)
859
17.1M
                MARK_PUSH(ctx->lastmark);
860
47.5M
            for (; pattern[0]; pattern += pattern[0]) {
861
41.9M
                if (pattern[1] == SRE_OP_LITERAL &&
862
19.3M
                    (ptr >= end ||
863
19.3M
                     (SRE_CODE) *ptr != pattern[2]))
864
11.4M
                    continue;
865
30.5M
                if (pattern[1] == SRE_OP_IN &&
866
15.0M
                    (ptr >= end ||
867
15.0M
                     !SRE(charset)(state, pattern + 3,
868
15.0M
                                   (SRE_CODE) *ptr)))
869
8.53M
                    continue;
870
22.0M
                state->ptr = ptr;
871
22.0M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
22.0M
                if (ret) {
873
17.2M
                    if (state->repeat)
874
13.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
17.2M
                    RETURN_ON_ERROR(ret);
876
17.2M
                    RETURN_SUCCESS;
877
17.2M
                }
878
4.78M
                if (state->repeat)
879
5.14k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.78M
                LASTMARK_RESTORE();
881
4.78M
            }
882
5.57M
            if (state->repeat)
883
3.21M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.57M
            RETURN_FAILURE;
885
886
200M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
200M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
200M
                   pattern[1], pattern[2]));
898
899
200M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.33M
                RETURN_FAILURE; /* cannot match */
901
902
199M
            state->ptr = ptr;
903
904
199M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
199M
            RETURN_ON_ERROR(ret);
906
199M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
199M
            ctx->count = ret;
908
199M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
199M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
154M
                RETURN_FAILURE;
917
918
45.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.55M
                ptr == state->end &&
920
19.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
19.1k
            {
922
                /* tail is empty.  we're finished */
923
19.1k
                state->ptr = ptr;
924
19.1k
                RETURN_SUCCESS;
925
19.1k
            }
926
927
45.5M
            LASTMARK_SAVE();
928
45.5M
            if (state->repeat)
929
25.4M
                MARK_PUSH(ctx->lastmark);
930
931
45.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.93M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.93M
                for (;;) {
936
11.8M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
9.62M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
6.95M
                        ptr--;
939
6.95M
                        ctx->count--;
940
6.95M
                    }
941
4.93M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.27M
                        break;
943
2.66M
                    state->ptr = ptr;
944
2.66M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.66M
                            pattern+pattern[0]);
946
2.66M
                    if (ret) {
947
2.66M
                        if (state->repeat)
948
2.62M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.66M
                        RETURN_ON_ERROR(ret);
950
2.66M
                        RETURN_SUCCESS;
951
2.66M
                    }
952
322
                    if (state->repeat)
953
322
                        MARK_POP_KEEP(ctx->lastmark);
954
322
                    LASTMARK_RESTORE();
955
956
322
                    ptr--;
957
322
                    ctx->count--;
958
322
                }
959
2.27M
                if (state->repeat)
960
2.26M
                    MARK_POP_DISCARD(ctx->lastmark);
961
40.6M
            } else {
962
                /* general case */
963
48.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
47.8M
                    state->ptr = ptr;
965
47.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
47.8M
                            pattern+pattern[0]);
967
47.8M
                    if (ret) {
968
40.4M
                        if (state->repeat)
969
20.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
40.4M
                        RETURN_ON_ERROR(ret);
971
40.4M
                        RETURN_SUCCESS;
972
40.4M
                    }
973
7.44M
                    if (state->repeat)
974
232k
                        MARK_POP_KEEP(ctx->lastmark);
975
7.44M
                    LASTMARK_RESTORE();
976
977
7.44M
                    ptr--;
978
7.44M
                    ctx->count--;
979
7.44M
                }
980
197k
                if (state->repeat)
981
158k
                    MARK_POP_DISCARD(ctx->lastmark);
982
197k
            }
983
2.47M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
14.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
14.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
14.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
14.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
14.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
14.1M
            ctx->u.rep->count = -1;
1131
14.1M
            ctx->u.rep->pattern = pattern;
1132
14.1M
            ctx->u.rep->prev = state->repeat;
1133
14.1M
            ctx->u.rep->last_ptr = NULL;
1134
14.1M
            state->repeat = ctx->u.rep;
1135
1136
14.1M
            state->ptr = ptr;
1137
14.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
14.1M
            state->repeat = ctx->u.rep->prev;
1139
14.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
14.1M
            if (ret) {
1142
14.1M
                RETURN_ON_ERROR(ret);
1143
14.1M
                RETURN_SUCCESS;
1144
14.1M
            }
1145
3.74k
            RETURN_FAILURE;
1146
1147
84.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
84.1M
            ctx->u.rep = state->repeat;
1155
84.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
84.1M
            state->ptr = ptr;
1159
1160
84.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
84.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
84.1M
                   ptr, ctx->count));
1164
1165
84.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
125k
                ctx->u.rep->count = ctx->count;
1168
125k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
125k
                        ctx->u.rep->pattern+3);
1170
125k
                if (ret) {
1171
123k
                    RETURN_ON_ERROR(ret);
1172
123k
                    RETURN_SUCCESS;
1173
123k
                }
1174
2.72k
                ctx->u.rep->count = ctx->count-1;
1175
2.72k
                state->ptr = ptr;
1176
2.72k
                RETURN_FAILURE;
1177
2.72k
            }
1178
1179
84.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.59M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
81.4M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
81.4M
                ctx->u.rep->count = ctx->count;
1185
81.4M
                LASTMARK_SAVE();
1186
81.4M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
81.4M
                LAST_PTR_PUSH();
1189
81.4M
                ctx->u.rep->last_ptr = state->ptr;
1190
81.4M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
81.4M
                        ctx->u.rep->pattern+3);
1192
81.4M
                LAST_PTR_POP();
1193
81.4M
                if (ret) {
1194
69.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
69.8M
                    RETURN_ON_ERROR(ret);
1196
69.8M
                    RETURN_SUCCESS;
1197
69.8M
                }
1198
11.6M
                MARK_POP(ctx->lastmark);
1199
11.6M
                LASTMARK_RESTORE();
1200
11.6M
                ctx->u.rep->count = ctx->count-1;
1201
11.6M
                state->ptr = ptr;
1202
11.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
14.2M
            state->repeat = ctx->u.rep->prev;
1207
14.2M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
14.2M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
14.2M
            RETURN_ON_SUCCESS(ret);
1211
73.1k
            state->ptr = ptr;
1212
73.1k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
9.24M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
9.24M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
9.24M
                   ptr, pattern[1]));
1565
9.24M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
9.24M
            state->ptr = ptr - pattern[1];
1568
9.24M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
9.24M
            RETURN_ON_FAILURE(ret);
1570
5.33M
            pattern += pattern[0];
1571
5.33M
            DISPATCH;
1572
1573
52.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
52.7M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
52.7M
                   ptr, pattern[1]));
1578
52.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
52.7M
                state->ptr = ptr - pattern[1];
1580
52.7M
                LASTMARK_SAVE();
1581
52.7M
                if (state->repeat)
1582
52.7M
                    MARK_PUSH(ctx->lastmark);
1583
1584
105M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
105M
                if (ret) {
1586
86.6k
                    if (state->repeat)
1587
86.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
86.6k
                    RETURN_ON_ERROR(ret);
1589
86.6k
                    RETURN_FAILURE;
1590
86.6k
                }
1591
52.6M
                if (state->repeat)
1592
52.6M
                    MARK_POP(ctx->lastmark);
1593
52.6M
                LASTMARK_RESTORE();
1594
52.6M
            }
1595
52.6M
            pattern += pattern[0];
1596
52.6M
            DISPATCH;
1597
1598
52.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
469M
exit:
1620
469M
    ctx_pos = ctx->last_ctx_pos;
1621
469M
    jump = ctx->jump;
1622
469M
    DATA_POP_DISCARD(ctx);
1623
469M
    if (ctx_pos == -1) {
1624
224M
        state->sigcount = sigcount;
1625
224M
        return ret;
1626
224M
    }
1627
244M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
244M
    switch (jump) {
1630
81.4M
        case JUMP_MAX_UNTIL_2:
1631
81.4M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
81.4M
            goto jump_max_until_2;
1633
14.2M
        case JUMP_MAX_UNTIL_3:
1634
14.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
14.2M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
22.0M
        case JUMP_BRANCH:
1643
22.0M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
22.0M
            goto jump_branch;
1645
125k
        case JUMP_MAX_UNTIL_1:
1646
125k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
125k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
14.1M
        case JUMP_REPEAT:
1658
14.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
14.1M
            goto jump_repeat;
1660
2.66M
        case JUMP_REPEAT_ONE_1:
1661
2.66M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.66M
            goto jump_repeat_one_1;
1663
47.8M
        case JUMP_REPEAT_ONE_2:
1664
47.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
47.8M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
9.24M
        case JUMP_ASSERT:
1673
9.24M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
9.24M
            goto jump_assert;
1675
52.7M
        case JUMP_ASSERT_NOT:
1676
52.7M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
52.7M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
244M
    }
1683
1684
0
    return ret; /* should never get here */
1685
244M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
55.6M
{
601
55.6M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
55.6M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
55.6M
    Py_ssize_t ret = 0;
604
55.6M
    int jump;
605
55.6M
    unsigned int sigcount = state->sigcount;
606
607
55.6M
    SRE(match_context)* ctx;
608
55.6M
    SRE(match_context)* nextctx;
609
55.6M
    INIT_TRACE(state);
610
611
55.6M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
55.6M
    DATA_ALLOC(SRE(match_context), ctx);
614
55.6M
    ctx->last_ctx_pos = -1;
615
55.6M
    ctx->jump = JUMP_NONE;
616
55.6M
    ctx->toplevel = toplevel;
617
55.6M
    ctx_pos = alloc_pos;
618
619
55.6M
#if USE_COMPUTED_GOTOS
620
55.6M
#include "sre_targets.h"
621
55.6M
#endif
622
623
253M
entrance:
624
625
253M
    ;  // Fashion statement.
626
253M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
253M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
9.66M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.81k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.81k
                   end - ptr, (size_t) pattern[3]));
634
3.81k
            RETURN_FAILURE;
635
3.81k
        }
636
9.66M
        pattern += pattern[1] + 1;
637
9.66M
    }
638
639
253M
#if USE_COMPUTED_GOTOS
640
253M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
253M
    {
647
648
253M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
71.5M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
71.5M
                   ptr, pattern[0]));
653
71.5M
            {
654
71.5M
                int i = pattern[0];
655
71.5M
                if (i & 1)
656
18.8M
                    state->lastindex = i/2 + 1;
657
71.5M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
69.6M
                    int j = state->lastmark + 1;
663
71.5M
                    while (j < i)
664
1.89M
                        state->mark[j++] = NULL;
665
69.6M
                    state->lastmark = i;
666
69.6M
                }
667
71.5M
                state->mark[i] = ptr;
668
71.5M
            }
669
71.5M
            pattern++;
670
71.5M
            DISPATCH;
671
672
71.5M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
31.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
31.3M
                   ptr, *pattern));
677
31.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
14.0M
                RETURN_FAILURE;
679
17.2M
            pattern++;
680
17.2M
            ptr++;
681
17.2M
            DISPATCH;
682
683
17.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
25.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
25.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
25.4M
            if (ctx->toplevel &&
698
7.08M
                ((state->match_all && ptr != state->end) ||
699
7.08M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
25.4M
            state->ptr = ptr;
704
25.4M
            RETURN_SUCCESS;
705
706
1.14M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.14M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.14M
            if (!SRE(at)(state, ptr, *pattern))
711
1.11M
                RETURN_FAILURE;
712
28.4k
            pattern++;
713
28.4k
            DISPATCH;
714
715
28.4k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
65.2M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
65.2M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
65.2M
            if (ptr >= end ||
749
65.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
12.9M
                RETURN_FAILURE;
751
52.3M
            pattern += pattern[0];
752
52.3M
            ptr++;
753
52.3M
            DISPATCH;
754
755
52.3M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.35M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.35M
                   pattern, ptr, pattern[0]));
758
2.35M
            if (ptr >= end ||
759
2.35M
                sre_lower_ascii(*ptr) != *pattern)
760
16.9k
                RETURN_FAILURE;
761
2.34M
            pattern++;
762
2.34M
            ptr++;
763
2.34M
            DISPATCH;
764
765
2.34M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
24.6M
        TARGET(SRE_OP_JUMP):
845
24.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
24.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
24.6M
                   ptr, pattern[0]));
850
24.6M
            pattern += pattern[0];
851
24.6M
            DISPATCH;
852
853
29.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
29.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
29.9M
            LASTMARK_SAVE();
858
29.9M
            if (state->repeat)
859
26.9M
                MARK_PUSH(ctx->lastmark);
860
62.8M
            for (; pattern[0]; pattern += pattern[0]) {
861
56.9M
                if (pattern[1] == SRE_OP_LITERAL &&
862
28.3M
                    (ptr >= end ||
863
28.3M
                     (SRE_CODE) *ptr != pattern[2]))
864
19.7M
                    continue;
865
37.2M
                if (pattern[1] == SRE_OP_IN &&
866
22.1M
                    (ptr >= end ||
867
22.1M
                     !SRE(charset)(state, pattern + 3,
868
22.1M
                                   (SRE_CODE) *ptr)))
869
12.3M
                    continue;
870
24.8M
                state->ptr = ptr;
871
24.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
24.8M
                if (ret) {
873
24.0M
                    if (state->repeat)
874
21.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.0M
                    RETURN_ON_ERROR(ret);
876
24.0M
                    RETURN_SUCCESS;
877
24.0M
                }
878
783k
                if (state->repeat)
879
4.52k
                    MARK_POP_KEEP(ctx->lastmark);
880
783k
                LASTMARK_RESTORE();
881
783k
            }
882
5.89M
            if (state->repeat)
883
5.22M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.89M
            RETURN_FAILURE;
885
886
92.6M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
92.6M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
92.6M
                   pattern[1], pattern[2]));
898
899
92.6M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
17.2k
                RETURN_FAILURE; /* cannot match */
901
902
92.6M
            state->ptr = ptr;
903
904
92.6M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
92.6M
            RETURN_ON_ERROR(ret);
906
92.6M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
92.6M
            ctx->count = ret;
908
92.6M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
92.6M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
37.6M
                RETURN_FAILURE;
917
918
54.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.09M
                ptr == state->end &&
920
5.64k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
5.64k
            {
922
                /* tail is empty.  we're finished */
923
5.64k
                state->ptr = ptr;
924
5.64k
                RETURN_SUCCESS;
925
5.64k
            }
926
927
54.9M
            LASTMARK_SAVE();
928
54.9M
            if (state->repeat)
929
42.4M
                MARK_PUSH(ctx->lastmark);
930
931
54.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
11.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
11.6M
                for (;;) {
936
34.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
28.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
22.5M
                        ptr--;
939
22.5M
                        ctx->count--;
940
22.5M
                    }
941
11.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
6.07M
                        break;
943
5.57M
                    state->ptr = ptr;
944
5.57M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.57M
                            pattern+pattern[0]);
946
5.57M
                    if (ret) {
947
5.56M
                        if (state->repeat)
948
5.56M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.56M
                        RETURN_ON_ERROR(ret);
950
5.56M
                        RETURN_SUCCESS;
951
5.56M
                    }
952
288
                    if (state->repeat)
953
288
                        MARK_POP_KEEP(ctx->lastmark);
954
288
                    LASTMARK_RESTORE();
955
956
288
                    ptr--;
957
288
                    ctx->count--;
958
288
                }
959
6.07M
                if (state->repeat)
960
6.06M
                    MARK_POP_DISCARD(ctx->lastmark);
961
43.3M
            } else {
962
                /* general case */
963
50.2M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
50.1M
                    state->ptr = ptr;
965
50.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
50.1M
                            pattern+pattern[0]);
967
50.1M
                    if (ret) {
968
43.2M
                        if (state->repeat)
969
30.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
43.2M
                        RETURN_ON_ERROR(ret);
971
43.2M
                        RETURN_SUCCESS;
972
43.2M
                    }
973
6.91M
                    if (state->repeat)
974
100k
                        MARK_POP_KEEP(ctx->lastmark);
975
6.91M
                    LASTMARK_RESTORE();
976
977
6.91M
                    ptr--;
978
6.91M
                    ctx->count--;
979
6.91M
                }
980
77.2k
                if (state->repeat)
981
66.2k
                    MARK_POP_DISCARD(ctx->lastmark);
982
77.2k
            }
983
6.14M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
17.1M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
17.1M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
17.1M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
17.1M
            ctx->u.rep = repeat_pool_malloc(state);
1127
17.1M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
17.1M
            ctx->u.rep->count = -1;
1131
17.1M
            ctx->u.rep->pattern = pattern;
1132
17.1M
            ctx->u.rep->prev = state->repeat;
1133
17.1M
            ctx->u.rep->last_ptr = NULL;
1134
17.1M
            state->repeat = ctx->u.rep;
1135
1136
17.1M
            state->ptr = ptr;
1137
17.1M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
17.1M
            state->repeat = ctx->u.rep->prev;
1139
17.1M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
17.1M
            if (ret) {
1142
17.1M
                RETURN_ON_ERROR(ret);
1143
17.1M
                RETURN_SUCCESS;
1144
17.1M
            }
1145
454
            RETURN_FAILURE;
1146
1147
58.9M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
58.9M
            ctx->u.rep = state->repeat;
1155
58.9M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
58.9M
            state->ptr = ptr;
1159
1160
58.9M
            ctx->count = ctx->u.rep->count+1;
1161
1162
58.9M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
58.9M
                   ptr, ctx->count));
1164
1165
58.9M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
2.99k
                ctx->u.rep->count = ctx->count;
1168
2.99k
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
2.99k
                        ctx->u.rep->pattern+3);
1170
2.99k
                if (ret) {
1171
2.94k
                    RETURN_ON_ERROR(ret);
1172
2.94k
                    RETURN_SUCCESS;
1173
2.94k
                }
1174
49
                ctx->u.rep->count = ctx->count-1;
1175
49
                state->ptr = ptr;
1176
49
                RETURN_FAILURE;
1177
49
            }
1178
1179
58.9M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.98M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
53.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
53.9M
                ctx->u.rep->count = ctx->count;
1185
53.9M
                LASTMARK_SAVE();
1186
53.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
53.9M
                LAST_PTR_PUSH();
1189
53.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
53.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
53.9M
                        ctx->u.rep->pattern+3);
1192
53.9M
                LAST_PTR_POP();
1193
53.9M
                if (ret) {
1194
41.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
41.7M
                    RETURN_ON_ERROR(ret);
1196
41.7M
                    RETURN_SUCCESS;
1197
41.7M
                }
1198
12.1M
                MARK_POP(ctx->lastmark);
1199
12.1M
                LASTMARK_RESTORE();
1200
12.1M
                ctx->u.rep->count = ctx->count-1;
1201
12.1M
                state->ptr = ptr;
1202
12.1M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
17.1M
            state->repeat = ctx->u.rep->prev;
1207
17.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
17.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
17.1M
            RETURN_ON_SUCCESS(ret);
1211
34.8k
            state->ptr = ptr;
1212
34.8k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.7M
                   ptr, pattern[1]));
1565
12.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.7M
            state->ptr = ptr - pattern[1];
1568
12.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.7M
            RETURN_ON_FAILURE(ret);
1570
11.2M
            pattern += pattern[0];
1571
11.2M
            DISPATCH;
1572
1573
16.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
16.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
16.4M
                   ptr, pattern[1]));
1578
16.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
16.4M
                state->ptr = ptr - pattern[1];
1580
16.4M
                LASTMARK_SAVE();
1581
16.4M
                if (state->repeat)
1582
16.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
32.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
32.9M
                if (ret) {
1586
9.84k
                    if (state->repeat)
1587
9.84k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
9.84k
                    RETURN_ON_ERROR(ret);
1589
9.84k
                    RETURN_FAILURE;
1590
9.84k
                }
1591
16.4M
                if (state->repeat)
1592
16.4M
                    MARK_POP(ctx->lastmark);
1593
16.4M
                LASTMARK_RESTORE();
1594
16.4M
            }
1595
16.4M
            pattern += pattern[0];
1596
16.4M
            DISPATCH;
1597
1598
16.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
253M
exit:
1620
253M
    ctx_pos = ctx->last_ctx_pos;
1621
253M
    jump = ctx->jump;
1622
253M
    DATA_POP_DISCARD(ctx);
1623
253M
    if (ctx_pos == -1) {
1624
55.6M
        state->sigcount = sigcount;
1625
55.6M
        return ret;
1626
55.6M
    }
1627
198M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
198M
    switch (jump) {
1630
53.9M
        case JUMP_MAX_UNTIL_2:
1631
53.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
53.9M
            goto jump_max_until_2;
1633
17.1M
        case JUMP_MAX_UNTIL_3:
1634
17.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
17.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
24.8M
        case JUMP_BRANCH:
1643
24.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
24.8M
            goto jump_branch;
1645
2.99k
        case JUMP_MAX_UNTIL_1:
1646
2.99k
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
2.99k
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
17.1M
        case JUMP_REPEAT:
1658
17.1M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
17.1M
            goto jump_repeat;
1660
5.57M
        case JUMP_REPEAT_ONE_1:
1661
5.57M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.57M
            goto jump_repeat_one_1;
1663
50.1M
        case JUMP_REPEAT_ONE_2:
1664
50.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
50.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.7M
        case JUMP_ASSERT:
1673
12.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.7M
            goto jump_assert;
1675
16.4M
        case JUMP_ASSERT_NOT:
1676
16.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
16.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
198M
    }
1683
1684
0
    return ret; /* should never get here */
1685
198M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
313M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
117M
{
1694
117M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
117M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
117M
    Py_ssize_t status = 0;
1697
117M
    Py_ssize_t prefix_len = 0;
1698
117M
    Py_ssize_t prefix_skip = 0;
1699
117M
    SRE_CODE* prefix = NULL;
1700
117M
    SRE_CODE* charset = NULL;
1701
117M
    SRE_CODE* overlap = NULL;
1702
117M
    int flags = 0;
1703
117M
    INIT_TRACE(state);
1704
1705
117M
    if (ptr > end)
1706
0
        return 0;
1707
1708
117M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
117M
        flags = pattern[2];
1713
1714
117M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.48M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.48M
                   end - ptr, (size_t) pattern[3]));
1717
5.48M
            return 0;
1718
5.48M
        }
1719
111M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.64M
            end -= pattern[3] - 1;
1723
8.64M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.64M
        }
1726
1727
111M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.65M
            prefix_len = pattern[5];
1731
8.65M
            prefix_skip = pattern[6];
1732
8.65M
            prefix = pattern + 7;
1733
8.65M
            overlap = prefix + prefix_len - 1;
1734
103M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
94.8M
            charset = pattern + 5;
1738
1739
111M
        pattern += 1 + pattern[1];
1740
111M
    }
1741
1742
111M
    TRACE(("prefix = %p %zd %zd\n",
1743
111M
           prefix, prefix_len, prefix_skip));
1744
111M
    TRACE(("charset = %p\n", charset));
1745
1746
111M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.61M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.36M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.36M
#endif
1753
5.36M
        end = (SRE_CHAR *)state->end;
1754
5.36M
        state->must_advance = 0;
1755
8.21M
        while (ptr < end) {
1756
115M
            while (*ptr != c) {
1757
108M
                if (++ptr >= end)
1758
434k
                    return 0;
1759
108M
            }
1760
7.65M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.65M
            state->start = ptr;
1762
7.65M
            state->ptr = ptr + prefix_skip;
1763
7.65M
            if (flags & SRE_INFO_LITERAL)
1764
7.57k
                return 1; /* we got all of it */
1765
7.64M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.64M
            if (status != 0)
1767
7.05M
                return status;
1768
594k
            ++ptr;
1769
594k
            RESET_CAPTURE_GROUP();
1770
594k
        }
1771
121k
        return 0;
1772
5.36M
    }
1773
1774
104M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
1.03M
        Py_ssize_t i = 0;
1778
1779
1.03M
        end = (SRE_CHAR *)state->end;
1780
1.03M
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
2.31M
        for (i = 0; i < prefix_len; i++)
1784
1.54M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
772k
#endif
1787
1.70M
        while (ptr < end) {
1788
1.70M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
20.9M
            while (*ptr++ != c) {
1790
19.2M
                if (ptr >= end)
1791
372
                    return 0;
1792
19.2M
            }
1793
1.70M
            if (ptr >= end)
1794
61
                return 0;
1795
1796
1.70M
            i = 1;
1797
1.70M
            state->must_advance = 0;
1798
1.70M
            do {
1799
1.70M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.62M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.62M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.62M
                    state->start = ptr - (prefix_len - 1);
1808
1.62M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.62M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.62M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.62M
                    if (status != 0)
1813
1.03M
                        return status;
1814
                    /* close but no cigar -- try again */
1815
592k
                    if (++ptr >= end)
1816
68
                        return 0;
1817
592k
                    RESET_CAPTURE_GROUP();
1818
592k
                }
1819
671k
                i = overlap[i];
1820
671k
            } while (i != 0);
1821
1.70M
        }
1822
0
        return 0;
1823
1.03M
    }
1824
1825
103M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
94.8M
        end = (SRE_CHAR *)state->end;
1828
94.8M
        state->must_advance = 0;
1829
97.2M
        for (;;) {
1830
384M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
287M
                ptr++;
1832
97.2M
            if (ptr >= end)
1833
3.44M
                return 0;
1834
93.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
93.7M
            state->start = ptr;
1836
93.7M
            state->ptr = ptr;
1837
93.7M
            status = SRE(match)(state, pattern, 0);
1838
93.7M
            if (status != 0)
1839
91.3M
                break;
1840
2.42M
            ptr++;
1841
2.42M
            RESET_CAPTURE_GROUP();
1842
2.42M
        }
1843
94.8M
    } else {
1844
        /* general case */
1845
8.37M
        assert(ptr <= end);
1846
8.37M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.37M
        state->start = state->ptr = ptr;
1848
8.37M
        status = SRE(match)(state, pattern, 1);
1849
8.37M
        state->must_advance = 0;
1850
8.37M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.05M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
90
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.05M
        {
1854
4.05M
            state->start = state->ptr = ptr = end;
1855
4.05M
            return 0;
1856
4.05M
        }
1857
314M
        while (status == 0 && ptr < end) {
1858
310M
            ptr++;
1859
310M
            RESET_CAPTURE_GROUP();
1860
310M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
310M
            state->start = state->ptr = ptr;
1862
310M
            status = SRE(match)(state, pattern, 0);
1863
310M
        }
1864
4.32M
    }
1865
1866
95.6M
    return status;
1867
103M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
54.6M
{
1694
54.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
54.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
54.6M
    Py_ssize_t status = 0;
1697
54.6M
    Py_ssize_t prefix_len = 0;
1698
54.6M
    Py_ssize_t prefix_skip = 0;
1699
54.6M
    SRE_CODE* prefix = NULL;
1700
54.6M
    SRE_CODE* charset = NULL;
1701
54.6M
    SRE_CODE* overlap = NULL;
1702
54.6M
    int flags = 0;
1703
54.6M
    INIT_TRACE(state);
1704
1705
54.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
54.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
54.6M
        flags = pattern[2];
1713
1714
54.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.38M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.38M
                   end - ptr, (size_t) pattern[3]));
1717
5.38M
            return 0;
1718
5.38M
        }
1719
49.2M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.26M
            end -= pattern[3] - 1;
1723
2.26M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.26M
        }
1726
1727
49.2M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.26M
            prefix_len = pattern[5];
1731
2.26M
            prefix_skip = pattern[6];
1732
2.26M
            prefix = pattern + 7;
1733
2.26M
            overlap = prefix + prefix_len - 1;
1734
46.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
41.2M
            charset = pattern + 5;
1738
1739
49.2M
        pattern += 1 + pattern[1];
1740
49.2M
    }
1741
1742
49.2M
    TRACE(("prefix = %p %zd %zd\n",
1743
49.2M
           prefix, prefix_len, prefix_skip));
1744
49.2M
    TRACE(("charset = %p\n", charset));
1745
1746
49.2M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.16M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.16M
#if SIZEOF_SRE_CHAR < 4
1750
2.16M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.16M
#endif
1753
2.16M
        end = (SRE_CHAR *)state->end;
1754
2.16M
        state->must_advance = 0;
1755
2.54M
        while (ptr < end) {
1756
29.7M
            while (*ptr != c) {
1757
27.6M
                if (++ptr >= end)
1758
353k
                    return 0;
1759
27.6M
            }
1760
2.07M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.07M
            state->start = ptr;
1762
2.07M
            state->ptr = ptr + prefix_skip;
1763
2.07M
            if (flags & SRE_INFO_LITERAL)
1764
612
                return 1; /* we got all of it */
1765
2.07M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.07M
            if (status != 0)
1767
1.69M
                return status;
1768
379k
            ++ptr;
1769
379k
            RESET_CAPTURE_GROUP();
1770
379k
        }
1771
116k
        return 0;
1772
2.16M
    }
1773
1774
47.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
100k
        Py_ssize_t i = 0;
1778
1779
100k
        end = (SRE_CHAR *)state->end;
1780
100k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
100k
#if SIZEOF_SRE_CHAR < 4
1783
300k
        for (i = 0; i < prefix_len; i++)
1784
200k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
100k
#endif
1787
176k
        while (ptr < end) {
1788
176k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.21M
            while (*ptr++ != c) {
1790
3.03M
                if (ptr >= end)
1791
74
                    return 0;
1792
3.03M
            }
1793
176k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
175k
            i = 1;
1797
175k
            state->must_advance = 0;
1798
176k
            do {
1799
176k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
166k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
166k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
166k
                    state->start = ptr - (prefix_len - 1);
1808
166k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
166k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
166k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
166k
                    if (status != 0)
1813
99.9k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
66.5k
                    if (++ptr >= end)
1816
28
                        return 0;
1817
66.5k
                    RESET_CAPTURE_GROUP();
1818
66.5k
                }
1819
76.7k
                i = overlap[i];
1820
76.7k
            } while (i != 0);
1821
175k
        }
1822
0
        return 0;
1823
100k
    }
1824
1825
46.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
41.2M
        end = (SRE_CHAR *)state->end;
1828
41.2M
        state->must_advance = 0;
1829
42.7M
        for (;;) {
1830
110M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
67.9M
                ptr++;
1832
42.7M
            if (ptr >= end)
1833
2.40M
                return 0;
1834
40.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
40.3M
            state->start = ptr;
1836
40.3M
            state->ptr = ptr;
1837
40.3M
            status = SRE(match)(state, pattern, 0);
1838
40.3M
            if (status != 0)
1839
38.8M
                break;
1840
1.54M
            ptr++;
1841
1.54M
            RESET_CAPTURE_GROUP();
1842
1.54M
        }
1843
41.2M
    } else {
1844
        /* general case */
1845
5.76M
        assert(ptr <= end);
1846
5.76M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.76M
        state->start = state->ptr = ptr;
1848
5.76M
        status = SRE(match)(state, pattern, 1);
1849
5.76M
        state->must_advance = 0;
1850
5.76M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.63M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
26
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.63M
        {
1854
2.63M
            state->start = state->ptr = ptr = end;
1855
2.63M
            return 0;
1856
2.63M
        }
1857
121M
        while (status == 0 && ptr < end) {
1858
117M
            ptr++;
1859
117M
            RESET_CAPTURE_GROUP();
1860
117M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
117M
            state->start = state->ptr = ptr;
1862
117M
            status = SRE(match)(state, pattern, 0);
1863
117M
        }
1864
3.12M
    }
1865
1866
41.9M
    return status;
1867
46.9M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
55.5M
{
1694
55.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
55.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
55.5M
    Py_ssize_t status = 0;
1697
55.5M
    Py_ssize_t prefix_len = 0;
1698
55.5M
    Py_ssize_t prefix_skip = 0;
1699
55.5M
    SRE_CODE* prefix = NULL;
1700
55.5M
    SRE_CODE* charset = NULL;
1701
55.5M
    SRE_CODE* overlap = NULL;
1702
55.5M
    int flags = 0;
1703
55.5M
    INIT_TRACE(state);
1704
1705
55.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
55.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
55.5M
        flags = pattern[2];
1713
1714
55.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
97.8k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
97.8k
                   end - ptr, (size_t) pattern[3]));
1717
97.8k
            return 0;
1718
97.8k
        }
1719
55.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.87M
            end -= pattern[3] - 1;
1723
3.87M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.87M
        }
1726
1727
55.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.87M
            prefix_len = pattern[5];
1731
3.87M
            prefix_skip = pattern[6];
1732
3.87M
            prefix = pattern + 7;
1733
3.87M
            overlap = prefix + prefix_len - 1;
1734
51.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
49.1M
            charset = pattern + 5;
1738
1739
55.4M
        pattern += 1 + pattern[1];
1740
55.4M
    }
1741
1742
55.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
55.4M
           prefix, prefix_len, prefix_skip));
1744
55.4M
    TRACE(("charset = %p\n", charset));
1745
1746
55.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.20M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.20M
#if SIZEOF_SRE_CHAR < 4
1750
3.20M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.20M
#endif
1753
3.20M
        end = (SRE_CHAR *)state->end;
1754
3.20M
        state->must_advance = 0;
1755
3.33M
        while (ptr < end) {
1756
59.5M
            while (*ptr != c) {
1757
56.2M
                if (++ptr >= end)
1758
77.0k
                    return 0;
1759
56.2M
            }
1760
3.25M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.25M
            state->start = ptr;
1762
3.25M
            state->ptr = ptr + prefix_skip;
1763
3.25M
            if (flags & SRE_INFO_LITERAL)
1764
4.53k
                return 1; /* we got all of it */
1765
3.24M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.24M
            if (status != 0)
1767
3.11M
                return status;
1768
130k
            ++ptr;
1769
130k
            RESET_CAPTURE_GROUP();
1770
130k
        }
1771
3.97k
        return 0;
1772
3.20M
    }
1773
1774
52.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
672k
        Py_ssize_t i = 0;
1778
1779
672k
        end = (SRE_CHAR *)state->end;
1780
672k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
672k
#if SIZEOF_SRE_CHAR < 4
1783
2.01M
        for (i = 0; i < prefix_len; i++)
1784
1.34M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
672k
#endif
1787
1.01M
        while (ptr < end) {
1788
1.01M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.17M
            while (*ptr++ != c) {
1790
7.16M
                if (ptr >= end)
1791
144
                    return 0;
1792
7.16M
            }
1793
1.01M
            if (ptr >= end)
1794
20
                return 0;
1795
1796
1.01M
            i = 1;
1797
1.01M
            state->must_advance = 0;
1798
1.01M
            do {
1799
1.01M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
982k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
982k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
982k
                    state->start = ptr - (prefix_len - 1);
1808
982k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
982k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
982k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
982k
                    if (status != 0)
1813
672k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
309k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
309k
                    RESET_CAPTURE_GROUP();
1818
309k
                }
1819
338k
                i = overlap[i];
1820
338k
            } while (i != 0);
1821
1.01M
        }
1822
0
        return 0;
1823
672k
    }
1824
1825
51.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
49.1M
        end = (SRE_CHAR *)state->end;
1828
49.1M
        state->must_advance = 0;
1829
49.4M
        for (;;) {
1830
203M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
154M
                ptr++;
1832
49.4M
            if (ptr >= end)
1833
994k
                return 0;
1834
48.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
48.4M
            state->start = ptr;
1836
48.4M
            state->ptr = ptr;
1837
48.4M
            status = SRE(match)(state, pattern, 0);
1838
48.4M
            if (status != 0)
1839
48.1M
                break;
1840
338k
            ptr++;
1841
338k
            RESET_CAPTURE_GROUP();
1842
338k
        }
1843
49.1M
    } else {
1844
        /* general case */
1845
2.46M
        assert(ptr <= end);
1846
2.46M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.46M
        state->start = state->ptr = ptr;
1848
2.46M
        status = SRE(match)(state, pattern, 1);
1849
2.46M
        state->must_advance = 0;
1850
2.46M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
33
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.40M
        {
1854
1.40M
            state->start = state->ptr = ptr = end;
1855
1.40M
            return 0;
1856
1.40M
        }
1857
155M
        while (status == 0 && ptr < end) {
1858
154M
            ptr++;
1859
154M
            RESET_CAPTURE_GROUP();
1860
154M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
154M
            state->start = state->ptr = ptr;
1862
154M
            status = SRE(match)(state, pattern, 0);
1863
154M
        }
1864
1.05M
    }
1865
1866
49.1M
    return status;
1867
51.5M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.14M
{
1694
7.14M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.14M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.14M
    Py_ssize_t status = 0;
1697
7.14M
    Py_ssize_t prefix_len = 0;
1698
7.14M
    Py_ssize_t prefix_skip = 0;
1699
7.14M
    SRE_CODE* prefix = NULL;
1700
7.14M
    SRE_CODE* charset = NULL;
1701
7.14M
    SRE_CODE* overlap = NULL;
1702
7.14M
    int flags = 0;
1703
7.14M
    INIT_TRACE(state);
1704
1705
7.14M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.14M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.14M
        flags = pattern[2];
1713
1714
7.14M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.81k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.81k
                   end - ptr, (size_t) pattern[3]));
1717
4.81k
            return 0;
1718
4.81k
        }
1719
7.13M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.50M
            end -= pattern[3] - 1;
1723
2.50M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.50M
        }
1726
1727
7.13M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.50M
            prefix_len = pattern[5];
1731
2.50M
            prefix_skip = pattern[6];
1732
2.50M
            prefix = pattern + 7;
1733
2.50M
            overlap = prefix + prefix_len - 1;
1734
4.62M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.47M
            charset = pattern + 5;
1738
1739
7.13M
        pattern += 1 + pattern[1];
1740
7.13M
    }
1741
1742
7.13M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.13M
           prefix, prefix_len, prefix_skip));
1744
7.13M
    TRACE(("charset = %p\n", charset));
1745
1746
7.13M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.24M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.24M
        end = (SRE_CHAR *)state->end;
1754
2.24M
        state->must_advance = 0;
1755
2.33M
        while (ptr < end) {
1756
26.6M
            while (*ptr != c) {
1757
24.3M
                if (++ptr >= end)
1758
4.66k
                    return 0;
1759
24.3M
            }
1760
2.32M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.32M
            state->start = ptr;
1762
2.32M
            state->ptr = ptr + prefix_skip;
1763
2.32M
            if (flags & SRE_INFO_LITERAL)
1764
2.42k
                return 1; /* we got all of it */
1765
2.32M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.32M
            if (status != 0)
1767
2.23M
                return status;
1768
84.9k
            ++ptr;
1769
84.9k
            RESET_CAPTURE_GROUP();
1770
84.9k
        }
1771
892
        return 0;
1772
2.24M
    }
1773
1774
4.89M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
263k
        Py_ssize_t i = 0;
1778
1779
263k
        end = (SRE_CHAR *)state->end;
1780
263k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
519k
        while (ptr < end) {
1788
519k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
9.53M
            while (*ptr++ != c) {
1790
9.01M
                if (ptr >= end)
1791
154
                    return 0;
1792
9.01M
            }
1793
519k
            if (ptr >= end)
1794
18
                return 0;
1795
1796
519k
            i = 1;
1797
519k
            state->must_advance = 0;
1798
519k
            do {
1799
519k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
480k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
480k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
480k
                    state->start = ptr - (prefix_len - 1);
1808
480k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
480k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
480k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
480k
                    if (status != 0)
1813
263k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
216k
                    if (++ptr >= end)
1816
21
                        return 0;
1817
216k
                    RESET_CAPTURE_GROUP();
1818
216k
                }
1819
256k
                i = overlap[i];
1820
256k
            } while (i != 0);
1821
519k
        }
1822
0
        return 0;
1823
263k
    }
1824
1825
4.62M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.47M
        end = (SRE_CHAR *)state->end;
1828
4.47M
        state->must_advance = 0;
1829
5.01M
        for (;;) {
1830
70.1M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
65.0M
                ptr++;
1832
5.01M
            if (ptr >= end)
1833
48.8k
                return 0;
1834
4.96M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.96M
            state->start = ptr;
1836
4.96M
            state->ptr = ptr;
1837
4.96M
            status = SRE(match)(state, pattern, 0);
1838
4.96M
            if (status != 0)
1839
4.42M
                break;
1840
541k
            ptr++;
1841
541k
            RESET_CAPTURE_GROUP();
1842
541k
        }
1843
4.47M
    } else {
1844
        /* general case */
1845
152k
        assert(ptr <= end);
1846
152k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
152k
        state->start = state->ptr = ptr;
1848
152k
        status = SRE(match)(state, pattern, 1);
1849
152k
        state->must_advance = 0;
1850
152k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
31
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.8k
        {
1854
13.8k
            state->start = state->ptr = ptr = end;
1855
13.8k
            return 0;
1856
13.8k
        }
1857
38.2M
        while (status == 0 && ptr < end) {
1858
38.0M
            ptr++;
1859
38.0M
            RESET_CAPTURE_GROUP();
1860
38.0M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
38.0M
            state->start = state->ptr = ptr;
1862
38.0M
            status = SRE(match)(state, pattern, 0);
1863
38.0M
        }
1864
138k
    }
1865
1866
4.56M
    return status;
1867
4.62M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/